From 75af136d4feb962da1652aaa69ec3a79a00a4eb1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 3 Jun 2021 13:56:45 +0200 Subject: [PATCH] Moved sparse documentation in src/templates_front/templator_front.org --- Sparse.org | 111 +----------------------- src/templates_front/templator_front.org | 104 +++++++++++++++++++++- 2 files changed, 104 insertions(+), 111 deletions(-) diff --git a/Sparse.org b/Sparse.org index 727cd24..6e4af31 100644 --- a/Sparse.org +++ b/Sparse.org @@ -1,113 +1,4 @@ - -* JSON - "ao_2e_int" : { - "eri_num" : [ "int", [ ] ] - "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ] - }, - -* Front end - - For example, the integral $\langle ij | kl \rangle = x$ can be - represented represented as - - a quartet of integers $(i,j,k,l)$ - - a floating point value $x$ - - To store $N$ integrals in the file, we can store - - An array of quartets of integers - - An array of values (floats) - - These two arrays have the same size, $N$. - - As the number of integrals to store can be prohibitively large, it - is important to be able to read/write the integrals in chunks. So we - need to give two extra parameters to the functions: - - ~offset~ : the index of the 1st integral we want to read. An - offset of zero implies to read the first integral - - ~num~ : the number of integrals to read - - We need to provide one function to read a chunk of indices, and one - function to read a chunk of values, because some users might want to - read only the values of the integrals, or only the indices. 
- - Here is an example for the indices: - -#+BEGIN_SRC c -trexio_exit_code -trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file, - const int64_t offset, - const int64_t num, - int32_t* buffer) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (offset < 0L) return TREXIO_INVALID_ARG_2; - if (num < 0L) return TREXIO_INVALID_ARG_3; - - const uint32_t rank = 4; // To be set by generator : number of indices - - int64_t nmax; // Max number of integrals - trexio_exit_code rc; - - rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); - if (rc != TREXIO_SUCCESS) return rc; - - switch (file->back_end) { - - case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); - break; - - case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); - break; -/* - case TREXIO_JSON: - return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); - break; -,*/ - default: - return TREXIO_FAILURE; /* Impossible case */ - } -} -#+END_SRC - -For the values, - -#+BEGIN_SRC c -trexio_exit_code -trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file, - const int64_t offset, - const int64_t num, - double* buffer) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (offset < 0L) return TREXIO_INVALID_ARG_2; - if (num < 0L) return TREXIO_INVALID_ARG_3; - - int64_t nmax; // Max number of integrals - trexio_exit_code rc; - - rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); - if (rc != TREXIO_SUCCESS) return rc; - - switch (file->back_end) { - - case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax); - break; - - case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax); - break; -/* - case TREXIO_JSON: - return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax); - break; -,*/ - default: - return TREXIO_FAILURE; /* 
Impossible case */ - } -} -#+END_SRC +See templator_front.org * Text back end As the size of the dataset should be extensible, the simplest diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index f62cb23..8ba9f9f 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -1405,7 +1405,7 @@ end interface Sparse data structures are used typically for large tensors such as two-electron integrals. For example, in the =trex.json= file sparse - arrays appear as the ~eri~ array: + arrays appear as follows for the ~eri~ array: #+begin_src python "ao_2e_int" : { @@ -1413,6 +1413,108 @@ end interface "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ] } #+end_src + + The electron repulsion integral $\langle ij | kl \rangle$ is + represented as a quartet of integers $(i,j,k,l)$ and a floating + point value. + + To store $N$ integrals in the file, we store + + - An array of quartets of integers + - An array of values (floats) + + Both arrays have the same size, $N$, the number of non-zero integrals. + Knowing the maximum dimensions allows us to check that the integers are + in a valid range, and also lets the library choose the smallest + integer representation to compress the storage. + + Fortran uses 1-based array indexing, while C uses 0-based indexing. + Internally, we use a 0-based representation but the Fortran binding + does the appropriate conversion when reading or writing. + + As the number of integrals to store can be prohibitively large, we + provide the possibility to read/write the integrals in chunks. So the + functions take two extra parameters: + - ~offset~ : the index of the 1st integral we want to read. An + offset of zero means reading starts at the first integral. + - ~num~ : the number of integrals to read. 
+ + We provide a function to read a chunk of indices, and a function to + read a chunk of values, because some users might want to read only + the values of the integrals, or only the indices. + + Here is an example for the indices: + + #+BEGIN_SRC c +trexio_exit_code +trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file, + const int64_t offset, + const int64_t num, + int32_t* buffer) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (offset < 0L) return TREXIO_INVALID_ARG_2; + if (num < 0L) return TREXIO_INVALID_ARG_3; + + const uint32_t rank = 4; // To be set by generator : number of indices + + int64_t nmax; // Max number of integrals + trexio_exit_code rc; + + rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); + if (rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); + break; + + case TREXIO_HDF5: + return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); + break; + + default: + return TREXIO_FAILURE; /* Impossible case */ + } +} + #+END_SRC + + For the values, + + #+BEGIN_SRC c +trexio_exit_code +trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file, + const int64_t offset, + const int64_t num, + double* buffer) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (offset < 0L) return TREXIO_INVALID_ARG_2; + if (num < 0L) return TREXIO_INVALID_ARG_3; + + int64_t nmax; // Max number of integrals + trexio_exit_code rc; + + rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); + if (rc != TREXIO_SUCCESS) return rc; + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax); + break; + + case TREXIO_HDF5: + return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax); + break; + + default: + return TREXIO_FAILURE; /* Impossible case */ + } +} + #+END_SRC + * Fortran 
helper/wrapper functions The function below adapts the original C-based ~trexio_open~ for Fortran.