trexio/Sparse.org


* JSON
      "ao_2e_int" : {
        "eri_num"            : [ "int", [  ] ],
        "eri"                : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]
    },
    
* Front end

  For example, the integral $\langle ij | kl \rangle = x$ can be
  represented as
  - a quartet of integers $(i,j,k,l)$
  - a floating point value $x$

  To store $N$ integrals in the file, we can store
  - An array of quartets of integers
  - An array of values (floats)

  These two arrays have the same size, $N$.

  As the number of integrals to store can be prohibitively large, it
  is important to be able to read/write the integrals in chunks. So we
  need to give two extra parameters to the functions:
  - ~offset~ : the index of the first integral we want to read. An
    offset of zero means that reading starts at the first integral
  - ~num~ : the number of integrals to read

  We need to provide one function to read a chunk of indices, and one
  function to read a chunk of values, because some users might want to
  read only the values of the integrals, or only the indices.

  Here is an example for the indices:

#+BEGIN_SRC c
// Read a chunk of the indices of the sparse ao_2e_int/eri dataset.
//
//   file   : TREXIO file handle (must not be NULL)
//   offset : index of the first integral to read; 0 is the first integral
//   num    : number of integrals to read
//   buffer : destination for the indices, forwarded untouched to the back end
//            (presumably rank*num int32_t values -- to be confirmed by the
//            back-end implementations)
//
// Returns TREXIO_INVALID_ARG_1/2/3 when file is NULL, offset is negative or
// num is negative, the error code of trexio_read_ao_2e_int_eri_num if the
// number of stored integrals cannot be read, TREXIO_FAILURE for an unknown
// back end, and otherwise whatever the selected back end returns.
trexio_exit_code
trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,
                                         const int64_t offset,
                                         const int64_t num,
                                         int32_t* buffer)
{
  if (file  == NULL) return TREXIO_INVALID_ARG_1;
  if (offset   < 0L) return TREXIO_INVALID_ARG_2;
  if (num      < 0L) return TREXIO_INVALID_ARG_3;

  const uint32_t rank = 4;  // To be set by generator : number of indices

  int64_t nmax = 0;         // Max number of integrals, read from the file

  // The back ends receive nmax together with offset and num.
  const trexio_exit_code rc = trexio_read_ao_2e_int_eri_num(file, &nmax);
  if (rc != TREXIO_SUCCESS) return rc;

  // Dispatch to the implementation matching the back end of this file.
  switch (file->back_end) {

  case TREXIO_TEXT:
    return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);

  case TREXIO_HDF5:
    return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
/*
  case TREXIO_JSON:
    return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
    break;
,*/
  default:
    return TREXIO_FAILURE; /* Impossible case */
  }
}
#+END_SRC

For the values:

#+BEGIN_SRC c
// Read a chunk of the values of the sparse ao_2e_int/eri dataset.
//
//   file   : TREXIO file handle (must not be NULL)
//   offset : index of the first integral to read; 0 is the first integral
//   num    : number of integrals to read
//   buffer : destination for the values, forwarded untouched to the back end
//            (presumably num doubles -- to be confirmed by the back-end
//            implementations)
//
// Returns TREXIO_INVALID_ARG_1/2/3 when file is NULL, offset is negative or
// num is negative, the error code of trexio_read_ao_2e_int_eri_num if the
// number of stored integrals cannot be read, TREXIO_FAILURE for an unknown
// back end, and otherwise whatever the selected back end returns.
trexio_exit_code
trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
                                         const int64_t offset,
                                         const int64_t num,
                                         double* buffer)
{
  if (file  == NULL) return TREXIO_INVALID_ARG_1;
  if (offset   < 0L) return TREXIO_INVALID_ARG_2;
  if (num      < 0L) return TREXIO_INVALID_ARG_3;

  int64_t nmax = 0;         // Max number of integrals, read from the file

  // The back ends receive nmax together with offset and num.
  const trexio_exit_code rc = trexio_read_ao_2e_int_eri_num(file, &nmax);
  if (rc != TREXIO_SUCCESS) return rc;

  // Dispatch to the *value* reader of the back end of this file
  // (the index readers take an int32_t buffer and an extra rank argument).
  switch (file->back_end) {

  case TREXIO_TEXT:
    return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);

  case TREXIO_HDF5:
    return trexio_hdf5_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
/*
  case TREXIO_JSON:
    return trexio_json_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
    break;
,*/
  default:
    return TREXIO_FAILURE; /* Impossible case */
  }
}
#+END_SRC

* Text back end
  As the size of the dataset should be extensible, the simplest
  solution is to use one file for each sparse data set, and store
  the name of this file in the group.
  Each integral can be a line in the file:
  i  j  k  l  x
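  which can be written with "%10ld %10ld %10ld %10ld %24.16e\n", so
  that every line has a fixed width of 69 characters (4x10 + 24
  characters of data, 4 spaces and the newline).
  The offset can then be applied with ~fseek(f, 69L*offset, SEEK_SET)~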
  
* HDF5 Back end

  We need to declare the number of rows of the dataset as
  ~UNLIMITED~. This requires using ~Chunked~ storage, and the
  chunks should absolutely not be larger than 1MB.

  To extend the storage, see :
  https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
  (figure 17)

  If ~offset+num > nmax~, we need to extend the dataset.
Added notes for sparse data structures 2021-05-07 00:02:12 +02:00
			`* JSON`
			`"ao_2e_int" : {`
			`"eri_num" : [ "int", [ ] ]`
			`"eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]`
			`},`

			`* Front end`

			`For example, the integral $\langle ij \| kl \rangle = x$ can be`
			`represented as`
			`- a quartet of integers $(i,j,k,l)$`
			`- a floating point value $x$`

			`To store $N$ integrals in the file, we can store`
			`- An array of quartets of integers`
			`- An array of values (floats)`

			`These two arrays have the same size, $N$.`

			`As the number of integrals to store can be prohibitively large, it`
			`is important to be able to read/write the integrals in chunks. So we`
			`need to give two extra parameters to the functions:`
			`- ~offset~ : the index of the 1st integral we want to read. An`
			`offset of zero implies to read the first integral`
			`- ~num~ : the number of integrals to read`

			`We need to provide one function to read a chunk of indices, and one`
			`function to read a chunk of values, because some users might want to`
			`read only the values of the integrals, or only the indices.`

			`Here is an example for the indices:`

			`#+BEGIN_SRC c`
			`trexio_exit_code`
			`trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,`
			`const int64_t offset,`
			`const int64_t num,`
			`int32_t* buffer)`
			`{`
			`if (file == NULL) return TREXIO_INVALID_ARG_1;`
			`if (offset < 0L) return TREXIO_INVALID_ARG_2;`
			`if (num < 0L) return TREXIO_INVALID_ARG_3;`

			`const uint32_t rank = 4; // To be set by generator : number of indices`

			`int64_t nmax; // Max number of integrals`
			`trexio_exit_code rc;`

			`rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);`
			`if (rc != TREXIO_SUCCESS) return rc;`

			`switch (file->back_end) {`

			`case TREXIO_TEXT:`
			`return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);`
			`break;`

			`case TREXIO_HDF5:`
			`return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);`
			`break;`
			`/*`
			`case TREXIO_JSON:`
			`return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);`
			`break;`
			`,*/`
			`default:`
			`return TREXIO_FAILURE; /* Impossible case */`
			`}`
			`}`
			`#+END_SRC`

			`For the values,`

			`#+BEGIN_SRC c`
			`trexio_exit_code`
			`trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,`
			`const int64_t offset,`
			`const int64_t num,`
			`double* buffer)`
			`{`
			`if (file == NULL) return TREXIO_INVALID_ARG_1;`
			`if (offset < 0L) return TREXIO_INVALID_ARG_2;`
			`if (num < 0L) return TREXIO_INVALID_ARG_3;`

			`int64_t nmax; // Max number of integrals`
			`trexio_exit_code rc;`

			`rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);`
			`if (rc != TREXIO_SUCCESS) return rc;`

			`switch (file->back_end) {`

			`case TREXIO_TEXT:`
			`return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);`
			`break;`

			`case TREXIO_HDF5:`
			`return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);`
			`break;`
			`/*`
			`case TREXIO_JSON:`
			`return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);`
			`break;`
			`,*/`
			`default:`
			`return TREXIO_FAILURE; /* Impossible case */`
			`}`
			`}`
			`#+END_SRC`

			`* Text back end`
			`As the size of the dataset should be extensible, the simplest`
			`solution is to use one file for each sparse data set, and store`
			`the name of this file in the group.`
			`Each integral can be a line in the file:`
			`i j k l x`
			`which can be read with "%10ld %10ld %10ld %10ld %24.16e".`
			`The offset can be used with ~fseek(69L*offset, SEEK_SET)~`

			`* HDF5 Back end`

			`We need to declare the number of rows of the dataset as`
			`~UNLIMITED~. This requires to use the ~Chunked~ storage, and the`
			`chunks should absolutely not be larger than 1MB.`

			`To extend the storage, see :`
			`https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html`
			`(figure 17)`

			`If the offset+num > nmax, we need to extend the dataset.`