mirror of
https://github.com/TREX-CoE/trexio.git
synced 2024-11-03 20:54:07 +01:00
132 lines
4.0 KiB
Org Mode
132 lines
4.0 KiB
Org Mode
|
|
||
|
* JSON
|
||
|
"ao_2e_int" : {
|
||
|
"eri_num" : [ "int", [ ] ],
|
||
|
"eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]
|
||
|
},
|
||
|
|
||
|
* Front end
|
||
|
|
||
|
For example, the integral $\langle ij | kl \rangle = x$ can be
|
||
|
represented as
|
||
|
- a quartet of integers $(i,j,k,l)$
|
||
|
- a floating point value $x$
|
||
|
|
||
|
To store $N$ integrals in the file, we can store
|
||
|
- An array of quartets of integers
|
||
|
- An array of values (floats)
|
||
|
|
||
|
These two arrays have the same size, $N$.
|
||
|
|
||
|
As the number of integrals to store can be prohibitively large, it
|
||
|
is important to be able to read/write the integrals in chunks. So we
|
||
|
need to give two extra parameters to the functions:
|
||
|
- ~offset~ : the index of the 1st integral we want to read. An
|
||
|
offset of zero means that reading starts at the first integral
|
||
|
- ~num~ : the number of integrals to read
|
||
|
|
||
|
We need to provide one function to read a chunk of indices, and one
|
||
|
function to read a chunk of values, because some users might want to
|
||
|
read only the values of the integrals, or only the indices.
|
||
|
|
||
|
Here is an example for the indices:
|
||
|
|
||
|
#+BEGIN_SRC c
|
||
|
trexio_exit_code
|
||
|
trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,
|
||
|
const int64_t offset,
|
||
|
const int64_t num,
|
||
|
int32_t* buffer)
|
||
|
{
|
||
|
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||
|
if (offset < 0L) return TREXIO_INVALID_ARG_2;
|
||
|
if (num < 0L) return TREXIO_INVALID_ARG_3;
|
||
|
|
||
|
const uint32_t rank = 4; // To be set by generator : number of indices
|
||
|
|
||
|
int64_t nmax; // Max number of integrals
|
||
|
trexio_exit_code rc;
|
||
|
|
||
|
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
|
||
|
if (rc != TREXIO_SUCCESS) return rc;
|
||
|
|
||
|
switch (file->back_end) {
|
||
|
|
||
|
case TREXIO_TEXT:
|
||
|
return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||
|
break;
|
||
|
|
||
|
case TREXIO_HDF5:
|
||
|
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||
|
break;
|
||
|
/*
|
||
|
case TREXIO_JSON:
|
||
|
return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
|
||
|
break;
|
||
|
,*/
|
||
|
default:
|
||
|
return TREXIO_FAILURE; /* Impossible case */
|
||
|
}
|
||
|
}
|
||
|
#+END_SRC
|
||
|
|
||
|
For the values,
|
||
|
|
||
|
#+BEGIN_SRC c
|
||
|
trexio_exit_code
|
||
|
trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
|
||
|
const int64_t offset,
|
||
|
const int64_t num,
|
||
|
double* buffer)
|
||
|
{
|
||
|
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||
|
if (offset < 0L) return TREXIO_INVALID_ARG_2;
|
||
|
if (num < 0L) return TREXIO_INVALID_ARG_3;
|
||
|
|
||
|
int64_t nmax; // Max number of integrals
|
||
|
trexio_exit_code rc;
|
||
|
|
||
|
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
|
||
|
if (rc != TREXIO_SUCCESS) return rc;
|
||
|
|
||
|
switch (file->back_end) {
|
||
|
|
||
|
case TREXIO_TEXT:
|
||
|
return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
|
||
|
break;
|
||
|
|
||
|
case TREXIO_HDF5:
|
||
|
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
|
||
|
break;
|
||
|
/*
|
||
|
case TREXIO_JSON:
|
||
|
return trexio_json_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
|
||
|
break;
|
||
|
,*/
|
||
|
default:
|
||
|
return TREXIO_FAILURE; /* Impossible case */
|
||
|
}
|
||
|
}
|
||
|
#+END_SRC
|
||
|
|
||
|
* Text back end
|
||
|
As the size of the dataset should be extensible, the simplest
|
||
|
solution is to use one file for each sparse data set, and store
|
||
|
the name of this file in the group.
|
||
|
Each integral can be a line in the file:
|
||
|
i j k l x
|
||
|
which can be read with "%10ld %10ld %10ld %10ld %24.16e".
|
||
|
The offset can be used with ~fseek(f, 69L*offset, SEEK_SET)~, as each line is 69 bytes long (newline included)
|
||
|
|
||
|
* HDF5 Back end
|
||
|
|
||
|
We need to declare the number of rows of the dataset as
|
||
|
~UNLIMITED~. This requires the use of ~Chunked~ storage, and the
|
||
|
chunks should absolutely not be larger than 1MB.
|
||
|
|
||
|
To extend the storage, see :
|
||
|
https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
|
||
|
(figure 17)
|
||
|
|
||
|
If ~offset+num > nmax~, we need to extend the dataset.
|