From 6ba8c5ddc3714afc552243d3d49c7fc3c3540b8d Mon Sep 17 00:00:00 2001 From: q-posev Date: Tue, 27 Sep 2022 19:56:09 +0200 Subject: [PATCH] Buffered floats v1: determinant tests pass --- src/templates_front/templator_front.org | 819 +++++++++++++----------- src/templates_hdf5/templator_hdf5.org | 321 +++++----- src/templates_text/build.sh | 4 + src/templates_text/templator_text.org | 518 +++++++-------- tools/generator.py | 8 +- tools/generator_tools.py | 48 +- trex.org | 44 +- 7 files changed, 936 insertions(+), 826 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 11e25b7..2f0fd6d 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -766,7 +766,7 @@ class File: One of the currently supported TREXIO open modes. For example, 'r' or 'w'. state: int - Active state of the file (needed to write determinant_coefficient). + Active state of the file (needed to write state-specific datasets). Default is 0. isOpen: bool Flag indicating whether the current object is still open for I/O @@ -4140,6 +4140,426 @@ def has_$group_dset$(trexio_file) -> bool: return rc == TREXIO_SUCCESS #+end_src +** Templates for front end has/read/write a buffered vector + + This corresponds to the ~buffer~ data type and is particularly useful for incremental addition of values like + it was done for ~sparse~ data but without the need to supply tuples of indices. 
+ + + | Function name | Description | Precision | + |----------------------------------+------------------------------------------------+-----------| + | ~trexio_has_$group_dset$~ | Check if a buffered dset is present in a file | --- | + | ~trexio_read_$group_dset$~ | Read values of a vector in buffers | Double | + | ~trexio_read_$group_dset$_size~ | Read the number of elements stored in the file | Double | + | ~trexio_write_$group_dset$~ | Write values of a vector in buffers | Double | + | ~trexio_read_safe_$group_dset$~ | Safe (bounded) read (for Python API) | Double | + | ~trexio_write_safe_$group_dset$~ | Safe (bounded) write (for Python API) | Double | + +*** C source code + + #+begin_src c :tangle hrw_buffered_front.h :exports none +trexio_exit_code trexio_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, double* const dset); +trexio_exit_code trexio_read_safe_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset_out, const int64_t dim_out); +trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset); +trexio_exit_code trexio_write_safe_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset_in, const int64_t dim_in); +trexio_exit_code trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); + #+end_src + + #+begin_src c :tangle read_buffered_front.c +trexio_exit_code +trexio_read_$group_dset$ (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset == NULL) return TREXIO_INVALID_ARG_2; + if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; + + trexio_exit_code rc; + + uint32_t rank = 1; + uint64_t 
det_size = (uint64_t) (*buffer_size_read); + uint64_t dims[1] = {det_size}; + + // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered + int64_t eof_read_size = 0L; + + switch (file->back_end) { + + case TREXIO_TEXT: + rc = trexio_text_read_$group_dset$(file, offset_file, rank, dims, &eof_read_size, dset); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + rc = trexio_hdf5_read_$group_dset$(file, offset_file, rank, dims, &eof_read_size, dset); + break; +#else + rc = TREXIO_BACK_END_MISSING; + break; +#endif +/* + case TREXIO_JSON: + return trexio_json_read_$group_dset$(...); + break; +,*/ + default: + rc = TREXIO_FAILURE; /* Impossible case */ + break; + } + + if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc; + + if (rc == TREXIO_END) *buffer_size_read = eof_read_size; + + return rc; +} + #+end_src + + #+begin_src c :tangle read_buffered_front.c +trexio_exit_code +trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (size_max == NULL) return TREXIO_INVALID_ARG_2; + if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_read_$group_dset$_size(file, size_max); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_read_$group_dset$_size(file, size_max); + break; +#else + return TREXIO_BACK_END_MISSING; +#endif +/* + case TREXIO_JSON: + return trexio_json_read_ + break; +,*/ + default: + return TREXIO_FAILURE; /* Impossible case */ + } +} + #+end_src + + #+begin_src c :tangle read_buffered_front.c +trexio_exit_code +trexio_read_safe_$group_dset$ (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset_out, const int64_t dim_out) +{ + return trexio_read_$group_dset$(file, offset_file, buffer_size_read, dset_out); +} + #+end_src + + #+begin_src c :tangle 
write_buffered_front.c +trexio_exit_code +trexio_write_$group_dset$ (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset == NULL) return TREXIO_INVALID_ARG_2; + + uint32_t rank = 1; + uint64_t dims[1] = {buffer_size}; + + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_write_$group_dset$(file, offset_file, rank, dims, dset); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_write_$group_dset$(file, offset_file, rank, dims, dset); + break; +#else + return TREXIO_BACK_END_MISSING; + break; +#endif +/* + case TREXIO_JSON: + rc = trexio_json_read_ + break; +,*/ + } + + return TREXIO_FAILURE; +} + #+end_src + + #+begin_src c :tangle write_buffered_front.c +trexio_exit_code +trexio_write_safe_$group_dset$ (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset_in, const int64_t dim_in) +{ + return trexio_write_$group_dset$(file, offset_file, buffer_size, dset_in); +} + #+end_src + + #+begin_src c :tangle has_buffered_front.c +trexio_exit_code +trexio_has_$group_dset$ (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_has_$group_dset$(file); + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_$group_dset$(file); +#else + return TREXIO_BACK_END_MISSING; +#endif +/* + case TREXIO_JSON: + return trexio_json_has_ + break; +,*/ + } + + return TREXIO_FAILURE; +} + #+end_src + +*** Fortran interface + + The ~Fortran~ templates that provide an access to the ~C~ API calls from Fortran. + These templates are based on the use of ~iso_c_binding~. Pointers have to be passed by value. 
+ + #+begin_src f90 :tangle write_buffered_front_fortran.f90 +interface + integer(trexio_exit_code) function trexio_write_$group_dset$(trex_file, & + offset_file, buffer_size, dset) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + integer(c_int64_t), intent(in), value :: offset_file + integer(c_int64_t), intent(in), value :: buffer_size + real(c_double), intent(in) :: dset(*) + end function trexio_write_$group_dset$ +end interface + +interface + integer(trexio_exit_code) function trexio_write_safe_$group_dset$ (trex_file, & + offset_file, buffer_size, & + dset, dset_size) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + integer(c_int64_t), intent(in), value :: offset_file + integer(c_int64_t), intent(in), value :: buffer_size + real(c_double), intent(in) :: dset(*) + integer(c_int64_t), intent(in), value :: dset_size + end function trexio_write_safe_$group_dset$ +end interface + #+end_src + + #+begin_src f90 :tangle read_buffered_front_fortran.f90 +interface + integer(trexio_exit_code) function trexio_read_safe_$group_dset$ (trex_file, & + offset_file, buffer_size, & + dset, dset_size) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + integer(c_int64_t), intent(in), value :: offset_file + integer(c_int64_t), intent(inout) :: buffer_size + real(c_double), intent(out) :: dset(*) + integer(c_int64_t), intent(in), value :: dset_size + end function trexio_read_safe_$group_dset$ +end interface + +interface + integer(trexio_exit_code) function trexio_read_$group_dset$(trex_file, & + offset_file, buffer_size, dset) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + integer(c_int64_t), intent(in), value :: offset_file + integer(c_int64_t), intent(inout) :: buffer_size + real(c_double), intent(out) :: dset(*) + end function trexio_read_$group_dset$ +end 
interface + +interface + integer(trexio_exit_code) function trexio_read_$group_dset$_size (trex_file, & + size_max) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + integer(c_int64_t), intent(out) :: size_max + end function trexio_read_$group_dset$_size +end interface + #+end_src + + #+begin_src f90 :tangle has_buffered_front_fortran.f90 +interface + integer(trexio_exit_code) function trexio_has_$group_dset$ (trex_file) bind(C) + use, intrinsic :: iso_c_binding + import + integer(c_int64_t), intent(in), value :: trex_file + end function trexio_has_$group_dset$ +end interface + #+end_src + +*** Python interface + + #+begin_src python :tangle write_buffered_front.py +def write_$group_dset$(trexio_file: File, offset_file: int, buffer_size: int, dset) -> None: + """Write the $group_dset$ in the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + + offset_file: int + The number of values to be skipped in the file when writing. + + buffer_size: int + The number of values to write in the file. + + dset: list OR numpy.ndarray + Array of $group_dset$ to be written. If array data type does not correspond to int64, the conversion is performed. + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + if not isinstance(offset_file, int): + raise TypeError("offset_file argument has to be an integer.") + if not isinstance(buffer_size, int): + raise TypeError("buffer_size argument has to be an integer.") + if not isinstance(dset, (list, tuple, np.ndarray)): + raise TypeError("dset argument has to be an array (list, tuple or NumPy ndarray).") + + if isinstance(dset, np.ndarray) and not coefficients.dtype==np.float64: + # convert to float64 if input is in a different precision + dset_64 = np.float64(dset) + rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, dset_64) + else: + rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, dset) + + if rc != TREXIO_SUCCESS: + raise Error(rc) + #+end_src + + #+begin_src python :tangle read_buffered_front.py +def read_$group_dset$(trexio_file: File, offset_file: int, buffer_size: int) -> tuple: + """Read $group_dset$ from the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + + offset_file: int + The number of values to be skipped in the file when reading. + + buffer_size: int + The number of values to read from the file. + + Returns: + (dset, n_int_read, eof_flag) tuple where + - dset is the NumPy array [numpy.ndarray] with the default int64 precision; + - n_int_read [int] is the number of coefficients read from the trexio_file + (either strictly equal to buffer_size or less than buffer_size if EOF has been reached); + - eof_flag [bool] is True when EOF has been reached (i.e. when call to low-level pytrexio API returns TREXIO_END) + False otherwise. + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + if not isinstance(offset_file, int): + raise TypeError("offset_file argument has to be an integer.") + if not isinstance(buffer_size, int): + raise TypeError("buffer_size argument has to be an integer.") + + + # read the number of values already in the file + det_num = read_$group_dset$_size(trexio_file) + + # additional modification needed to avoid allocating more memory than needed if EOF will be reached during read + overflow = offset_file + buffer_size - det_num + eof_flag = False + if overflow > 0: + verified_size = buffer_size - overflow + eof_flag = True + else: + verified_size = buffer_size + + # main call to the low-level (SWIG-wrapped) trexio_read function, which also requires the sizes of the output to be provided + # n_int_read contains the number of elements actually read from the file, useful when EOF has been reached + rc, n_int_read, dset = pytr.trexio_read_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, verified_size, verified_size) + + if rc != TREXIO_SUCCESS: + raise Error(rc) + if n_int_read == 0: + raise ValueError("No integrals have been read from the file.") + if dset is None: + raise ValueError("Returned NULL array from the low-level pytrexio API.") + + return (dset, n_int_read, eof_flag) + + +def read_$group_dset$_size(trexio_file) -> int: + """Read the number of elements stored in the TREXIO file. + + Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. + + Returns: + ~num~: int + Integer value corresponding to the size of the $group_dset$ array from ~trexio_file~. + + Raises: + - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. + - Exception from some other error (e.g. RuntimeError). 
+ """ + + rc, num = pytr.trexio_read_$group_dset$_size(trexio_file.pytrexio_s) + if rc != TREXIO_SUCCESS: + raise Error(rc) + + return num + #+end_src + + #+begin_src python :tangle has_buffered_front.py +def has_$group_dset$(trexio_file) -> bool: + """Check that $group_dset$ exists in the TREXIO file. + + Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. + + Returns: + True if the variable exists, False otherwise + + Raises: + - trexio.Error if TREXIO return code ~rc~ is TREXIO_FAILURE and prints the error message using string_of_error. + - Exception from some other error (e.g. RuntimeError). + """ + + rc = pytr.trexio_has_$group_dset$(trexio_file.pytrexio_s) + if rc == TREXIO_FAILURE: + raise Error(rc) + + return rc == TREXIO_SUCCESS + #+end_src + ** Templates for front end has/read/write a single string attribute *** Introduction @@ -4479,6 +4899,7 @@ def delete_$group$(trexio_file) -> None: if rc != TREXIO_SUCCESS: raise Error(rc) #+end_src + * Source code for the determinant part Storage of the determinants is a particular case, @@ -4488,31 +4909,21 @@ def delete_$group$(trexio_file) -> None: This section concerns API calls related to Slater determinants. 
- | Function name | Description | - |--------------------------------------------+----------------------------------------------------| - | ~trexio_has_determinant_coefficient~ | Check if an attribute exists in a file | - | ~trexio_has_determinant_list~ | Check if an attribute exists in a file | - | ~trexio_write_determinant_coefficient~ | Write an attribute | - | ~trexio_write_determinant_list~ | Write an attribute | - | ~trexio_read_determinant_coefficient~ | Read an attribute | - | ~trexio_read_determinant_list~ | Read an attribute | - | ~trexio_read_determinant_coefficient_size~ | Get the number of the coefficients | - | ~trexio_get_int64_num~ | Get the number of int64 bit fields per determinant | + | Function name | Description | + |---------------------------------+----------------------------------------------------| + | ~trexio_has_determinant_list~ | Check if an attribute exists in a file | + | ~trexio_write_determinant_list~ | Write an attribute | + | ~trexio_read_determinant_list~ | Read an attribute | + | ~trexio_get_int64_num~ | Get the number of int64 bit fields per determinant | *** C source code #+begin_src c :tangle hrw_determinant_front.h :exports none trexio_exit_code trexio_has_determinant_list(trexio_t* const file); -trexio_exit_code trexio_has_determinant_coefficient(trexio_t* const file); trexio_exit_code trexio_read_determinant_list(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int64_t* const dset); trexio_exit_code trexio_read_safe_determinant_list(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, int64_t* const dset_out, const int64_t dim_out); -trexio_exit_code trexio_read_determinant_coefficient(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, double* const dset); -trexio_exit_code trexio_read_safe_determinant_coefficient(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset_out, const int64_t 
dim_out); trexio_exit_code trexio_write_determinant_list(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int64_t* dset); trexio_exit_code trexio_write_safe_determinant_list(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int64_t* dset_in, const int64_t dim_in); -trexio_exit_code trexio_write_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset); -trexio_exit_code trexio_write_safe_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset_in, const int64_t dim_in); -trexio_exit_code trexio_read_determinant_coefficient_size(trexio_t* const file, int64_t* const size_max); trexio_exit_code trexio_get_int64_num(trexio_t* const file, int32_t* const num); #+end_src @@ -4595,99 +5006,10 @@ trexio_read_determinant_list (trexio_t* const file, const int64_t offset_file, i #+begin_src c :tangle read_determinant_front.c trexio_exit_code -trexio_read_determinant_coefficient (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (dset == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_determinant_coefficient(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; - - trexio_exit_code rc; - - uint32_t rank = 1; - uint64_t det_size = (uint64_t) (*buffer_size_read); - uint64_t dims[1] = {det_size}; - - // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered - int64_t eof_read_size = 0L; - - switch (file->back_end) { - - case TREXIO_TEXT: - rc = trexio_text_read_determinant_coefficient(file, offset_file, rank, dims, &eof_read_size, dset); - break; - - case TREXIO_HDF5: -#ifdef HAVE_HDF5 - rc = trexio_hdf5_read_determinant_coefficient(file, offset_file, rank, dims, &eof_read_size, dset); - break; -#else - rc = 
TREXIO_BACK_END_MISSING; - break; -#endif -/* - case TREXIO_JSON: - return trexio_json_read_$group_dset$(...); - break; -,*/ - default: - rc = TREXIO_FAILURE; /* Impossible case */ - break; - } - - if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc; - - if (rc == TREXIO_END) *buffer_size_read = eof_read_size; - - return rc; -} - #+end_src - - #+begin_src c :tangle read_determinant_front.c -trexio_exit_code -trexio_read_determinant_coefficient_size(trexio_t* const file, int64_t* const size_max) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (size_max == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_determinant_coefficient(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; - - switch (file->back_end) { - - case TREXIO_TEXT: - return trexio_text_read_determinant_coefficient_size(file, size_max); - break; - - case TREXIO_HDF5: -#ifdef HAVE_HDF5 - return trexio_hdf5_read_determinant_coefficient_size(file, size_max); - break; -#else - return TREXIO_BACK_END_MISSING; -#endif -/* - case TREXIO_JSON: - return trexio_json_read_ - break; -,*/ - default: - return TREXIO_FAILURE; /* Impossible case */ - } -} - #+end_src - - #+begin_src c :tangle read_determinant_front.c -trexio_exit_code trexio_read_safe_determinant_list (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, int64_t* const dset_out, const int64_t dim_out) { return trexio_read_determinant_list(file, offset_file, buffer_size_read, dset_out); } - -trexio_exit_code -trexio_read_safe_determinant_coefficient (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, double* const dset_out, const int64_t dim_out) -{ - return trexio_read_determinant_coefficient(file, offset_file, buffer_size_read, dset_out); -} #+end_src #+begin_src c :tangle write_determinant_front.c @@ -4758,54 +5080,10 @@ trexio_write_determinant_list (trexio_t* const file, const int64_t offset_file, #+begin_src c :tangle write_determinant_front.c trexio_exit_code 
-trexio_write_determinant_coefficient (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (dset == NULL) return TREXIO_INVALID_ARG_2; - - uint32_t rank = 1; - uint64_t dims[1] = {buffer_size}; - - assert(file->back_end < TREXIO_INVALID_BACK_END); - - switch (file->back_end) { - - case TREXIO_TEXT: - return trexio_text_write_determinant_coefficient(file, offset_file, rank, dims, dset); - break; - - case TREXIO_HDF5: -#ifdef HAVE_HDF5 - return trexio_hdf5_write_determinant_coefficient(file, offset_file, rank, dims, dset); - break; -#else - return TREXIO_BACK_END_MISSING; - break; -#endif -/* - case TREXIO_JSON: - rc = trexio_json_read_ - break; -,*/ - } - - return TREXIO_FAILURE; -} - #+end_src - - #+begin_src c :tangle write_determinant_front.c -trexio_exit_code trexio_write_safe_determinant_list (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int64_t* dset_in, const int64_t dim_in) { return trexio_write_determinant_list(file, offset_file, buffer_size, dset_in); } - -trexio_exit_code -trexio_write_safe_determinant_coefficient (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const double* dset_in, const int64_t dim_in) -{ - return trexio_write_determinant_coefficient(file, offset_file, buffer_size, dset_in); -} #+end_src #+begin_src c :tangle has_determinant_front.c @@ -4835,35 +5113,6 @@ trexio_has_determinant_list (trexio_t* const file) ,*/ } - return TREXIO_FAILURE; -} - -trexio_exit_code -trexio_has_determinant_coefficient (trexio_t* const file) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - - assert(file->back_end < TREXIO_INVALID_BACK_END); - - switch (file->back_end) { - - case TREXIO_TEXT: - return trexio_text_has_determinant_coefficient(file); - - case TREXIO_HDF5: -#ifdef HAVE_HDF5 - return trexio_hdf5_has_determinant_coefficient(file); -#else - return TREXIO_BACK_END_MISSING; -#endif 
-/* - case TREXIO_JSON: - return trexio_json_has_ - break; -,*/ - } - return TREXIO_FAILURE; } #+end_src @@ -4899,32 +5148,6 @@ interface integer(c_int64_t), intent(in), value :: list_size end function trexio_write_safe_determinant_list end interface - -interface - integer(trexio_exit_code) function trexio_write_determinant_coefficient(trex_file, & - offset_file, buffer_size, coefficient) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - integer(c_int64_t), intent(in), value :: offset_file - integer(c_int64_t), intent(in), value :: buffer_size - real(c_double), intent(in) :: coefficient(*) - end function trexio_write_determinant_coefficient -end interface - -interface - integer(trexio_exit_code) function trexio_write_safe_determinant_coefficient (trex_file, & - offset_file, buffer_size, & - coefficient, coefficient_size) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - integer(c_int64_t), intent(in), value :: offset_file - integer(c_int64_t), intent(in), value :: buffer_size - real(c_double), intent(in) :: coefficient(*) - integer(c_int64_t), intent(in), value :: coefficient_size - end function trexio_write_safe_determinant_coefficient -end interface #+end_src #+begin_src f90 :tangle read_determinant_front_fortran.f90 @@ -4953,42 +5176,6 @@ interface integer(c_int64_t), intent(in), value :: list_size end function trexio_read_safe_determinant_list end interface - -interface - integer(trexio_exit_code) function trexio_read_safe_determinant_coefficient (trex_file, & - offset_file, buffer_size, & - coefficient, coefficient_size) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - integer(c_int64_t), intent(in), value :: offset_file - integer(c_int64_t), intent(inout) :: buffer_size - real(c_double), intent(out) :: coefficient(*) - integer(c_int64_t), intent(in), value :: coefficient_size - end function 
trexio_read_safe_determinant_coefficient -end interface - -interface - integer(trexio_exit_code) function trexio_read_determinant_coefficient(trex_file, & - offset_file, buffer_size, coefficient) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - integer(c_int64_t), intent(in), value :: offset_file - integer(c_int64_t), intent(inout) :: buffer_size - real(c_double), intent(out) :: coefficient(*) - end function trexio_read_determinant_coefficient -end interface - -interface - integer(trexio_exit_code) function trexio_read_determinant_coefficient_size (trex_file, & - size_max) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - integer(c_int64_t), intent(out) :: size_max - end function trexio_read_determinant_coefficient_size -end interface #+end_src #+begin_src f90 :tangle has_determinant_front_fortran.f90 @@ -5000,14 +5187,6 @@ interface end function trexio_has_determinant_list end interface -interface - integer(trexio_exit_code) function trexio_has_determinant_coefficient (trex_file) bind(C) - use, intrinsic :: iso_c_binding - import - integer(c_int64_t), intent(in), value :: trex_file - end function trexio_has_determinant_coefficient -end interface - interface integer(trexio_exit_code) function trexio_get_int64_num (trex_file, num) bind(C) use, intrinsic :: iso_c_binding @@ -5083,46 +5262,6 @@ def write_determinant_list(trexio_file: File, offset_file: int, buffer_size: int if rc != TREXIO_SUCCESS: raise Error(rc) - - -def write_determinant_coefficient(trexio_file: File, offset_file: int, buffer_size: int, coefficients: list) -> None: - """Write the determinant coefficients in the TREXIO file. - - Parameters: - - trexio_file: - TREXIO File object. - - offset_file: int - The number of coefficients to be skipped in the file when writing. - - buffer_size: int - The number of coefficients to write in the file. 
- - coefficients: list OR numpy.ndarray - Array of determinant_coefficient to be written. If array data type does not correspond to int64, the conversion is performed. - - Raises: - - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. - - Exception from some other error (e.g. RuntimeError). - """ - - if not isinstance(offset_file, int): - raise TypeError("offset_file argument has to be an integer.") - if not isinstance(buffer_size, int): - raise TypeError("buffer_size argument has to be an integer.") - if not isinstance(coefficients, (list, tuple, np.ndarray)): - raise TypeError("coefficients argument has to be an array (list, tuple or NumPy ndarray).") - - if isinstance(coefficients, np.ndarray) and not coefficients.dtype==np.float64: - # convert to float64 if input is in a different precision - coefficients_64 = np.float64(coefficients) - rc = pytr.trexio_write_safe_determinant_coefficient(trexio_file.pytrexio_s, offset_file, buffer_size, coefficients_64) - else: - rc = pytr.trexio_write_safe_determinant_coefficient(trexio_file.pytrexio_s, offset_file, buffer_size, coefficients) - - if rc != TREXIO_SUCCESS: - raise Error(rc) #+end_src #+begin_src python :tangle read_determinant_front.py @@ -5192,88 +5331,6 @@ def read_determinant_list(trexio_file: File, offset_file: int, buffer_size: int) return (dets_reshaped, n_int_read, eof_flag) -def read_determinant_coefficient(trexio_file: File, offset_file: int, buffer_size: int) -> tuple: - """Read determinant_coefficient from the TREXIO file. - - Parameters: - - trexio_file: - TREXIO File object. - - offset_file: int - The number of coefficient to be skipped in the file when reading. - - buffer_size: int - The number of coefficients to read from the file. 
- - Returns: - (coefficients, n_int_read, eof_flag) tuple where - - coefficients are NumPy arrays [numpy.ndarray] with the default int64 precision; - - n_int_read [int] is the number of coefficients read from the trexio_file - (either strictly equal to buffer_size or less than buffer_size if EOF has been reached); - - eof_flag [bool] is True when EOF has been reached (i.e. when call to low-level pytrexio API returns TREXIO_END) - False otherwise. - - Raises: - - trexio.Error if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message. - - Exception from some other error (e.g. RuntimeError). - """ - - if not isinstance(offset_file, int): - raise TypeError("offset_file argument has to be an integer.") - if not isinstance(buffer_size, int): - raise TypeError("buffer_size argument has to be an integer.") - - - # read the number of determinants already in the file - det_num = read_determinant_coefficient_size(trexio_file) - - # additional modification needed to avoid allocating more memory than needed if EOF will be reached during read - overflow = offset_file + buffer_size - det_num - eof_flag = False - if overflow > 0: - verified_size = buffer_size - overflow - eof_flag = True - else: - verified_size = buffer_size - - # main call to the low-level (SWIG-wrapped) trexio_read function, which also requires the sizes of the output to be provided - # read_buf_size contains the number of elements being read from the file, useful when EOF has been reached - rc, n_int_read, coefficients = pytr.trexio_read_safe_determinant_coefficient(trexio_file.pytrexio_s, - offset_file, - verified_size, - verified_size) - if rc != TREXIO_SUCCESS: - raise Error(rc) - if n_int_read == 0: - raise ValueError("No integrals have been read from the file.") - if coefficients is None: - raise ValueError("Returned NULL array from the low-level pytrexio API.") - - return (coefficients, n_int_read, eof_flag) - - -def read_determinant_coefficient_size(trexio_file) -> int: - 
"""Read the number of determinant coefficients stored in the TREXIO file. - - Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. - - Returns: - ~num~: int - Integer value of corresponding to the size of the determinant_coefficient array from ~trexio_file~. - - Raises: - - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. - - Exception from some other error (e.g. RuntimeError). - """ - - rc, num = pytr.trexio_read_determinant_coefficient_size(trexio_file.pytrexio_s) - if rc != TREXIO_SUCCESS: - raise Error(rc) - - return num - - def get_int64_num(trexio_file) -> int: """Compute the number of int64 bit fields corresponding to the TREXIO file. @@ -5314,26 +5371,6 @@ def has_determinant_list(trexio_file) -> bool: raise Error(rc) return rc == TREXIO_SUCCESS - - -def has_determinant_coefficient(trexio_file) -> bool: - """Check that determinant_coefficient exists in the TREXIO file. - - Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. - - Returns: - True if the variable exists, False otherwise - - Raises: - - trexio.Error if TREXIO return code ~rc~ is TREXIO_FAILURE and prints the error message using string_of_error. - - Exception from some other error (e.g. RuntimeError). - """ - - rc = pytr.trexio_has_determinant_coefficient(trexio_file.pytrexio_s) - if rc == TREXIO_FAILURE: - raise Error(rc) - - return rc == TREXIO_SUCCESS #+end_src * General helper functions diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index 989fb67..494dace 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -670,6 +670,173 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file) } #+end_src +* Template for HDF5 has/read/write a dataset of buffered vectors + + Chunked I/O in HDF5 for ~buffered~ data. 
+ + #+begin_src c :tangle hrw_buffered_hdf5.h :exports none +trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, double* const dset); +trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const double* dset); +trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); + #+end_src + + #+begin_src c :tangle read_buffered_hdf5.c +trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const uint32_t rank, + const uint64_t* dims, + int64_t* const eof_read_size, + double* const dset) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; + if (dset == NULL) return TREXIO_INVALID_ARG_6; + + char dset_dset_name[256]; + memset(dset_dset_name, 0, sizeof(dset_dset_name)); + const int32_t trexio_state = $with_state$ ; + + if (trexio_state != 0) { + sprintf(dset_dset_name, "$group_dset$_state_%" PRId32, trexio_state); + } else { + strncpy(dset_dset_name, "$group_dset$", 256); + } + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + hsize_t offset[1] = {(hsize_t) offset_file}; + hsize_t count[1] = {(hsize_t) dims[0]}; + + /* Attempt to read values (if EOF -> eof_read_size is modified with the number of elements read and return code is TREXIO_END) + 0 argument below is requires to skip internal treatment specific to sparse indices (i.e. 
their de-compression).*/ + return trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_dset_name, 1, offset, count, eof_read_size, 0, dset); +} + #+end_src + + #+begin_src c :tangle write_buffered_hdf5.c +trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const uint32_t rank, + const uint64_t* dims, + const double* dset) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset == NULL) return TREXIO_INVALID_ARG_5; + + char dset_dset_name[256]; + memset(dset_dset_name, 0, sizeof(dset_dset_name)); + const int32_t trexio_state = $with_state$ ; + + if (trexio_state != 0) { + sprintf(dset_dset_name, "$group_dset$_state_%" PRId32, trexio_state); + } else { + strncpy(dset_dset_name, "$group_dset$", 256); + } + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + hid_t dtype = H5T_NATIVE_DOUBLE; + + /* Arrays of chunk dims that will be used for chunking the dataset */ + const hsize_t chunk_dims[1] = {(hsize_t) dims[0]}; + + trexio_exit_code rc_write = TREXIO_FAILURE; + /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! 
*/ + if ( H5LTfind_dataset(f->$group$_group, dset_dset_name) != 1 ) { + /* If the file does not exist -> create it and write */ + + /* Create chunked dataset with dtype datatype and write indices into it */ + rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_dset_name, dtype, chunk_dims, dset); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + } else { + /* If the file exists -> open it and write */ + hsize_t offset_data[1] = {(hsize_t) offset_file}; + + /* Create chunked dataset with dtype datatype and write indices into it */ + rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_dset_name, dtype, chunk_dims, offset_data, dset); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + } + + return TREXIO_SUCCESS; +} + +trexio_exit_code +trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (size_max == NULL) return TREXIO_INVALID_ARG_2; + + char dset_dset_name[128]; + memset(dset_dset_name, 0, sizeof(dset_dset_name)); + const int32_t trexio_state = file->state; + + if (trexio_state != 0) { + sprintf(dset_dset_name, "$group_dset$_state_%" PRId32, trexio_state); + } else { + strncpy(dset_dset_name, "$group_dset$", 24); + } + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + hid_t dset_id = H5Dopen(f->$group$_group, dset_dset_name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + hid_t fspace_id = H5Dget_space(dset_id); + if (fspace_id < 0) { + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // allocate space for the dimensions to be read + hsize_t ddims[1] = {0}; + + // get the rank and dimensions of the dataset + H5Sget_simple_extent_dims(fspace_id, ddims, NULL); + + H5Dclose(dset_id); + H5Sclose(fspace_id); + + *size_max = (int64_t) ddims[0]; + + return TREXIO_SUCCESS; +} + #+end_src + + #+begin_src c :tangle has_buffered_hdf5.c +trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file) +{ + if 
(file == NULL) return TREXIO_INVALID_ARG_1; + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT; + + char dset_dset_name[128]; + memset(dset_dset_name, 0, sizeof(dset_dset_name)); + const int32_t trexio_state = file->state; + + if (trexio_state != 0) { + sprintf(dset_dset_name, "$group_dset$_state_%" PRId32, trexio_state); + } else { + strncpy(dset_dset_name, "$group_dset$", 24); + } + + herr_t status = H5LTfind_dataset(f->$group$_group, dset_dset_name); + /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */ + if (status == 1){ + return TREXIO_SUCCESS; + } else if (status == 0) { + return TREXIO_HAS_NOT; + } else { + return TREXIO_FAILURE; + } +} + #+end_src + * Template for HDF5 has/read/write a dataset of strings #+begin_src c :tangle hrw_dset_str_hdf5.h :exports none @@ -1045,10 +1212,6 @@ trexio_hdf5_delete_$group$ (trexio_t* const file) trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file); trexio_exit_code trexio_hdf5_read_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, int64_t* const list); trexio_exit_code trexio_hdf5_write_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const int64_t* list); -trexio_exit_code trexio_hdf5_has_determinant_coefficient(trexio_t* const file); -trexio_exit_code trexio_hdf5_read_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, double* const coeff); -trexio_exit_code trexio_hdf5_write_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const double* coeff); -trexio_exit_code trexio_hdf5_read_determinant_coefficient_size(trexio_t* const file, int64_t* const size_max); #+end_src #+begin_src c :tangle read_determinant_hdf5.c @@ -1074,37 +1237,6 
@@ trexio_exit_code trexio_hdf5_read_determinant_list(trexio_t* const file, 0 argument below is requires to skip internal treatment specific to sparse indices (i.e. their de-compression).*/ return trexio_hdf5_open_read_dset_sparse(f->determinant_group, dset_det_name, (uint32_t) dims[1], offset, count, eof_read_size, 0, list); } - -trexio_exit_code trexio_hdf5_read_determinant_coefficient(trexio_t* const file, - const int64_t offset_file, - const uint32_t rank, - const uint64_t* dims, - int64_t* const eof_read_size, - double* const coeff) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; - if (coeff == NULL) return TREXIO_INVALID_ARG_6; - - char dset_coeff_name[128]; - memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); - } else { - strncpy(dset_coeff_name, "determinant_coefficient", 24); - } - - const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; - - hsize_t offset[1] = {(hsize_t) offset_file}; - hsize_t count[1] = {(hsize_t) dims[0]}; - - /* Attempt to read determinants (if EOF -> eof_read_size is modified with the number of elements read and return code is TREXIO_END) - 0 argument below is requires to skip internal treatment specific to sparse indices (i.e. 
their de-compression).*/ - return trexio_hdf5_open_read_dset_sparse(f->determinant_group, dset_coeff_name, 1, offset, count, eof_read_size, 0, coeff); -} #+end_src #+begin_src c :tangle write_determinant_hdf5.c @@ -1150,97 +1282,6 @@ trexio_exit_code trexio_hdf5_write_determinant_list(trexio_t* const file, return TREXIO_SUCCESS; } - -trexio_exit_code trexio_hdf5_write_determinant_coefficient(trexio_t* const file, - const int64_t offset_file, - const uint32_t rank, - const uint64_t* dims, - const double* coeff) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (coeff == NULL) return TREXIO_INVALID_ARG_5; - - char dset_coeff_name[128]; - memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); - } else { - strncpy(dset_coeff_name, "determinant_coefficient", 24); - } - - trexio_hdf5_t* f = (trexio_hdf5_t*) file; - - hid_t det_dtype = H5T_NATIVE_DOUBLE; - - /* Arrays of chunk dims that will be used for chunking the dataset */ - const hsize_t chunk_dims[1] = {(hsize_t) dims[0]}; - - trexio_exit_code rc_write = TREXIO_FAILURE; - /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! 
*/ - if ( H5LTfind_dataset(f->determinant_group, dset_coeff_name) != 1 ) { - /* If the file does not exist -> create it and write */ - - /* Create chunked dataset with det_dtype datatype and write indices into it */ - rc_write = trexio_hdf5_create_write_dset_sparse(f->determinant_group, dset_coeff_name, det_dtype, chunk_dims, coeff); - if (rc_write != TREXIO_SUCCESS) return rc_write; - - } else { - /* If the file exists -> open it and write */ - hsize_t offset_data[1] = {(hsize_t) offset_file}; - - /* Create chunked dataset with det_dtype datatype and write indices into it */ - rc_write = trexio_hdf5_open_write_dset_sparse(f->determinant_group, dset_coeff_name, det_dtype, chunk_dims, offset_data, coeff); - if (rc_write != TREXIO_SUCCESS) return rc_write; - - } - - return TREXIO_SUCCESS; -} - -trexio_exit_code -trexio_hdf5_read_determinant_coefficient_size (trexio_t* const file, int64_t* const size_max) -{ - - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (size_max == NULL) return TREXIO_INVALID_ARG_2; - - char dset_coeff_name[128]; - memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); - } else { - strncpy(dset_coeff_name, "determinant_coefficient", 24); - } - - const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; - - hid_t dset_id = H5Dopen(f->determinant_group, dset_coeff_name, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - hid_t fspace_id = H5Dget_space(dset_id); - if (fspace_id < 0) { - H5Dclose(dset_id); - return TREXIO_INVALID_ID; - } - - // allocate space for the dimensions to be read - hsize_t ddims[1] = {0}; - - // get the rank and dimensions of the dataset - H5Sget_simple_extent_dims(fspace_id, ddims, NULL); - - H5Dclose(dset_id); - H5Sclose(fspace_id); - - *size_max = (int64_t) ddims[0]; - - return TREXIO_SUCCESS; -} #+end_src #+begin_src c :tangle has_determinant_hdf5.c @@ 
-1261,34 +1302,6 @@ trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file) return TREXIO_FAILURE; } } - -trexio_exit_code trexio_hdf5_has_determinant_coefficient(trexio_t* const file) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - - trexio_hdf5_t* f = (trexio_hdf5_t*) file; - if (f->determinant_group == (hsize_t) 0) return TREXIO_HAS_NOT; - - char dset_coeff_name[128]; - memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); - } else { - strncpy(dset_coeff_name, "determinant_coefficient", 24); - } - - herr_t status = H5LTfind_dataset(f->determinant_group, dset_coeff_name); - /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */ - if (status == 1){ - return TREXIO_SUCCESS; - } else if (status == 0) { - return TREXIO_HAS_NOT; - } else { - return TREXIO_FAILURE; - } -} #+end_src * Helper functions diff --git a/src/templates_text/build.sh b/src/templates_text/build.sh index fccd3d2..93732f0 100644 --- a/src/templates_text/build.sh +++ b/src/templates_text/build.sh @@ -30,23 +30,27 @@ cat populated/pop_has_dset_str_text.c >> trexio_text.c cat populated/pop_has_dset_sparse_text.c >> trexio_text.c cat populated/pop_has_attr_num_text.c >> trexio_text.c cat populated/pop_has_attr_str_text.c >> trexio_text.c +cat populated/pop_has_buffered_text.c >> trexio_text.c cat populated/pop_read_dset_data_text.c >> trexio_text.c cat populated/pop_read_dset_str_text.c >> trexio_text.c cat populated/pop_read_dset_sparse_text.c >> trexio_text.c cat populated/pop_read_attr_str_text.c >> trexio_text.c cat populated/pop_read_attr_num_text.c >> trexio_text.c +cat populated/pop_read_buffered_text.c >> trexio_text.c cat populated/pop_write_dset_data_text.c >> trexio_text.c cat populated/pop_write_dset_str_text.c >> trexio_text.c cat populated/pop_write_dset_sparse_text.c >> trexio_text.c cat 
populated/pop_write_attr_str_text.c >> trexio_text.c cat populated/pop_write_attr_num_text.c >> trexio_text.c +cat populated/pop_write_buffered_text.c >> trexio_text.c cat populated/pop_hrw_dset_data_text.h >> trexio_text.h cat populated/pop_hrw_dset_str_text.h >> trexio_text.h cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h cat populated/pop_hrw_attr_num_text.h >> trexio_text.h cat populated/pop_hrw_attr_str_text.h >> trexio_text.h +cat populated/pop_hrw_buffered_text.h >> trexio_text.h cat suffix_text.h >> trexio_text.h diff --git a/src/templates_text/templator_text.org b/src/templates_text/templator_text.org index c59e81c..7a08d72 100644 --- a/src/templates_text/templator_text.org +++ b/src/templates_text/templator_text.org @@ -1333,6 +1333,272 @@ trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file) } #+end_src +* Template for has/read/write a buffered vector + + Each array is stored in a separate =.txt= file due to the fact that buffered I/O has to be decoupled + from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write the data + to prevent memory overflow. Chunks have a given ~int64_t size~. + Size specifies the number of vector elements to be written. 
+ + #+begin_src c :tangle hrw_buffered_text.h :exports none +trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, double* const dset); +trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const double* dset); +trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); + #+end_src + + #+begin_src c :tangle read_buffered_text.c +trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const uint32_t rank, + const uint64_t* dims, + int64_t* const eof_read_size, + double* const dset) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; + if (dset == NULL) return TREXIO_INVALID_ARG_6; + + char file_name[256]; + memset(file_name, 0, sizeof(file_name)); + const int32_t trexio_state = $with_state$; + + if (trexio_state != 0) { + sprintf(file_name, "/$group_dset$_state_%" PRId32 ".txt", trexio_state); + } else { + strncpy(file_name, "/$group_dset$.txt", 256); + } + + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen(file_name)); + + /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ + FILE* f = fopen(file_full_path, "r"); + if (f == NULL) return TREXIO_FILE_ERROR; + + /* Specify the line length in order to offset properly. + Each double value 24 elements + one newline char. 
+ ,*/ + uint64_t line_length = 25UL; + + /* Offset in the file according to the provided value of offset_file and optimal line_length */ + fseek(f, (long) offset_file * line_length, SEEK_SET); + + /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ + int rc; + char buffer[64]; + uint32_t buf_size = sizeof(buffer); + /* Counter for number of elements beind processed */ + uint64_t count = 0UL; + + for (uint64_t i=0UL; i < dims[0]; ++i) { + + memset(buffer, 0, buf_size); + if (fgets(buffer, buf_size-1, f) == NULL){ + + fclose(f); + ,*eof_read_size = count; + return TREXIO_END; + + } else { + + rc = sscanf(buffer, "%lf", dset + i); + if (rc <= 0) { + fclose(f); + return TREXIO_FAILURE; + } + count += 1UL; + + } + } + + /* Close the TXT file */ + rc = fclose(f); + if (rc != 0) return TREXIO_FILE_ERROR; + + return TREXIO_SUCCESS; +} + +trexio_exit_code +trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (size_max == NULL) return TREXIO_INVALID_ARG_2; + + char file_name[256]; + memset(file_name, 0, sizeof(file_name)); + const int32_t trexio_state = $with_state$; + + if (trexio_state != 0) { + sprintf(file_name, "/$group_dset$_state_%" PRId32 ".txt.size", trexio_state); + } else { + strncpy(file_name, "/$group_dset$.txt.size", 256); + } + + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen(file_name)); + + /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ + FILE* f = fopen(file_full_path, "r"); + if (f == NULL) return TREXIO_FILE_ERROR; + + /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ + int rc; + int64_t size_item, size_accum=0L; + + /* Read the values from the file. BEWARE OF POSSIBLE MAX_INT64 OVERFLOW ! */ + while(fscanf(f, "%" SCNd64, &size_item) != EOF) { + /* Check that summation will not overflow the int64_t value */ + if (INT64_MAX - size_accum > size_item) { + size_accum += size_item; + } else { + fclose(f); + ,*size_max = -1L; + return TREXIO_INT_SIZE_OVERFLOW; + } + } + + /* Close the TXT file */ + rc = fclose(f); + if (rc != 0) return TREXIO_FILE_ERROR; + + /* Overwrite the value at the input address and return TREXIO_SUCCESS */ + ,*size_max = size_accum; + return TREXIO_SUCCESS; +} + #+end_src + + #+begin_src c :tangle write_buffered_text.c +trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const uint32_t rank, + const uint64_t* dims, + const double* dset) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (dset == NULL) return TREXIO_INVALID_ARG_5; + + char file_name[256]; + memset(file_name, 0, sizeof(file_name)); + const int32_t trexio_state = $with_state$; + + if (trexio_state != 0) { + sprintf(file_name, "/$group_dset$_state_%" PRId32 ".txt", trexio_state); + } else { + strncpy(file_name, "/$group_dset$.txt", 256); + } + + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen(file_name)); + + /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */ + FILE* f = fopen(file_full_path, "a"); + if (f == NULL) return TREXIO_FILE_ERROR; + + /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */ + int rc; + for (uint64_t i=0UL; i < dims[0]; ++i) { + + rc = fprintf(f, "%24.16e\n", *(dset+ i)); + if (rc <= 0) { + fclose(f); + return TREXIO_FAILURE; + } + + } + + /* Close the TXT file */ + rc = fclose(f); + if (rc != 0) return TREXIO_FILE_ERROR; + + /* Append .size to the file_full_path in order to write additional info about the written buffer of data */ + strncat(file_full_path, ".size", 6); + + /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */ + FILE *f_wSize = fopen(file_full_path, "a"); + if (f_wSize == NULL) return TREXIO_FILE_ERROR; + + /* Write the buffer_size */ + rc = fprintf(f_wSize, "%" PRIu64 "\n", dims[0]); + if (rc <= 0) { + fclose(f_wSize); + return TREXIO_FAILURE; + } + + /* Close the TXT file */ + rc = fclose(f_wSize); + if (rc != 0) return TREXIO_FILE_ERROR; + + /* Additional part for the trexio_text_has_group to work */ + const char group_file_name[256] = "/$group$.txt"; + + memset (file_full_path, 0, TREXIO_MAX_FILENAME_LENGTH); + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, group_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen(group_file_name)); + + bool file_exists = trexio_text_file_exists(file_full_path); + + /* Create an empty 
file for the trexio_text_has_group to work */ + if (!file_exists) { + FILE *fp = fopen(file_full_path, "ab+"); + fclose(fp); + } + + /* Exit upon success */ + return TREXIO_SUCCESS; +} + + #+end_src + + #+begin_src c :tangle has_buffered_text.c +trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + + char file_name[256]; + memset(file_name, 0, sizeof(file_name)); + const int32_t trexio_state = $with_state$; + + if (trexio_state != 0) { + sprintf(file_name, "/$group_dset$_state_%" PRId32 ".txt", trexio_state); + } else { + strncpy(file_name, "/$group_dset$.txt", 256); + } + + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen(file_name)); + + /* Check the return code of access function to determine whether the file with data exists or not */ + if (access(file_full_path, F_OK) == 0){ + return TREXIO_SUCCESS; + } else { + return TREXIO_HAS_NOT; + } +} + #+end_src * Template for text delete a group (UNSAFE mode) #+begin_src c :tangle delete_group_text.h :exports none @@ -1374,10 +1640,6 @@ trexio_text_delete_$group$ (trexio_t* const file) trexio_exit_code trexio_text_has_determinant_list(trexio_t* const file); trexio_exit_code trexio_text_read_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, int64_t* const list); trexio_exit_code trexio_text_write_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const int64_t* list); -trexio_exit_code trexio_text_has_determinant_coefficient(trexio_t* const file); 
-trexio_exit_code trexio_text_read_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, double* const coeff); -trexio_exit_code trexio_text_write_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const double* coeff); -trexio_exit_code trexio_text_read_determinant_coefficient_size(trexio_t* const file, int64_t* const size_max); #+end_src #+begin_src c :tangle read_determinant_text.c @@ -1462,136 +1724,6 @@ trexio_exit_code trexio_text_read_determinant_list(trexio_t* const file, return TREXIO_SUCCESS; } - -trexio_exit_code trexio_text_read_determinant_coefficient(trexio_t* const file, - const int64_t offset_file, - const uint32_t rank, - const uint64_t* dims, - int64_t* const eof_read_size, - double* const coeff) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; - if (coeff == NULL) return TREXIO_INVALID_ARG_6; - - char coeff_file_name[256]; - memset(coeff_file_name, 0, sizeof(coeff_file_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(coeff_file_name, "/determinant_coefficient_state_%" PRId32 ".txt", trexio_state); - } else { - strncpy(coeff_file_name, "/determinant_coefficient.txt", 32); - } - - /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ - char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; - /* Copy directory name in file_full_path */ - strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); - /* Append name of the file with sparse data */ - strncat (file_full_path, coeff_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(coeff_file_name)); - - /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ - FILE* f = fopen(file_full_path, "r"); - if (f == NULL) return TREXIO_FILE_ERROR; - - /* Specify the line length in order to offset properly. - Each double value 24 elements + one newline char. - ,*/ - uint64_t line_length = 25UL; - - /* Offset in the file according to the provided value of offset_file and optimal line_length */ - fseek(f, (long) offset_file * line_length, SEEK_SET); - - /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ - int rc; - /* Declare fixed buffer which will be used to read the determinant string */ - char buffer[64]; - uint32_t buf_size = sizeof(buffer); - /* Counter for number of elements beind processed */ - uint64_t count = 0UL; - - for (uint64_t i=0UL; i < dims[0]; ++i) { - - memset(buffer, 0, buf_size); - if (fgets(buffer, buf_size-1, f) == NULL){ - - fclose(f); - ,*eof_read_size = count; - return TREXIO_END; - - } else { - - rc = sscanf(buffer, "%lf", coeff + i); - if (rc <= 0) { - fclose(f); - return TREXIO_FAILURE; - } - count += 1UL; - - } - } - - /* Close the TXT file */ - rc = fclose(f); - if (rc != 0) return TREXIO_FILE_ERROR; - - return TREXIO_SUCCESS; -} - -trexio_exit_code -trexio_text_read_determinant_coefficient_size(trexio_t* const file, int64_t* const size_max) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (size_max == NULL) return TREXIO_INVALID_ARG_2; - - char coeff_file_name[256]; - memset(coeff_file_name, 0, sizeof(coeff_file_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - 
sprintf(coeff_file_name, "/determinant_coefficient_state_%" PRId32 ".txt.size", trexio_state); - } else { - strncpy(coeff_file_name, "/determinant_coefficient.txt.size", 64); - } - - /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */ - char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; - /* Copy directory name in file_full_path */ - strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); - /* Append name of the file with sparse data */ - strncat (file_full_path, coeff_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(coeff_file_name)); - - /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ - FILE* f = fopen(file_full_path, "r"); - if (f == NULL) return TREXIO_FILE_ERROR; - - /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ - int rc; - int64_t size_item, size_accum=0L; - - /* Read the values from the file. BEWARE OF POSSIBLE MAX_INT64 OVERFLOW ! 
*/ - while(fscanf(f, "%" SCNd64, &size_item) != EOF) { - /* Check that summation will not overflow the int64_t value */ - if (INT64_MAX - size_accum > size_item) { - size_accum += size_item; - } else { - fclose(f); - ,*size_max = -1L; - return TREXIO_INT_SIZE_OVERFLOW; - } - } - - /* Close the TXT file */ - rc = fclose(f); - if (rc != 0) return TREXIO_FILE_ERROR; - - /* Overwrite the value at the input address and return TREXIO_SUCCESS */ - ,*size_max = size_accum; - return TREXIO_SUCCESS; -} #+end_src #+begin_src c :tangle write_determinant_text.c @@ -1659,94 +1791,6 @@ trexio_exit_code trexio_text_write_determinant_list(trexio_t* const file, /* Exit upon success */ return TREXIO_SUCCESS; } - -trexio_exit_code trexio_text_write_determinant_coefficient(trexio_t* const file, - const int64_t offset_file, - const uint32_t rank, - const uint64_t* dims, - const double* coeff) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (coeff == NULL) return TREXIO_INVALID_ARG_5; - - char coeff_file_name[256]; - memset(coeff_file_name, 0, sizeof(coeff_file_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(coeff_file_name, "/determinant_coefficient_state_%" PRId32 ".txt", trexio_state); - } else { - strncpy(coeff_file_name, "/determinant_coefficient.txt", 32); - } - - /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ - char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; - /* Copy directory name in file_full_path */ - strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); - /* Append name of the file with sparse data */ - strncat (file_full_path, coeff_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(coeff_file_name)); - - /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */ - FILE* f = fopen(file_full_path, "a"); - if (f == NULL) return TREXIO_FILE_ERROR; - - /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */ - int rc; - for (uint64_t i=0UL; i < dims[0]; ++i) { - - rc = fprintf(f, "%24.16e\n", *(coeff + i)); - if (rc <= 0) { - fclose(f); - return TREXIO_FAILURE; - } - - } - - /* Close the TXT file */ - rc = fclose(f); - if (rc != 0) return TREXIO_FILE_ERROR; - - /* Append .size to the file_full_path in order to write additional info about the written buffer of data */ - strncat(file_full_path, ".size", 6); - - /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */ - FILE *f_wSize = fopen(file_full_path, "a"); - if (f_wSize == NULL) return TREXIO_FILE_ERROR; - - /* Write the buffer_size */ - rc = fprintf(f_wSize, "%" PRIu64 "\n", dims[0]); - if (rc <= 0) { - fclose(f_wSize); - return TREXIO_FAILURE; - } - - /* Close the TXT file */ - rc = fclose(f_wSize); - if (rc != 0) return TREXIO_FILE_ERROR; - - /* Additional part for the trexio_text_has_group to work */ - const char det_file_name[256] = "/determinant.txt"; - - memset (file_full_path, 0, TREXIO_MAX_FILENAME_LENGTH); - /* Copy directory name in file_full_path */ - strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); - /* Append name of the file with sparse data */ - strncat (file_full_path, det_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(det_file_name)); - - bool file_exists = trexio_text_file_exists(file_full_path); - - /* Create 
an empty file for the trexio_text_has_group to work */ - if (!file_exists) { - FILE *fp = fopen(file_full_path, "ab+"); - fclose(fp); - } - - /* Exit upon success */ - return TREXIO_SUCCESS; -} - #+end_src #+begin_src c :tangle has_determinant_text.c @@ -1771,36 +1815,6 @@ trexio_exit_code trexio_text_has_determinant_list(trexio_t* const file) return TREXIO_HAS_NOT; } } - -trexio_exit_code trexio_text_has_determinant_coefficient(trexio_t* const file) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - - char coeff_file_name[256]; - memset(coeff_file_name, 0, sizeof(coeff_file_name)); - const int32_t trexio_state = file->state; - - if (trexio_state != 0) { - sprintf(coeff_file_name, "/determinant_coefficient_state_%" PRId32 ".txt", trexio_state); - } else { - strncpy(coeff_file_name, "/determinant_coefficient.txt", 32); - } - - /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */ - char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; - /* Copy directory name in file_full_path */ - strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); - /* Append name of the file with sparse data */ - strncat (file_full_path, coeff_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(coeff_file_name)); - - /* Check the return code of access function to determine whether the file with data exists or not */ - if (access(file_full_path, F_OK) == 0){ - return TREXIO_SUCCESS; - } else { - return TREXIO_HAS_NOT; - } -} #+end_src * Constant file suffixes (not used by the generator) :noexport: diff --git a/tools/generator.py b/tools/generator.py index 22db593..26606fc 100644 --- a/tools/generator.py +++ b/tools/generator.py @@ -12,12 +12,12 @@ detailed_nums = get_detailed_num_dict(trex_config) detailed_strs = get_detailed_str_dict(trex_config) # helper dictionaries that contain names of groups, nums or dsets as keys dsets = get_dset_dict(trex_config) -detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = 
split_dset_dict_detailed(dsets) +detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse, detailed_dsets_buf = split_dset_dict_detailed(dsets) detailed_dsets = detailed_dsets_nostr.copy() detailed_dsets.update(detailed_dsets_str) # build a big dictionary with all pre-processed data detailed_all = { - 'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse), + 'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse, **detailed_dsets_buf), 'groups' : group_dict, 'numbers' : detailed_nums, 'strings' : detailed_strs @@ -62,6 +62,10 @@ for fname in files_todo['dset_str']: for fname in files_todo['dset_sparse']: recursive_populate_file(fname, template_paths, detailed_dsets_sparse) +# populate has/read/write_buffered functions with recursive scheme +for fname in files_todo['buffered']: + recursive_populate_file(fname, template_paths, detailed_dsets_buf) + # populate group-related functions with mixed scheme for fname in files_todo['group']: # recursive scheme for delete_group functions diff --git a/tools/generator_tools.py b/tools/generator_tools.py index ab03b9b..7b61f59 100644 --- a/tools/generator_tools.py +++ b/tools/generator_tools.py @@ -41,7 +41,7 @@ def get_files_todo(source_files: dict) -> dict: f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f or 'delete' in f ] - for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']: + for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group', 'buffered']: files_todo[key] = list(filter(lambda x: key in x, files_todo['all'])) files_todo['group'].append('struct_text_group_dset.h') @@ -111,7 +111,7 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N 'group_dset_rank', 'group_dset_dim_list', 'group_dset_f_dims', 'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single', 
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single', - 'group_num_h5_dtype', 'group_num_py_dtype', + 'group_num_h5_dtype', 'group_num_py_dtype', 'with_state', 'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim', 'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf', 'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32', @@ -495,6 +495,21 @@ def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) f'group_{target}_format_scanf' : 'lf', f'group_{target}_py_dtype' : 'float' }) + elif 'buffer' in dtype: + dtype_dict.update({ + 'default_prec' : '64', + f'group_{target}_dtype' : 'double', + f'group_{target}_h5_dtype' : 'native_double', + f'group_{target}_f_dtype_default' : 'real(c_double)', + f'group_{target}_f_dtype_double' : 'real(c_double)', + f'group_{target}_f_dtype_single' : 'real(c_float)', + f'group_{target}_dtype_default' : 'double', + f'group_{target}_dtype_double' : 'double', + f'group_{target}_dtype_single' : 'float', + f'group_{target}_format_printf' : '24.16e', + f'group_{target}_format_scanf' : 'lf', + f'group_{target}_py_dtype' : 'float' + }) elif dtype in ['int', 'dim', 'dim readonly', 'index']: dtype_dict.update({ 'default_prec' : '32', @@ -657,11 +672,12 @@ def split_dset_dict_detailed (datasets: dict) -> tuple: configuration (dict) : configuration from `trex.json` Returns: - dset_numeric_dict, dset_string_dict (tuple) : dictionaries corresponding to all numeric- and string-based datasets, respectively. + (tuple) : dictionaries corresponding to all types of datasets in trexio. 
""" dset_numeric_dict = {} - dset_string_dict = {} - dset_sparse_dict = {} + dset_string_dict = {} + dset_sparse_dict = {} + dset_buffer_dict = {} for k,v in datasets.items(): # create a temp dictionary @@ -698,11 +714,25 @@ def split_dset_dict_detailed (datasets: dict) -> tuple: else: tmp_dict['is_index'] = 'false' + # add the list of dimensions + if 'state.num' in v[1] and len(v[1]) > 1: + tmp_dict['with_state'] = 'file->state' + # we have to remove state.num from the list of dimensions + # because it is opaque dimension, namely it is controlled by a global state switch + tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1] if 'state.num' not in dim] + rank -= 1 + elif 'state.num' in v[1] and len(v[1]) == 1: + tmp_dict['with_state'] = '0' + tmp_dict['dims'] = ['state_num'] + rank = 1 + else: + tmp_dict['with_state'] = '0' + tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]] + # add the rank tmp_dict['rank'] = rank tmp_dict['group_dset_rank'] = str(rank) - # add the list of dimensions - tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]] + # build a list of dimensions to be inserted in the dims array initialization, e.g. 
{ao_num, ao_num} dim_list = tmp_dict['dims'][0] if rank > 1: @@ -755,12 +785,14 @@ def split_dset_dict_detailed (datasets: dict) -> tuple: # split datasets in numeric- and string- based if 'str' in datatype: dset_string_dict[k] = tmp_dict + elif 'buffer' in datatype: + dset_buffer_dict[k] = tmp_dict elif is_sparse: dset_sparse_dict[k] = tmp_dict else: dset_numeric_dict[k] = tmp_dict - return (dset_numeric_dict, dset_string_dict, dset_sparse_dict) + return (dset_numeric_dict, dset_string_dict, dset_sparse_dict, dset_buffer_dict) def check_dim_consistency(num: dict, dset: dict) -> None: diff --git a/trex.org b/trex.org index f8080d2..3850fd1 100644 --- a/trex.org +++ b/trex.org @@ -35,10 +35,16 @@ For sparse data structures such as electron replusion integrals, the data can be too large to fit in memory and the data needs to be fetched using multiple function calls to perform I/O on buffers. For more information on how to read/write sparse data structures, see -the [[./examples.html][examples]]. +the [[./examples.html][examples]]. The ~sparse~ data representation implies the +[[https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_(COO)][coordinate list]] representation, namely the user has to write a list +of indices and values. -For determinants, the ~special~ attribute is present in the type. This -means that the source code is not produced by the generator, but hand-written. +For the Configuration Interaction (CI) and Configuration State Function (CSF) +groups, the ~buffer~ data type is introduced, which allows similar incremental +I/O as for ~sparse~ data but without the need to write indices of the sparse values. + +For determinant lists (integer bit fields), the ~special~ attribute is present in the type. +This means that the source code is not produced by the generator, but hand-written.
#+begin_src python :tangle trex.json :exports none { @@ -696,11 +702,11 @@ prim_factor = An illustration on how to read determinants is presented in the [[./examples.html][examples]]. #+NAME: determinant - | Variable | Type | Dimensions | Description | - |---------------+-----------------+-------------------------------+--------------------------------------------------------| - | ~num~ | ~dim readonly~ | | Number of determinants | - | ~list~ | ~int special~ | ~(determinant.num)~ | List of determinants as integer bit fields | - | ~coefficient~ | ~float special~ | ~(state.num,determinant.num)~ | Coefficients of the determinants from the CI expansion | + | Variable | Type | Dimensions | Description | + |---------------+----------------+-------------------------------+--------------------------------------------------------| + | ~num~ | ~dim readonly~ | | Number of determinants | + | ~list~ | ~int special~ | ~(determinant.num)~ | List of determinants as integer bit fields | + | ~coefficient~ | ~float buffer~ | ~(state.num,determinant.num)~ | Coefficients of the determinants from the CI expansion | #+CALL: json(data=determinant, title="determinant") @@ -708,9 +714,9 @@ prim_factor = :results: #+begin_src python :tangle trex.json "determinant": { - "num" : [ "dim readonly" , [] ] - , "list" : [ "int special" , [ "determinant.num" ] ] - , "coefficient" : [ "float special", [ "determinant.num", "state.num" ] ] + "num" : [ "dim readonly", [] ] + , "list" : [ "int special" , [ "determinant.num" ] ] + , "coefficient" : [ "float buffer", [ "determinant.num", "state.num" ] ] } , #+end_src :end: @@ -732,11 +738,11 @@ prim_factor = the basis of Slater determinants. 
#+NAME: csf - | Variable | Type | Dimensions | Description | - |-------------------+-----------------+-----------------------------+------------------------------------------------| - | ~num~ | ~dim readonly~ | | Number of CSFs | - | ~coefficient~ | ~float special~ | ~(state.num,csf.num)~ | Coefficients of the CSFs from the CI expansion | - | ~det_coefficient~ | ~float sparse~ | ~(determinant.num,csf.num)~ | Projection on the determinant basis | + | Variable | Type | Dimensions | Description | + |-------------------+----------------+-----------------------------+------------------------------------------------| + | ~num~ | ~dim readonly~ | | Number of CSFs | + | ~coefficient~ | ~float buffer~ | ~(state.num,csf.num)~ | Coefficients of the CSFs from the CI expansion | + | ~det_coefficient~ | ~float sparse~ | ~(determinant.num,csf.num)~ | Projection on the determinant basis | #+CALL: json(data=csf, title="csf") @@ -744,9 +750,9 @@ prim_factor = :results: #+begin_src python :tangle trex.json "csf": { - "num" : [ "dim readonly" , [] ] - , "coefficient" : [ "float special", [ "csf.num", "state.num" ] ] - , "det_coefficient" : [ "float sparse" , [ "csf.num", "determinant.num" ] ] + "num" : [ "dim readonly", [] ] + , "coefficient" : [ "float buffer", [ "csf.num", "state.num" ] ] + , "det_coefficient" : [ "float sparse", [ "csf.num", "determinant.num" ] ] } , #+end_src :end: