diff --git a/.gitignore b/.gitignore index 8b733a2..2a962ab 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 autom4te.cache/ +build-config/ +ar-lib compile config.guess config.log @@ -38,5 +40,3 @@ test-suite.log *.h5 trexio-*.tar.gz trex.json - - diff --git a/Makefile.am b/Makefile.am index 5d4f475..baac474 100644 --- a/Makefile.am +++ b/Makefile.am @@ -90,6 +90,7 @@ TESTS_C = \ tests/io_num_text \ tests/io_dset_float_text \ tests/io_dset_int_text \ + tests/io_dset_sparse_text \ tests/io_safe_dset_float_text \ tests/io_str_text \ tests/io_dset_str_text \ @@ -102,6 +103,7 @@ TESTS_C += \ tests/io_num_hdf5 \ tests/io_dset_float_hdf5 \ tests/io_dset_int_hdf5 \ + tests/io_dset_sparse_hdf5 \ tests/io_safe_dset_float_hdf5 \ tests/io_str_hdf5 \ tests/io_dset_str_hdf5 \ @@ -117,8 +119,8 @@ check_PROGRAMS = $(TESTS) # specify common LDADD options for all tests LDADD = src/libtrexio.la - test_trexio_f = $(srcdir)/tests/trexio_f.f90 +CLEANFILES += $(test_trexio_f) $(test_trexio_f): $(trexio_f) cp $(trexio_f) $(test_trexio_f) @@ -126,7 +128,6 @@ $(test_trexio_f): $(trexio_f) trexio.mod: tests/trexio_f.o tests_test_f_SOURCES = $(test_trexio_f) tests/test_f.f90 -tests_test_f_LDFLAGS = -no-install clean-local: -rm -rf -- *.dir/ *.h5 __pycache__/ @@ -134,7 +135,7 @@ clean-local: # =============== DOCUMENTATION =============== # HTML_TANGLED = docs/index.html \ - docs/Sparse.html \ + docs/examples.html \ docs/templator_hdf5.html \ docs/trex.html \ docs/README.html \ @@ -179,11 +180,13 @@ BUILT_SOURCES += $(SOURCES) $(trexio_f) $(test_trexio_f) all: .git_hash +GENERATOR_FILES = $(srcdir)/tools/generator.py \ + $(srcdir)/tools/generator_tools.py $(SOURCES): $(trexio_f) src/trexio.c: $(trexio_h) -$(trexio_f): $(ORG_FILES) +$(trexio_f): $(ORG_FILES) $(GENERATOR_FILES) cd $(srcdir)/tools && ./build_trexio.sh $(htmlizer): $(ORG_FILES) $(srcdir)/src/README.org @@ -227,7 +230,7 @@ $(pytrexio_py): $(pytrexio_c) # Build Python module 
and C wrapper code for TREXIO using SWIG # [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface -$(pytrexio_c): $(ORG_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i) +$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i) cp $(trexio_h) src/ cd src/ && \ $(SWIG) -python -py3 -o pytrexio_wrap.c pytrexio.i @@ -248,4 +251,3 @@ CLEANFILES += $(pytrexio_c) \ .PHONY: cppcheck python-test python-install python-sdist check-numpy FORCE endif - diff --git a/Sparse.org b/Sparse.org deleted file mode 100644 index 6e4af31..0000000 --- a/Sparse.org +++ /dev/null @@ -1,22 +0,0 @@ -See templator_front.org - -* Text back end - As the size of the dataset should be extensible, the simplest - solution is to use one file for each sparse data set, and store a - the name of this file in the group. - Each integral can be a line in the file: - i j k l x - which can be read with "%10ld %10ld %10ld %10ld %24.16e". - The offset can be used with ~fseek(69L*offset, SEEK_SET)~ - -* HDF5 Back end - - We need to declare the number of rows of the dataset as - ~UNLIMITED~. This requires to use the ~Chunked~ storage, and the - chunks should absolutely not be larger than 1MB. - - To extend the storage, see : - https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html - (figure 17) - - If the offset+num > nmax, we need to extend the dataset. diff --git a/examples.org b/examples.org new file mode 100644 index 0000000..69ead9f --- /dev/null +++ b/examples.org @@ -0,0 +1,262 @@ +#+TITLE: Examples +#+STARTUP: latexpreview +#+SETUPFILE: docs/theme.setup + + +* Accessing sparse quantities +** Fortran + :PROPERTIES: + :header-args: :tangle print_energy.f90 + :END: + + #+begin_src f90 +program print_energy + use trexio + implicit none + + character*(128) :: filename ! Name of the input file + integer :: rc ! Return code for error checking + integer(8) :: f ! TREXIO file handle + character*(128) :: err_msg ! 
Error message + #+end_src + + This program computes the energy as: + + \[ + E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\, + +\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l + \rangle\; \textrm{ with } \; 0 < i,j,k,l \le n + \] + + One needs to read from the TREXIO file: + + - $n$ :: The number of molecular orbitals + - $E_{\text{NN}}$ :: The nuclear repulsion energy + - $D_{ij}$ :: The one-body reduced density matrix + - $\langle i |h| j \rangle$ :: The one-electron Hamiltonian integrals + - $\Gamma_{ijkl}$ :: The two-body reduced density matrix + - $\langle i j | k l \rangle$ :: The electron repulsion integrals + + #+begin_src f90 + integer :: n + double precision :: E, E_nn + double precision, allocatable :: D(:,:), h0(:,:) + double precision, allocatable :: G(:,:,:,:), W(:,:,:,:) + #+end_src + +*** Declare Temporary variables + + #+begin_src f90 + integer :: i, j, k, l, m + integer(8), parameter :: BUFSIZE = 100000_8 + integer(8) :: offset, icount, size_max + integer :: buffer_index(4,BUFSIZE) + double precision :: buffer_values(BUFSIZE) + + double precision, external :: ddot ! 
BLAS dot product + #+end_src + +*** Obtain the name of the TREXIO file from the command line, and open it for reading + + #+begin_src f90 + call getarg(1, filename) + + f = trexio_open (filename, 'r', TREXIO_HDF5, rc) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error opening TREXIO file: '//trim(err_msg) + stop + end if + #+end_src + +*** Read the nuclear repulsion energy + + #+begin_src f90 + rc = trexio_read_nucleus_repulsion(f, E_nn) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading nuclear repulsion: '//trim(err_msg) + stop + end if + #+end_src + +*** Read the number of molecular orbitals + + #+begin_src f90 + rc = trexio_read_mo_num(f, n) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading number of MOs: '//trim(err_msg) + stop + end if + #+end_src + +*** Allocate memory + + #+begin_src f90 + allocate( D(n,n), h0(n,n) ) + allocate( G(n,n,n,n), W(n,n,n,n) ) + G(:,:,:,:) = 0.d0 + W(:,:,:,:) = 0.d0 + #+end_src + +*** Read one-electron quantities + + #+begin_src f90 + rc = trexio_has_mo_1e_int_core_hamiltonian(f) + if (rc /= TREXIO_SUCCESS) then + stop 'No core hamiltonian in file' + end if + + rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading core Hamiltonian: '//trim(err_msg) + stop + end if + + + rc = trexio_has_rdm_1e(f) + if (rc /= TREXIO_SUCCESS) then + stop 'No 1e RDM in file' + end if + + rc = trexio_read_rdm_1e(f, D) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading one-body RDM: '//trim(err_msg) + stop + end if + #+end_src + +*** Read two-electron quantities + + Reading is done with OpenMP. Each thread reads its own buffer, and + the buffers are then processed in parallel. + + Reading the file requires a lock, so it is done in a critical + section. 
The ~offset~ variable is shared, and it is incremented in + the critical section. For each read, the function returns in + ~icount~ the number of read integrals, so this variable needs also + to be protected in the critical section when modified. + +**** Electron repulsion integrals + + #+begin_src f90 + rc = trexio_has_mo_2e_int_eri(f) + if (rc /= TREXIO_SUCCESS) then + stop 'No electron repulsion integrals in file' + end if + + rc = trexio_read_mo_2e_int_eri_size (f, size_max) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading number of ERIs: '//trim(err_msg) + stop + end if + + offset = 0_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, & + !$OMP buffer_index, buffer_values, m) + icount = BUFSIZE + do while (icount == BUFSIZE) + !$OMP CRITICAL + if (offset < size_max) then + rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values) + offset = offset + icount + else + icount = 0 + end if + !$OMP END CRITICAL + do m=1,icount + i = buffer_index(1,m) + j = buffer_index(2,m) + k = buffer_index(3,m) + l = buffer_index(4,m) + W(i,j,k,l) = buffer_values(m) + W(k,j,i,l) = buffer_values(m) + W(i,l,k,j) = buffer_values(m) + W(k,l,i,j) = buffer_values(m) + W(j,i,l,k) = buffer_values(m) + W(j,k,l,i) = buffer_values(m) + W(l,i,j,k) = buffer_values(m) + W(l,k,j,i) = buffer_values(m) + end do + end do + !$OMP END PARALLEL + #+end_src + +**** Reduced density matrix + + #+begin_src f90 + rc = trexio_has_rdm_2e(f) + if (rc /= TREXIO_SUCCESS) then + stop 'No two-body density matrix in file' + end if + + rc = trexio_read_rdm_2e_size (f, size_max) + if (rc /= TREXIO_SUCCESS) then + call trexio_string_of_error(rc, err_msg) + print *, 'Error reading number of 2-RDM elements: '//trim(err_msg) + stop + end if + + offset = 0_8 + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, & + !$OMP buffer_index, buffer_values, m) + icount = bufsize + do while (offset < size_max) + !$OMP CRITICAL + if 
(offset < size_max) then + rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values) + offset = offset + icount + else + icount = 0 + end if + !$OMP END CRITICAL + do m=1,icount + i = buffer_index(1,m) + j = buffer_index(2,m) + k = buffer_index(3,m) + l = buffer_index(4,m) + G(i,j,k,l) = buffer_values(m) + end do + end do + !$OMP END PARALLEL + + #+end_src + +*** Compute the energy + + As $(n,m)$ 2D arrays are stored in memory as $(n \times m)$ 1D + arrays, we could pass the matrices to the ~ddot~ BLAS function to + perform the summations in a single call for the 1-electron quantities. + Instead, we prefer to interleave the 1-electron (negative) and + 2-electron (positive) summations to have a better cancellation of + numerical errors. + + Here $n^4$ can be larger than the largest possible 32-bit integer, + so it is not safe to pass $n^4$ to the ~ddot~ BLAS + function. Hence, we perform $n^2$ loops, using vectors of size $n^2$. + + #+begin_src f90 + + E = 0.d0 + do l=1,n + E = E + ddot( n, D(1,l), 1, h0(1,l), 1 ) + do k=1,n + E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l), 1 ) + end do + end do + E = E + E_nn + + print *, 'Energy: ', E + #+end_src + +*** Terminate + + #+begin_src f90 + deallocate( D, h0, G, W ) + +end program + #+end_src diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 3a81741..874c36a 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -17,7 +17,7 @@ */ #+end_src - + ** C #+begin_src c :tangle prefix_front.h :noweb yes @@ -48,7 +48,7 @@ typedef int32_t trexio_exit_code; #include "trexio_private.h" #include "trexio_s.h" #include "trexio_text.h" -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 #include "trexio_hdf5.h" #endif /* @@ -91,7 +91,7 @@ module trexio ** Python #+begin_src python :tangle prefix_python.py -"""The Python API of the TREXIO library. +"""The Python API of the TREXIO library. 
This package is a top-level wrapper of the SWIG-generated pytrexio module. """ @@ -150,7 +150,7 @@ __trexio_path__ = None #+end_src * Front end - + All calls to TREXIO are thread-safe. TREXIO front end is modular, which simplifies implementation of new back ends. @@ -188,10 +188,16 @@ __trexio_path__ = None | ~TREXIO_DSET_MISSING~ | 25 | 'Dataset does not exist in the file' | | ~TREXIO_BACK_END_MISSING~ | 26 | 'Requested back end is disabled' | | ~TREXIO_INVALID_STR_LEN~ | 30 | 'Invalid max_str_len' | + | ~TREXIO_INT_SIZE_OVERFLOW~ | 31 | 'Possible integer overflow' | # We need to force Emacs not to indent the Python code: # -*- org-src-preserve-indentation: t + *IMPORTANT!* + The code below has to be executed within Emacs each time + a new error code is added to the table above. Otherwise, the codes + and the corresponding messages are not propagated to the source code. + #+begin_src python :var table=table-exit-codes :results drawer """ This script generates the C and Fortran constants for the error codes from the org-mode table. 
@@ -257,6 +263,7 @@ return '\n'.join(result) #define TREXIO_DSET_MISSING ((trexio_exit_code) 25) #define TREXIO_BACK_END_MISSING ((trexio_exit_code) 26) #define TREXIO_INVALID_STR_LEN ((trexio_exit_code) 30) + #define TREXIO_INT_SIZE_OVERFLOW ((trexio_exit_code) 31) #+end_src #+begin_src f90 :tangle prefix_fortran.f90 :exports none @@ -289,6 +296,7 @@ return '\n'.join(result) integer(trexio_exit_code), parameter :: TREXIO_DSET_MISSING = 25 integer(trexio_exit_code), parameter :: TREXIO_BACK_END_MISSING = 26 integer(trexio_exit_code), parameter :: TREXIO_INVALID_STR_LEN = 30 + integer(trexio_exit_code), parameter :: TREXIO_INT_SIZE_OVERFLOW = 31 #+end_src #+begin_src python :tangle prefix_python.py :exports none @@ -322,9 +330,10 @@ return '\n'.join(result) TREXIO_DSET_MISSING = 25 TREXIO_BACK_END_MISSING = 26 TREXIO_INVALID_STR_LEN = 30 + TREXIO_INT_SIZE_OVERFLOW = 31 #+end_src :END: - + *** Decoding errors The ~trexio_string_of_error~ converts an exit code into a string. The @@ -342,7 +351,10 @@ const char* trexio_string_of_error(const trexio_exit_code error); void trexio_string_of_error_f(const trexio_exit_code error, char result[<>]); #+end_src - The text strings are extracted from the previous table. + *IMPORTANT!* + The code below has to be executed within Emacs each time + a new error code is added to the table above. Otherwise, the codes + and the corresponding messages are not propagated to the source code. 
#+NAME:cases #+begin_src python :var table=table-exit-codes :exports none :noweb yes @@ -442,9 +454,15 @@ return '\n'.join(result) case TREXIO_DSET_MISSING: return "Dataset does not exist in the file"; break; + case TREXIO_BACK_END_MISSING: + return "Requested back end is disabled"; + break; case TREXIO_INVALID_STR_LEN: return "Invalid max_str_len"; break; + case TREXIO_INT_SIZE_OVERFLOW: + return "Possible integer overflow"; + break; #+end_example **** C source code @@ -508,7 +526,7 @@ def string_of_error(return_code: int) -> str: try: error_str = pytr.trexio_string_of_error(return_code) except: - raise + raise return error_str #+end_src @@ -526,7 +544,7 @@ def string_of_error(return_code: int) -> str: Then the corresponding back-end ~has/read/write~ functions has to be implemented. For example, see the commented lines that correspond to the ~TREXIO_JSON~ back end (not implemented yet). - _Note_: It is important to increment the value of TREXIO_INVALID_BACK_END when a new back end is added. Otherwise, it will not be available. + _Note_: It is important to increment the value of TREXIO_INVALID_BACK_END when a new back end is added. Otherwise, it will not be available. *** C @@ -561,13 +579,13 @@ bool trexio_has_backend(back_end_t back_end) { #endif } return false; -} +} #+end_src - + *** Fortran #+begin_src f90 :tangle prefix_fortran.f90 - integer(trexio_backend), parameter :: TREXIO_HDF5 = 0 + integer(trexio_backend), parameter :: TREXIO_HDF5 = 0 integer(trexio_backend), parameter :: TREXIO_TEXT = 1 ! 
integer(trexio_backend), parameter :: TREXIO_JSON = 2 integer(trexio_backend), parameter :: TREXIO_INVALID_BACK_END = 2 @@ -588,12 +606,12 @@ end interface #+begin_src python :tangle prefix_python.py # define TREXIO back ends -TREXIO_HDF5 = 0 +TREXIO_HDF5 = 0 TREXIO_TEXT = 1 #TREXIO_JSON = 2 TREXIO_INVALID_BACK_END = 2 #+end_src - + ** Read/write behavior Every time a reading function is called, the data is read from the @@ -662,7 +680,7 @@ class File: pytrexio_s: A PyObject corresponding to SWIG proxy of the trexio_s struct in C. This argument is in fact a TREXIO file handle, which is required for - communicating with the C back end. + communicating with the C back end. info: dict Dictionary of key-value pairs with additional information about the file. """ @@ -675,8 +693,8 @@ class File: self.filename = filename self.mode = mode self.back_end = back_end - - self.isOpen = False + + self.isOpen = False if pytrexio_s is None: self.pytrexio_s = open(filename, mode, back_end) self.isOpen = True @@ -705,7 +723,7 @@ class File: elif self.isOpen is None: raise Exception("[WIP]: TREXIO file handle provided but what if the file is already closed?") else: - pass + pass #+end_src ** Polymorphism of the file handle @@ -749,7 +767,7 @@ struct trexio_back_end_s { *** C #+begin_src c :tangle prefix_front.h :exports none -trexio_t* trexio_open(const char* file_name, const char mode, +trexio_t* trexio_open(const char* file_name, const char mode, const back_end_t back_end, trexio_exit_code* const rc_open); #+end_src @@ -786,7 +804,7 @@ trexio_open(const char* file_name, const char mode, break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 result_tmp = malloc(sizeof(trexio_hdf5_t)); break; #else @@ -842,7 +860,7 @@ trexio_open(const char* file_name, const char mode, break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_init(result); break; #else @@ -862,42 +880,6 @@ trexio_open(const char* file_name, const char mode, return NULL; } - rc = 
trexio_has_metadata_package_version(result); - if (rc == TREXIO_FAILURE) { - if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; - free(result); - return NULL; - } - - if (rc == TREXIO_HAS_NOT) { - switch (back_end) { - - case TREXIO_TEXT: - rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION); - break; - - case TREXIO_HDF5: -#ifdef HAVE_HDF5 - rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION); - break; -#else - if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; - return NULL; -#endif -/* - case TREXIO_JSON: - rc = trexio_json_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION); - break; -,*/ - } - } - - if (rc != TREXIO_SUCCESS) { - if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; - free(result); - return NULL; - } - /* File locking */ @@ -910,7 +892,7 @@ trexio_open(const char* file_name, const char mode, break; /* HDF5 v.>=1.10 has file locking activated by default */ case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = TREXIO_SUCCESS; break; #else @@ -930,7 +912,42 @@ trexio_open(const char* file_name, const char mode, return NULL; } + /* Write metadata (i.e. 
package version) upon creation */ + rc = trexio_has_metadata_package_version(result); + if (rc == TREXIO_FAILURE) { + if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; + free(result); + return NULL; + } + + if (rc == TREXIO_HAS_NOT) { + switch (back_end) { + + case TREXIO_TEXT: + rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION); + break; +#else + if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; + return NULL; +#endif + + } + } + + if (rc != TREXIO_SUCCESS) { + if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; + free(result); + return NULL; + } + + /* Exit upon success */ if (rc_open != NULL) *rc_open = TREXIO_SUCCESS; + return result; } #+end_src @@ -965,9 +982,9 @@ def open(file_name: str, mode: str, back_end: int): One of the currently supported ~open~ modes (e.g. 'w', 'r') back_end: int - One of the currently supported TREXIO back ends (e.g. TREXIO_HDF5, TREXIO_TEXT) + One of the currently supported TREXIO back ends (e.g. TREXIO_HDF5, TREXIO_TEXT) - Return: + Return: SWIG object of type trexio_s. 
Examples: @@ -1059,7 +1076,7 @@ trexio_close (trexio_t* file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_deinit(file); break; #else @@ -1088,7 +1105,7 @@ trexio_close (trexio_t* file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = TREXIO_SUCCESS; break; #else @@ -1182,8 +1199,8 @@ def close(trexio_file): | ~$group_dset_dim_list$~ | All dimensions of the dataset | ~{nucleus_num, 3}~ | | ~$group_dset_dtype$~ | Basic type of the dataset (int/float/char) | ~float~ | | ~$group_dset_h5_dtype$~ | Type of the dataset in HDF5 | ~double~ | - | ~$group_dset_std_dtype_in$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ | - | ~$group_dset_std_dtype_out$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ | + | ~$group_dset_format_scanf$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ | + | ~$group_dset_format_printf$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ | | ~$group_dset_dtype_default$~ | Default datatype of the dataset [C] | ~double/int32_t~ | | ~$group_dset_dtype_single$~ | Single precision datatype of the dataset [C] | ~float/int32_t~ | | ~$group_dset_dtype_double$~ | Double precision datatype of the dataset [C] | ~double/int64_t~ | @@ -1194,9 +1211,9 @@ def close(trexio_file): | ~$group_dset_py_dtype$~ | Standard datatype of the dataset [Python] | ~float/int~ | | ~$default_prec$~ | Default precision for read/write without suffix [C] | ~64/32~ | | ~$is_index$~ | Expands to ~true~ if dataset has a type ~index~ [C] | ~true/false~ | - - Some of the aforementioned template variables with ~group_dset~ prefix are duplicated with ~group_num~ prefix, + + Some of the aforementioned template variables with ~group_dset~ prefix are duplicated with ~group_num~ prefix, e.g. you might find $group_num_dtype_double$ in the templates corresponding to numerical attributes. The expanding values are the same as for ~group_dset~ and thus are not listed in the table above. 
@@ -1276,7 +1293,7 @@ trexio_read_$group_num$_64 (trexio_t* const file, $group_num_dtype_double$* cons break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_read_$group_num$(file, num); break; #else @@ -1305,17 +1322,17 @@ trexio_write_$group_num$_64 (trexio_t* const file, const $group_num_dtype_double case TREXIO_TEXT: return trexio_text_write_$group_num$(file, num); - break; - + break; + case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_write_$group_num$(file, num); break; #else return TREXIO_BACK_END_MISSING; #endif -/* - case TREXIO_JSON: +/* + case TREXIO_JSON: return trexio_json_write_$group_num$(file, num); break; ,*/ @@ -1344,7 +1361,7 @@ trexio_read_$group_num$_32 (trexio_t* const file, $group_num_dtype_single$* cons break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_read_$group_num$(file, &num_64); break; #else @@ -1377,17 +1394,17 @@ trexio_write_$group_num$_32 (trexio_t* const file, const $group_num_dtype_single case TREXIO_TEXT: return trexio_text_write_$group_num$(file, ($group_num_dtype_double$) num); - break; - + break; + case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_write_$group_num$(file, ($group_num_dtype_double$) num); break; #else return TREXIO_BACK_END_MISSING; -#endif -/* - case TREXIO_JSON: +#endif +/* + case TREXIO_JSON: return trexio_json_write_$group_num$(file, ($group_num_dtype_double$) num); break; ,*/ @@ -1431,7 +1448,7 @@ trexio_has_$group_num$ (trexio_t* const file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_has_$group_num$(file); break; #else @@ -1525,7 +1542,7 @@ end interface *** Python templates for front end #+begin_src python :tangle write_attr_num_front.py -def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None: +def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None: """Write the $group_num$ variable in the TREXIO file. 
Parameters: @@ -1535,7 +1552,7 @@ def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None: num_w: int Value of the $group_num$ variable to be written. - + Raises: - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. - Exception from some other error (e.g. RuntimeError). @@ -1546,11 +1563,11 @@ def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise #+end_src #+begin_src python :tangle read_attr_num_front.py -def read_$group_num$(trexio_file) -> $group_num_py_dtype$: +def read_$group_num$(trexio_file) -> $group_num_py_dtype$: """Read the $group_num$ variable from the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. @@ -1569,13 +1586,13 @@ def read_$group_num$(trexio_file) -> $group_num_py_dtype$: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise return num_r #+end_src #+begin_src python :tangle has_attr_num_front.py -def has_$group_num$(trexio_file) -> bool: +def has_$group_num$(trexio_file) -> bool: """Check that $group_num$ variable exists in the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. 
@@ -1593,7 +1610,7 @@ def has_$group_num$(trexio_file) -> bool: if rc == TREXIO_FAILURE: raise Error(rc) except: - raise + raise if rc == TREXIO_SUCCESS: return True @@ -1628,7 +1645,6 @@ def has_$group_num$(trexio_file) -> bool: **** Function declarations - #+begin_src c :tangle hrw_dset_data_front.h :exports none trexio_exit_code trexio_has_$group_dset$(trexio_t* const file); trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, $group_dset_dtype_default$* const $group_dset$); @@ -1647,7 +1663,6 @@ trexio_exit_code trexio_write_safe_$group_dset$_64(trexio_t* const file, const $ **** Source code for double precision functions - #+begin_src c :tangle read_dset_data_64_front.c trexio_exit_code trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* const $group_dset$) @@ -1672,13 +1687,13 @@ trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* co rc = TREXIO_FAILURE; switch (file->back_end) { - + case TREXIO_TEXT: rc = trexio_text_read_$group_dset$(file, $group_dset$, rank, dims); break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_read_$group_dset$(file, $group_dset$, rank, dims); break; #else @@ -1691,7 +1706,7 @@ trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* co ,*/ } if (rc != TREXIO_SUCCESS) return rc; - + /* Handle index type */ if ($is_index$) { uint64_t dim_size = 1; @@ -1702,7 +1717,7 @@ trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* co $group_dset$[i] += ($group_dset_dtype_double$) 1; } } - + return TREXIO_SUCCESS; } #+end_src @@ -1756,7 +1771,7 @@ trexio_write_$group_dset$_64 (trexio_t* const file, const $group_dset_dtype_doub break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_write_$group_dset$(file, $group_dset$_p, rank, dims); break; #else @@ -1820,7 +1835,7 @@ trexio_read_$group_dset$_32 (trexio_t* const file, $group_dset_dtype_single$* co break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5 rc = trexio_hdf5_read_$group_dset$(file, $group_dset$_64, rank, dims); break; #else @@ -1903,7 +1918,7 @@ trexio_write_$group_dset$_32 (trexio_t* const file, const $group_dset_dtype_sing break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_write_$group_dset$(file, $group_dset$_64, rank, dims); break; #else @@ -1957,7 +1972,7 @@ trexio_read_safe_$group_dset$_32 (trexio_t* const file, $group_dset_dtype_single if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; <> - + if (dim_out > (int64_t) dim_size) return TREXIO_UNSAFE_ARRAY_DIM; return trexio_read_$group_dset$_32(file, dset_out); @@ -1992,7 +2007,7 @@ trexio_read_safe_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; <> - + if (dim_out > (int64_t) dim_size) return TREXIO_UNSAFE_ARRAY_DIM; return trexio_read_$group_dset$_64(file, dset_out); @@ -2071,7 +2086,7 @@ trexio_has_$group_dset$ (trexio_t* const file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_has_$group_dset$(file); break; #else @@ -2164,7 +2179,7 @@ end interface *** Python templates for front end #+begin_src python :tangle write_dset_data_front.py -def write_$group_dset$(trexio_file, dset_w) -> None: +def write_$group_dset$(trexio_file, dset_w) -> None: """Write the $group_dset$ array of numbers in the TREXIO file. Parameters: @@ -2174,7 +2189,7 @@ def write_$group_dset$(trexio_file, dset_w) -> None: dset_w: list OR numpy.ndarray Array of $group_dset$ values to be written. If array data type does not correspond to int64 or float64, the conversion is performed. - + Raises: - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. - Exception from some other error (e.g. RuntimeError). 
@@ -2201,7 +2216,7 @@ def write_$group_dset$(trexio_file, dset_w) -> None: else: if doConversion: dset_64 = np.$group_dset_py_dtype$64(dset_w) - + else: # if input array is a multidimensional list or tuple, we have to convert it try: @@ -2219,13 +2234,13 @@ def write_$group_dset$(trexio_file, dset_w) -> None: rc = pytr.trexio_write_safe_$group_dset$_64(trexio_file.pytrexio_s, dset_flat) else: rc = pytr.trexio_write_safe_$group_dset$_64(trexio_file.pytrexio_s, dset_w) - + if rc != TREXIO_SUCCESS: raise Error(rc) #+end_src #+begin_src python :tangle read_dset_data_front.py -def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): +def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): """Read the $group_dset$ array of numbers from the TREXIO file. Parameters: @@ -2239,13 +2254,13 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): dtype (Optional): type NumPy data type of the output (e.g. np.int32|int16 or np.float32|float16). If specified, the output array will be converted from the default double precision. - + doReshape (Optional): bool Flag to determine whether the output NumPy array has be reshaped or not. Be default, reshaping is performed based on the dimensions from the ~trex.json~ file. Otherwise, ~shape~ array (list or tuple) is used if provided by the user. Returns: - ~dset_64~ if dtype is None or ~dset_converted~ otherwise: numpy.ndarray + ~dset_64~ if dtype is None or ~dset_converted~ otherwise: numpy.ndarray 1D NumPy array with ~dim~ elements corresponding to $group_dset$ values read from the TREXIO file. 
Raises: @@ -2260,18 +2275,18 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): if doReshape is None: doReshape = True - + # if dim is not specified, read dimensions from the TREXIO file dims_list = None if dim is None or doReshape: - $group_dset_dim$ = read_$group_dset_dim$(trexio_file) - + $group_dset_dim$ = read_$group_dset_dim$(trexio_file) + dims_list = [$group_dset_dim_list$] dim = 1 for i in range($group_dset_rank$): dim *= dims_list[i] - + shape = tuple(dims_list) if shape is None and doReshape: raise ValueError("Reshaping failure: shape is None.") @@ -2288,7 +2303,7 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): isConverted = False dset_converted = None if dtype is not None: - + try: assert isinstance(dtype, type) except AssertionError: @@ -2300,7 +2315,7 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): dset_converted = np.array(dset_64, dtype=dtype) except: raise - + isConverted = True # additional assert can be added here to check that read_safe functions returns numpy array of proper dimension @@ -2308,14 +2323,14 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): if doReshape: try: # in-place reshaping did not work so I have to make a copy - if isConverted: + if isConverted: dset_reshaped = np.reshape(dset_converted, shape, order='C') else: dset_reshaped = np.reshape(dset_64, shape, order='C') except: raise - if isConverted: + if isConverted: return dset_converted elif doReshape: return dset_reshaped @@ -2324,7 +2339,7 @@ def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): #+end_src #+begin_src python :tangle has_dset_data_front.py -def has_$group_dset$(trexio_file) -> bool: +def has_$group_dset$(trexio_file) -> bool: """Check that $group_dset$ variable exists in the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. 
@@ -2342,7 +2357,7 @@ def has_$group_dset$(trexio_file) -> bool: if rc == TREXIO_FAILURE: raise Error(rc) except: - raise + raise if rc == TREXIO_SUCCESS: return True @@ -2350,7 +2365,8 @@ def has_$group_dset$(trexio_file) -> bool: return False #+end_src -** Sparse data structures +** Templates for front end has/read/write a dataset of sparse data +*** Introduction Sparse data structures are used typically for large tensors such as two-electron integrals. For example, in the =trex.json= file sparse @@ -2358,12 +2374,11 @@ def has_$group_dset$(trexio_file) -> bool: #+begin_src python "ao_2e_int" : { - "eri_num" : [ "int", [ ] ] "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ] } #+end_src - - The electron repulsion integral $\langle ij | kl \rangle$ is + + The electron repulsion integral (eri) $\langle ij | kl \rangle$ is represented as a quartet of integers $(i,j,k,l)$ and a floating point value. @@ -2376,93 +2391,299 @@ def has_$group_dset$(trexio_file) -> bool: Knowing the maximum dimensions allows to check that the integers are in a valid range, and also lets the library choose the smallest integer representation to compress the storage. - + Fortran uses 1-based array indexing, while C uses 0-based indexing. Internally, we use a 0-based representation but the Fortran binding does the appropriate conversion when reading or writing. - As the number of integrals to store can be prohibitively large, we + As the number of integrals to store can be prohibitively large, we provide the possibility to read/write the integrals in chunks. So the functions take two extra parameters: - - ~offset~ : the index of the 1st integral we want to read. An - offset of zero implies to read the first integral. - - ~num~ : the number of integrals to read. + + - ~offset~ : how many integrals in the file should be skipped when reading. + An offset of zero implies to read the first integral. + - ~size~ : the number of integrals to read. 
We provide a function to read a chunk of indices, and a function to read a chunk of values, because some users might want to read only the values of the integrals, or only the indices. - Here is an example for the indices: +*** C templates for front end +**** Function declarations - #+BEGIN_SRC c + #+begin_src c :tangle hrw_dset_sparse_front.h :exports none +trexio_exit_code trexio_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int32_t* const index_sparse, double* const value_sparse); +trexio_exit_code trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); +trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int32_t* index_sparse, const double* value_sparse); + #+end_src + +**** Source code for default functions + + #+begin_src c :tangle read_dset_sparse_front.c trexio_exit_code -trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file, - const int64_t offset, - const int64_t num, - int32_t* buffer) +trexio_read_$group_dset$(trexio_t* const file, + const int64_t offset_file, + int64_t* const buffer_size, + int32_t* const index_sparse, + double* const value_sparse + ) { if (file == NULL) return TREXIO_INVALID_ARG_1; - if (offset < 0L) return TREXIO_INVALID_ARG_2; - if (num < 0L) return TREXIO_INVALID_ARG_3; + if (offset_file < 0L) return TREXIO_INVALID_ARG_2; + if (buffer_size == NULL || *buffer_size <= 0L) return TREXIO_INVALID_ARG_3; /* buffer_size is in/out: validate the pointer before dereferencing it */ + if (index_sparse == NULL) return TREXIO_INVALID_ARG_4; + if (value_sparse == NULL) return TREXIO_INVALID_ARG_5; + if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING; - const uint32_t rank = 4; // To be set by generator : number of indices + const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices - int64_t nmax; // Max number of integrals + int64_t size_max; // Max number of integrals 
(already in the file) trexio_exit_code rc; - rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); + /* Read the max number of integrals stored in the file */ + rc = trexio_read_$group_dset$_size(file, &size_max); if (rc != TREXIO_SUCCESS) return rc; + int64_t num; + rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); + if (rc != TREXIO_SUCCESS) return rc; + + // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered + int64_t eof_read_size = 0L; + switch (file->back_end) { case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); + rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse); break; case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); +#ifdef HAVE_HDF5 + rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse); +#else + rc = TREXIO_BACK_END_MISSING; /* the break after #endif serves both branches, so this value is not overwritten by default: */ +#endif break; - +/* + case TREXIO_JSON: + return trexio_json_read_$group_dset$(...); + break; +,*/ default: - return TREXIO_FAILURE; /* Impossible case */ + rc = TREXIO_FAILURE; /* Impossible case */ } + + if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc; + + if (rc == TREXIO_END) *buffer_size = eof_read_size; + + // shift indices to be one-based if Fortran API is used + if (file->one_based) { + // if EOF is reached - shift only indices that have been read, not an entire buffer + uint64_t index_size = rank*(*buffer_size) ; + for (uint64_t i=0; iback_end) { case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax); + return trexio_text_read_$group_dset$_size(file, size_max); break; case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax); +#ifdef HAVE_HDF5 + return trexio_hdf5_read_$group_dset$_size(file, size_max); break; - 
+#else + return TREXIO_BACK_END_MISSING; +#endif +/* + case TREXIO_JSON: + return trexio_json_read_$group_dset$_size(...); + break; +,*/ default: return TREXIO_FAILURE; /* Impossible case */ } } - #+END_SRC + #+end_src + + + #+begin_src c :tangle write_dset_sparse_front.c +trexio_exit_code +trexio_write_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t buffer_size, + const int32_t* index_sparse, + const double* value_sparse + ) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (offset_file < 0L) return TREXIO_INVALID_ARG_2; + if (buffer_size <= 0L) return TREXIO_INVALID_ARG_3; + if (index_sparse == NULL) return TREXIO_INVALID_ARG_4; + if (value_sparse == NULL) return TREXIO_INVALID_ARG_5; + + const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices + + int64_t size_max=0L; // Max number of integrals (already in the file) + trexio_exit_code rc; + + /* Read the max number of integrals stored in the file */ + rc = trexio_read_$group_dset$_size(file, &size_max); + if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc; + if (rc == TREXIO_DSET_MISSING) size_max = 0L; + + int64_t num; + rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); + if (rc != TREXIO_SUCCESS) return rc; + + int32_t* index_sparse_p = (int32_t*) index_sparse; + // shift indices to be zero-based if Fortran API is used + if (file->one_based) { + + uint64_t index_size = rank * buffer_size; + index_sparse_p = CALLOC(index_size, int32_t); + if (index_sparse_p == NULL) return TREXIO_ALLOCATION_FAILED; + + for (uint64_t i=0; iback_end) { + + case TREXIO_TEXT: + rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num, size_max, index_sparse_p, value_sparse); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, index_sparse_p, value_sparse); +#else + rc = TREXIO_BACK_END_MISSING; /* the break after #endif serves both branches, so this value is not overwritten by default: */ +#endif + break; +/* + case TREXIO_JSON: + rc = 
trexio_json_write_$group_dset$(...); + break; +,*/ + default: + rc = TREXIO_FAILURE; /* Impossible case */ + } + + // free the memory allocated to shift indices to be zero-based + if (file->one_based) FREE(index_sparse_p); + + return rc; +} + #+end_src + + + #+begin_src c :tangle has_dset_sparse_front.c +trexio_exit_code +trexio_has_$group_dset$ (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + assert(file->back_end < TREXIO_INVALID_BACK_END); + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_has_$group_dset$(file); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_has_$group_dset$(file); + break; +#else + return TREXIO_BACK_END_MISSING; +#endif +/* + case TREXIO_JSON: + return trexio_json_has_$group_dset$(file); + break; +,*/ + } + + return TREXIO_FAILURE; +} + #+end_src + +*** Fortran templates for front end + + The ~Fortran~ templates that provide an access to the ~C~ API calls from ~Fortran~. + These templates are based on the use of ~iso_c_binding~. Pointers have to be passed by value. 
+ + #+begin_src f90 :tangle write_dset_sparse_front_fortran.f90 +interface + integer function trexio_write_$group_dset$ (trex_file, & + offset_file, buffer_size, & + index_sparse, value_sparse) bind(C) + use, intrinsic :: iso_c_binding + integer(8), intent(in), value :: trex_file + integer(8), intent(in), value :: offset_file + integer(8), intent(in), value :: buffer_size + integer(4), intent(in) :: index_sparse(*) + double precision, intent(in) :: value_sparse(*) + end function trexio_write_$group_dset$ +end interface + #+end_src + + #+begin_src f90 :tangle read_dset_sparse_front_fortran.f90 +interface + integer function trexio_read_$group_dset$ (trex_file, & + offset_file, buffer_size, & + index_sparse, value_sparse) bind(C) + use, intrinsic :: iso_c_binding + integer(8), intent(in), value :: trex_file + integer(8), intent(in), value :: offset_file + integer(8), intent(inout) :: buffer_size + integer(4), intent(out) :: index_sparse(*) + double precision, intent(out) :: value_sparse(*) + end function trexio_read_$group_dset$ +end interface + #+end_src + + #+begin_src f90 :tangle read_dset_sparse_size_front_fortran.f90 +interface + integer function trexio_read_$group_dset$_size (trex_file, & + size_max) bind(C) + use, intrinsic :: iso_c_binding + integer(8), intent(in), value :: trex_file + integer(8), intent(out) :: size_max + end function trexio_read_$group_dset$_size +end interface + #+end_src + + #+begin_src f90 :tangle has_dset_sparse_front_fortran.f90 +interface + integer function trexio_has_$group_dset$ (trex_file) bind(C) + use, intrinsic :: iso_c_binding + integer(8), intent(in), value :: trex_file + end function trexio_has_$group_dset$ +end interface + #+end_src ** Templates for front end has/read/write a dataset of strings *** Introduction @@ -2520,7 +2741,7 @@ trexio_read_$group_dset$_low (trexio_t* const file, char* dset_out, const int32_ break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_read_$group_dset$(file, dset_out, 
rank, dims, (uint32_t) max_str_len); break; #else @@ -2640,7 +2861,7 @@ trexio_write_$group_dset$_low (trexio_t* const file, const char* dset_in, const tmp_str += pch_len + 1; } - rc = TREXIO_FAILURE; + rc = TREXIO_FAILURE; switch (file->back_end) { case TREXIO_TEXT: @@ -2648,7 +2869,7 @@ trexio_write_$group_dset$_low (trexio_t* const file, const char* dset_in, const break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 rc = trexio_hdf5_write_$group_dset$(file, (const char**) dset_str, rank, dims); break; #else @@ -2663,7 +2884,7 @@ trexio_write_$group_dset$_low (trexio_t* const file, const char* dset_in, const FREE(dset_str[0]); FREE(dset_str); - + return rc; } @@ -2720,7 +2941,7 @@ trexio_has_$group_dset$ (trexio_t* const file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_has_$group_dset$(file); break; #else @@ -2783,14 +3004,14 @@ end interface character, allocatable :: str_compiled(:) integer(8) :: $group_dset_dim$ integer :: rc - + rc = trexio_read_$group_dset_dim$_64(trex_file, $group_dset_dim$) if (rc /= TREXIO_SUCCESS) trexio_read_$group_dset$ = rc allocate(str_compiled($group_dset_dim$*(max_str_len+1)+1)) - + rc = trexio_read_$group_dset$_low(trex_file, str_compiled, max_str_len) - if (rc /= TREXIO_SUCCESS) then + if (rc /= TREXIO_SUCCESS) then deallocate(str_compiled) trexio_read_$group_dset$ = rc else @@ -2812,7 +3033,7 @@ end interface character(len=:), allocatable :: str_compiled integer(8) :: $group_dset_dim$ integer :: rc - + rc = trexio_read_$group_dset_dim$_64(trex_file, $group_dset_dim$) if (rc /= TREXIO_SUCCESS) then trexio_write_$group_dset$ = rc @@ -2827,7 +3048,7 @@ end interface *** Python templates for front end #+begin_src python :tangle write_dset_str_front.py -def write_$group_dset$(trexio_file, dset_w: list) -> None: +def write_$group_dset$(trexio_file, dset_w: list) -> None: """Write the $group_dset$ array of strings in the TREXIO file. 
Parameters: @@ -2837,7 +3058,7 @@ def write_$group_dset$(trexio_file, dset_w: list) -> None: dset_w: list Array of $group_dset$ strings to be written. - + Raises: - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. - Exception from some other error (e.g. RuntimeError). @@ -2851,12 +3072,12 @@ def write_$group_dset$(trexio_file, dset_w: list) -> None: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise #+end_src #+begin_src python :tangle read_dset_str_front.py -def read_$group_dset$(trexio_file, dim = None) -> list: +def read_$group_dset$(trexio_file, dim = None) -> list: """Read the $group_dset$ array of strings from the TREXIO file. Parameters: @@ -2879,8 +3100,8 @@ def read_$group_dset$(trexio_file, dim = None) -> list: # if dim is not specified, read dimensions from the TREXIO file if dim is None: - $group_dset_dim$ = read_$group_dset_dim$(trexio_file) - + $group_dset_dim$ = read_$group_dset_dim$(trexio_file) + dims_list = [$group_dset_dim_list$] dim = 1 for i in range($group_dset_rank$): @@ -2893,7 +3114,7 @@ def read_$group_dset$(trexio_file, dim = None) -> list: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise try: @@ -2908,7 +3129,7 @@ def read_$group_dset$(trexio_file, dim = None) -> list: #+end_src #+begin_src python :tangle has_dset_str_front.py -def has_$group_dset$(trexio_file) -> bool: +def has_$group_dset$(trexio_file) -> bool: """Check that $group_dset$ variable exists in the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. 
@@ -2926,7 +3147,7 @@ def has_$group_dset$(trexio_file) -> bool: if rc == TREXIO_FAILURE: raise Error(rc) except: - raise + raise if rc == TREXIO_SUCCESS: return True @@ -2973,7 +3194,7 @@ trexio_read_$group_str$ (trexio_t* const file, char* const str_out, const int32_ break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_read_$group_str$(file, str_out, (uint32_t) max_str_len); break; #else @@ -3001,7 +3222,7 @@ trexio_write_$group_str$ (trexio_t* const file, const char* str, const int32_t m if (trexio_has_$group_str$(file) == TREXIO_SUCCESS) return TREXIO_ATTR_ALREADY_EXISTS; size_t len_write = strlen(str); - if ((size_t) max_str_len < len_write) return TREXIO_INVALID_STR_LEN; + if ((size_t) max_str_len < len_write) return TREXIO_INVALID_STR_LEN; switch (file->back_end) { @@ -3010,7 +3231,7 @@ trexio_write_$group_str$ (trexio_t* const file, const char* str, const int32_t m break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_write_$group_str$(file, str); break; #else @@ -3026,7 +3247,7 @@ trexio_write_$group_str$ (trexio_t* const file, const char* str, const int32_t m return TREXIO_FAILURE; } #+end_src - + #+begin_src c :tangle has_attr_str_front.c trexio_exit_code trexio_has_$group_str$ (trexio_t* const file) @@ -3043,7 +3264,7 @@ trexio_has_$group_str$ (trexio_t* const file) break; case TREXIO_HDF5: -#ifdef HAVE_HDF5 +#ifdef HAVE_HDF5 return trexio_hdf5_has_$group_str$(file); break; #else @@ -3065,7 +3286,7 @@ trexio_has_$group_str$ (trexio_t* const file) The ~Fortran~ templates that provide an access to the ~C~ API calls from Fortran. These templates are based on the use of ~iso_c_binding~. Pointers have to be passed by value. 
- + #+begin_src f90 :tangle write_attr_str_front_fortran.f90 interface integer function trexio_write_$group_str$_c (trex_file, str, max_str_len) & @@ -3105,7 +3326,7 @@ end interface integer(8), intent(in), value :: trex_file integer(4), intent(in), value :: max_str_len character, intent(out) :: str(*) - + trexio_read_$group_str$ = trexio_read_$group_str$_c(trex_file, str, max_str_len) end function trexio_read_$group_str$ @@ -3131,7 +3352,7 @@ end interface *** Python templates for front end #+begin_src python :tangle write_attr_str_front.py -def write_$group_str$(trexio_file, str_w: str) -> None: +def write_$group_str$(trexio_file, str_w: str) -> None: """Write the $group_str$ variable in the TREXIO file. Parameters: @@ -3141,7 +3362,7 @@ def write_$group_str$(trexio_file, str_w: str) -> None: str_w: str String corresponding to the $group_str$ variable to be written. - + Raises: - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. - Exception from some other error (e.g. RuntimeError). @@ -3155,11 +3376,11 @@ def write_$group_str$(trexio_file, str_w: str) -> None: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise #+end_src #+begin_src python :tangle read_attr_str_front.py -def read_$group_str$(trexio_file) -> str: +def read_$group_str$(trexio_file) -> str: """Read the $group_str$ variable from the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. @@ -3179,13 +3400,13 @@ def read_$group_str$(trexio_file) -> str: if rc != TREXIO_SUCCESS: raise Error(rc) except: - raise + raise return str_r #+end_src - + #+begin_src python :tangle has_attr_str_front.py -def has_$group_str$(trexio_file) -> bool: +def has_$group_str$(trexio_file) -> bool: """Check that $group_str$ variable exists in the TREXIO file. Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function. 
@@ -3203,7 +3424,7 @@ def has_$group_str$(trexio_file) -> bool: if rc == TREXIO_FAILURE: raise Error(rc) except: - raise + raise if rc == TREXIO_SUCCESS: return True @@ -3243,8 +3464,8 @@ contains end function trexio_open #+end_src - The subroutine below transforms an array of Fortran strings into one big string using ~TREXIO_DELIM~ symbol - as a delimeter and adds ~NULL~ character in the end in order to properly pass the desired string to + The subroutine below transforms an array of Fortran strings into one big string using ~TREXIO_DELIM~ symbol + as a delimiter and adds ~NULL~ character at the end in order to properly pass the desired string to C API. This is needed due to the fact that strings in C are terminated by ~NULL~ character ~\0~. #+begin_src f90 :tangle helper_fortran.f90 @@ -3277,13 +3498,13 @@ contains integer, intent(in), value :: max_len_str ! maximum length of a string in an array character, intent(in) :: str_flat(*) character(len=*), intent(inout) :: str_array(*) - + character(len=max_len_str) :: tmp_str integer :: i, j, k, ind, offset integer(8) :: len_flat len_flat = (max_len_str+1)*max_num_str + 1 - + ind=1 offset=1 do i=1,max_num_str diff --git a/src/templates_hdf5/build.sh b/src/templates_hdf5/build.sh index 137c7a5..5b54c0f 100644 --- a/src/templates_hdf5/build.sh +++ b/src/templates_hdf5/build.sh @@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c cat populated/pop_write_*.c >> trexio_hdf5.c cat populated/pop_hrw_*.h >> trexio_hdf5.h +cat helpers_hdf5.c >> trexio_hdf5.c cat suffix_hdf5.h >> trexio_hdf5.h - diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index 8ff47d1..c031e3e 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -201,15 +201,15 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub /* Write the dimensioning variables */ const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); const hid_t dspace = 
H5Screate(H5S_SCALAR); - - const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, + + const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, dtype, dspace, H5P_DEFAULT, H5P_DEFAULT); if (num_id <= 0) { H5Sclose(dspace); H5Tclose(dtype); return TREXIO_INVALID_ID; } - + const herr_t status = H5Awrite(num_id, dtype, &(num)); if (status < 0) { H5Aclose(num_id); @@ -217,7 +217,7 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub H5Tclose(dtype); return TREXIO_FAILURE; } - + H5Sclose(dspace); H5Aclose(num_id); H5Tclose(dtype); @@ -262,7 +262,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $ { if (file == NULL) return TREXIO_INVALID_ARG_1; - if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; + if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; @@ -317,7 +317,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* { if (file == NULL) return TREXIO_INVALID_ARG_1; - if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; + if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; trexio_hdf5_t* f = (trexio_hdf5_t*) file; @@ -372,6 +372,207 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file) } #+end_src +** Template for HDF5 has/read/write the dataset of sparse data + + Sparse data is stored using extensible datasets of HDF5. Extensibility is required + due to the fact that the sparse data will be written in chunks of user-defined size. 
+ + #+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none +trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse); +trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse); +trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); + #+end_src + + + #+begin_src c :tangle write_dset_sparse_hdf5.c +trexio_exit_code +trexio_hdf5_write_$group_dset$ (trexio_t* const file, + const int64_t offset_file, + const int64_t size, + const int64_t size_max, + const int32_t* index_sparse, + const double* value_sparse) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + hid_t index_dtype; + void* index_p; + uint64_t size_ranked = (uint64_t) size * $group_dset_rank$; + /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */ + if (size_max < UINT8_MAX) { + uint8_t* index = CALLOC(size_ranked, uint8_t); + if (index == NULL) return TREXIO_ALLOCATION_FAILED; + for (int64_t i=0; i$group$_group, dset_index_name) != 1 ) { + /* If the index dataset is not found -> create both datasets and write the first chunk */ + + /* Create chunked dataset with index_dtype datatype and write indices into it */ + rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p); + if (index_p != index_sparse) FREE(index_p); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + /* Create chunked dataset with value_dtype datatype and write values into it */ + rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, 
chunk_v_dims, value_sparse); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + } else { + /* If the index dataset already exists -> open both datasets, extend them and write the new chunk */ + hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$}; + hsize_t offset_v[1] = {(hsize_t) offset_file}; + + /* Open the existing index dataset, extend it by one chunk and write the indices at offset_i */ + rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p); + if (index_p != index_sparse) FREE(index_p); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + /* Open the existing value dataset, extend it by one chunk and write the values at offset_v */ + rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse); + if (rc_write != TREXIO_SUCCESS) return rc_write; + + } + + return TREXIO_SUCCESS; +} + #+end_src + + + #+begin_src c :tangle read_dset_sparse_hdf5.c +trexio_exit_code +trexio_hdf5_read_$group_dset$ (trexio_t* const file, + const int64_t offset_file, + const int64_t size, + const int64_t size_max, + int64_t* const eof_read_size, + int32_t* const index_read, + double* const value_read) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; + + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + /* Indices and values are stored as 2 independent datasets in the HDF5 file */ + char dset_index_name[256] = "\0"; + char dset_value_name[256] = "\0"; + /* Build the names of the datasets; copy at most size-1 bytes so the zero-initialized last byte keeps them NUL-terminated */ + strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", sizeof(dset_index_name) - 1); + strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", sizeof(dset_value_name) - 1); + + hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$}; + hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$}; + + hsize_t offset_v[1] = {(hsize_t) offset_file}; + hsize_t count_v[1] = {(hsize_t) size}; + + int is_index = 1, is_value = 0; + trexio_exit_code rc_read; + + // attempt to read indices + rc_read = 
trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, offset_i, count_i, NULL, is_index, index_read); + if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read; + // attempt to read values + // when EOF is encountered - the count_v[0] is modified and contains the number of elements being read + rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, offset_v, count_v, eof_read_size, is_value, value_read); + if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read; + + return rc_read; +} + #+end_src + + + #+begin_src c :tangle read_dset_sparse_hdf5.c +trexio_exit_code +trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (size_max == NULL) return TREXIO_INVALID_ARG_2; + const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; + + hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + hid_t fspace_id = H5Dget_space(dset_id); + if (fspace_id < 0) { + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // allocate space for the dimensions to be read + hsize_t ddims[1] = {0}; + + // get the rank and dimensions of the dataset + int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL); + + H5Dclose(dset_id); + H5Sclose(fspace_id); + if (rrank < 0) return TREXIO_FAILURE; /* a negative rank means the extent query itself failed */ + *size_max = (int64_t) ddims[0]; + + return TREXIO_SUCCESS; +} + #+end_src + + + #+begin_src c :tangle has_dset_sparse_hdf5.c +trexio_exit_code +trexio_hdf5_has_$group_dset$ (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values"); + /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */ + if (status == 1){ + return TREXIO_SUCCESS; + } else if (status == 0) { + return TREXIO_HAS_NOT; + } else { + return TREXIO_FAILURE; + } + +} + #+end_src + ** Template for HDF5 has/read/write the dataset of strings 
#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none @@ -403,10 +604,10 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c return TREXIO_ALLOCATION_FAILED; } - hid_t dspace = H5Dget_space(dset_id); + hid_t dspace = H5Dget_space(dset_id); if (dset_id <= 0) { FREE(ddims); - H5Dclose(dset_id); + H5Dclose(dset_id); return TREXIO_INVALID_ID; } @@ -442,7 +643,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c if (rdata == NULL) { H5Dclose(dset_id); H5Sclose(dspace); - H5Tclose(memtype); + H5Tclose(memtype); return TREXIO_ALLOCATION_FAILED; } @@ -451,7 +652,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); - H5Tclose(memtype); + H5Tclose(memtype); return TREXIO_FAILURE; } @@ -474,11 +675,11 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); - H5Tclose(memtype); + H5Tclose(memtype); return TREXIO_FAILURE; } - FREE(rdata); + FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); H5Tclose(memtype); @@ -509,7 +710,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$, if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { - /* code to create dataset */ + /* code to create dataset */ hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); if (filetype <= 0) return TREXIO_INVALID_ID; @@ -577,7 +778,7 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file) } #+end_src - + ** Template for HDF5 has/read/write the string attribute #+begin_src c :tangle hrw_attr_str_hdf5.h :exports none @@ -655,7 +856,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) const hid_t dspace_id = H5Screate(H5S_SCALAR); if (dspace_id <= 0) return TREXIO_INVALID_ID; - + /* Create the $group_str$ attribute of $group$ group */ const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id, 
H5P_DEFAULT, H5P_DEFAULT); @@ -665,7 +866,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) H5Tclose(dtype_id); return TREXIO_INVALID_ID; } - + status = H5Awrite(str_id, dtype_id, str); if (status < 0) { H5Aclose(str_id); @@ -673,7 +874,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) H5Tclose(dtype_id); return TREXIO_FAILURE; } - + H5Aclose(str_id); H5Sclose(dspace_id); H5Tclose(dtype_id); @@ -703,11 +904,256 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file) } #+end_src +** Helper functions + + #+begin_src c :tangle helpers_hdf5.c +trexio_exit_code +trexio_hdf5_create_write_dset_sparse (const hid_t group_id, + const char* dset_name, + const hid_t dtype_id, + const hsize_t* chunk_dims, + const void* data_sparse) +{ + const int h5_rank = 1; + const hsize_t maxdims[1] = {H5S_UNLIMITED}; + + hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims); + if (dspace < 0) return TREXIO_INVALID_ID; + + hid_t prop = H5Pcreate(H5P_DATASET_CREATE); + if (prop < 0) { + H5Sclose(dspace); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims); + if (status < 0) { + H5Sclose(dspace); + H5Pclose(prop); + return TREXIO_INVALID_ID; + } + + hid_t dset_id = H5Dcreate(group_id, + dset_name, + dtype_id, + dspace, + H5P_DEFAULT, + prop, + H5P_DEFAULT); + if (dset_id < 0) { + H5Sclose(dspace); + H5Pclose(prop); + return TREXIO_INVALID_ID; + } + + status = H5Dwrite(dset_id, + dtype_id, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + data_sparse); + H5Sclose(dspace); + H5Pclose(prop); + H5Dclose(dset_id); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_open_write_dset_sparse (const hid_t group_id, + const char* dset_name, + const hid_t dtype_id, + const hsize_t* chunk_dims, + const hsize_t* offset_file, + const void* data_sparse) +{ + const int h5_rank = 1; + + hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT); + if (dset_id <= 0) return 
TREXIO_INVALID_ID; + + hid_t fspace = H5Dget_space(dset_id); + if (fspace < 0) { + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // allocate space for the dimensions to be read + hsize_t ddims[1] = {0}; + + // get the rank and dimensions of the dataset + int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL); + ddims[0] += chunk_dims[0]; + + // extend the dset size + herr_t status = H5Dset_extent(dset_id, ddims); + if (status < 0) { + H5Sclose(fspace); + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // close and reopen the file dataspace to take into account the extension + H5Sclose(fspace); + fspace = H5Dget_space(dset_id); + if (fspace < 0) { + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // select hyperslab to be written using chunk_dims and offset values + status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL); + if (status < 0) { + H5Sclose(fspace); + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + // create memory dataspace to write from + hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL); + if (dspace < 0) { + H5Sclose(fspace); + H5Sclose(dspace); + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } + + status = H5Dwrite(dset_id, + dtype_id, + dspace, fspace, H5P_DEFAULT, + data_sparse); + H5Dclose(dset_id); + H5Sclose(dspace); + H5Sclose(fspace); + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; +} + + +trexio_exit_code +trexio_hdf5_open_read_dset_sparse (const hid_t group_id, + const char* dset_name, + const hsize_t* offset_file, + hsize_t* const size_read, + int64_t* const eof_read_size, + const int is_index, + void* const data_sparse + ) +{ + const int h5_rank = 1; + + // get the dataset handle + hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; + + // get the dataspace of the dataset + hid_t fspace_id = H5Dget_space(dset_id); + if (fspace_id < 0) { + H5Dclose(dset_id); + return TREXIO_INVALID_ID; + } 
+ + /* get dims of the dset stored in the file to check whether reading with user-provided chunk size + will reach end of the dataset (i.e. EOF in TEXT back end) + ,*/ + hsize_t ddims[1] = {0}; + int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL); + hsize_t max_offset = offset_file[0] + size_read[0]; + + int is_EOF = 0; + // if max_offset exceed current dim of the dset => EOF + if (max_offset > ddims[0]) { + is_EOF = 1; + // lower the value of count to reduce the number of elements which will be read + size_read[0] -= max_offset - ddims[0]; + // modified the value of eof_read_size passed by address + if (eof_read_size != NULL) *eof_read_size = size_read[0]; + } + + // special case when reading int indices + int64_t size_ranked = (int64_t) size_read[0]; + void* index_p; + // read the datatype from the dataset and compare with the pre-defined values + hid_t dtype = H5Dget_type(dset_id); + if (is_index == 1) { + if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) { + uint8_t* index = CALLOC(size_ranked, uint8_t); + if (index == NULL) return TREXIO_ALLOCATION_FAILED; + index_p = index; + } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) { + uint16_t* index = CALLOC(size_ranked, uint16_t); + if (index == NULL) return TREXIO_ALLOCATION_FAILED; + index_p = index; + } else { + index_p = data_sparse; + } + } + + herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL); + if (status < 0) { + H5Sclose(fspace_id); + H5Dclose(dset_id); + if (index_p != data_sparse) FREE(index_p); + return TREXIO_INVALID_ID; + } + + hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL); + if (memspace_id < 0) { + H5Sclose(fspace_id); + H5Dclose(dset_id); + if (index_p != data_sparse) FREE(index_p); + return TREXIO_INVALID_ID; + } + + if (is_index == 1) { + status = H5Dread(dset_id, + dtype, + memspace_id, fspace_id, H5P_DEFAULT, + index_p); + } else { + status = H5Dread(dset_id, + dtype, + memspace_id, fspace_id, H5P_DEFAULT, + data_sparse); 
+ } + + H5Sclose(fspace_id); + H5Sclose(memspace_id); + H5Dclose(dset_id); + if (status < 0) { + if (index_p != data_sparse) FREE(index_p); + return TREXIO_FAILURE; + } + + if (is_index == 1) { + if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) { + uint8_t* index = (uint8_t*) index_p; + for (int64_t i=0; i 0) { + uint16_t* index = (uint16_t*) index_p; + for (int64_t i=0; i> trexio_text.h cat populated/pop_has_dset_data_text.c >> trexio_text.c cat populated/pop_has_dset_str_text.c >> trexio_text.c +cat populated/pop_has_dset_sparse_text.c >> trexio_text.c cat populated/pop_has_attr_num_text.c >> trexio_text.c cat populated/pop_has_attr_str_text.c >> trexio_text.c + cat populated/pop_read_dset_data_text.c >> trexio_text.c cat populated/pop_read_dset_str_text.c >> trexio_text.c +cat populated/pop_read_dset_sparse_text.c >> trexio_text.c cat populated/pop_read_attr_str_text.c >> trexio_text.c cat populated/pop_read_attr_num_text.c >> trexio_text.c + cat populated/pop_write_dset_data_text.c >> trexio_text.c cat populated/pop_write_dset_str_text.c >> trexio_text.c +cat populated/pop_write_dset_sparse_text.c >> trexio_text.c cat populated/pop_write_attr_str_text.c >> trexio_text.c cat populated/pop_write_attr_num_text.c >> trexio_text.c + cat populated/pop_hrw_dset_data_text.h >> trexio_text.h cat populated/pop_hrw_dset_str_text.h >> trexio_text.h +cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h cat populated/pop_hrw_attr_num_text.h >> trexio_text.h cat populated/pop_hrw_attr_str_text.h >> trexio_text.h -cat rdm_text.c >> trexio_text.c -cat rdm_text.h >> trexio_text.h - cat suffix_text.h >> trexio_text.h - diff --git a/src/templates_text/templator_text.org b/src/templates_text/templator_text.org index f82c3be..09fa995 100644 --- a/src/templates_text/templator_text.org +++ b/src/templates_text/templator_text.org @@ -93,22 +93,10 @@ typedef struct $group$_s { ** Template for general structure in text back end - #+begin_src c :tangle struct_text_group.h -typedef struct 
rdm_s { - uint64_t dim_one_e; - uint32_t to_flush; - uint32_t padding; - double* one_e; - char file_name[TREXIO_MAX_FILENAME_LENGTH]; - char two_e_file_name[TREXIO_MAX_FILENAME_LENGTH]; -} rdm_t; - #+end_src - #+begin_src c :tangle struct_text_group.h typedef struct trexio_text_s { trexio_t parent ; $group$_t* $group$; - rdm_t* rdm; int lock_file; } trexio_text_t; #+end_src @@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file) /* Error handling for this call is added by the generator */ rc = trexio_text_free_$group$( (trexio_text_t*) file); - rc = trexio_text_free_rdm( (trexio_text_t*) file); - if (rc != TREXIO_SUCCESS) return rc; - return TREXIO_SUCCESS; } @@ -411,7 +396,7 @@ trexio_text_read_$group$ (trexio_text_t* const file) return NULL; } - rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$)); + rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$)); assert(!(rc != 1)); if (rc != 1) { FREE(buffer); @@ -499,7 +484,7 @@ trexio_text_read_$group$ (trexio_text_t* const file) } for (uint64_t i=0 ; i$group_dset$[i])); + rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i])); assert(!(rc != 1)); if (rc != 1) { FREE(buffer); @@ -535,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file) } /* WARNING: this tmp array allows to avoid allocation of space for each element of array of string - , BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32. - ,*/ + BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32. + */ char* tmp_$group_dset$; tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char); for (uint64_t i=0 ; i$group_dset$[i] = tmp_$group_dset$; /* conventional fcanf with "%s" only return the string before the first space character - ,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]" - ,* Q: depending on what ? 
*/ + * to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]" + * Q: depending on what ? */ rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$); assert(!(rc != 1)); if (rc != 1) { @@ -613,7 +598,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file) // START REPEAT GROUP_NUM fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet); - if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$); + if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$); // END REPEAT GROUP_NUM // START REPEAT GROUP_ATTR_STR @@ -627,7 +612,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file) fprintf(f, "$group_dset$\n"); for (uint64_t i=0 ; i$group_dset$[i]); + fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]); } // END REPEAT GROUP_DSET_ALL @@ -1016,327 +1001,280 @@ trexio_text_has_$group_str$ (trexio_t* const file) } #+end_src -** RDM struct (hard-coded) -*** Read the complete struct +** Template for has/read/write the dataset of sparse data - #+begin_src c :tangle rdm_text.h -rdm_t* trexio_text_read_rdm(trexio_text_t* const file); - #+end_src + Each sparse array is stored in a separate =.txt= file due to the fact that sparse I/O has to be decoupled + from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data + to prevent memory overflow. Chunks have a given ~int64_t size~ + (size specifies the number of sparse data items, e.g. integrals). - #+begin_src c :tangle rdm_text.c -rdm_t* trexio_text_read_rdm(trexio_text_t* const file) { - if (file == NULL) return NULL; + User provides indices and values of the sparse array as two separate variables. 
- if (file->rdm != NULL) return file->rdm; - /* Allocate the data structure */ - rdm_t* rdm = MALLOC(rdm_t); - assert (rdm != NULL); + #+begin_src c :tangle hrw_dset_sparse_text.h :exports none +trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file); +trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse); +trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int64_t size_start, const int32_t* index_sparse, const double* value_sparse); +trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max); + #+end_src - rdm->one_e = NULL; - rdm->two_e_file_name[0] = '\0'; - rdm->to_flush = 0; - /* Try to open the file. If the file does not exist, return */ - const char* rdm_file_name = "/rdm.txt"; + #+begin_src c :tangle write_dset_sparse_text.c +trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t size, + const int64_t size_max, + const int64_t size_start, + const int32_t* index_sparse, + const double* value_sparse) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; - strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Build the name of the file with sparse data*/ + /* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */ + const char $group_dset$_file_name[256] = "/$group_dset$.txt"; + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; - strncat (rdm->file_name, rdm_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name)); + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, $group_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name)); - if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') { - FREE(rdm); - return NULL; + /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */ + FILE* f = fopen(file_full_path, "a"); + if(f == NULL) return TREXIO_FILE_ERROR; + + + /* Specify the line length in order to offset properly. For example, for 4-index quantities + the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char. + CURRENTLY NO OFFSET IS USED WHEN WRITING ! + ,*/ + int64_t line_length = 0L; + char format_str[256] = "\0"; + + /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */ + if (size_max < UINT8_MAX) { + line_length = $sparse_line_length_8$; // 41 for 4 indices + strncpy(format_str, $sparse_format_printf_8$, 256); + } else if (size_max < UINT16_MAX) { + line_length = $sparse_line_length_16$; // 49 for 4 indices + strncpy(format_str, $sparse_format_printf_16$, 256); + } else { + line_length = $sparse_line_length_32$; //69 for 4 indices + strncpy(format_str, $sparse_format_printf_32$, 256); } - /* If the file exists, read it */ - FILE* f = fopen(rdm->file_name,"r"); - if (f != NULL) { + strncat(format_str, "\n", 2); - /* Find size of file to allocate the max size of the string buffer */ - fseek(f, 0L, SEEK_END); - size_t sz = ftell(f); - fseek(f, 0L, SEEK_SET); - sz = (sz < 1024) ? (1024) : (sz); - char* buffer = CALLOC(sz, char); + /* Get the starting position of the IO stream to be written in the .size file. 
+ This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail. + One can use ftello function which is adapted for large files. + For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow. + */ + int64_t io_start_pos = size_start * line_length; - /* Read the dimensioning variables */ - int rc; - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "dim_one_e") == 0); - - rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e)); - assert (rc == 1); - - /* Allocate arrays */ - rdm->one_e = CALLOC(rdm->dim_one_e, double); - assert (rdm->one_e != NULL); - - /* Read one_e */ - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "one_e") == 0); - - for (uint64_t i=0 ; idim_one_e; ++i) { - rc = fscanf(f, "%lf", &(rdm->one_e[i])); - assert (rc == 1); - } - - /* Read two_e */ - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "two_e_file_name") == 0); - - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - strncpy(rdm->two_e_file_name, buffer, 1024); - if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') { - FREE(buffer); - FREE(rdm->one_e); - FREE(rdm); + /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */ + int rc; + for (uint64_t i=0UL; irdm = rdm ; - return rdm; -} - #+end_src - -*** Flush the complete struct - - #+begin_src c :tangle rdm_text.h -trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file); - #+end_src - - #+begin_src c :tangle rdm_text.c -trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) { - if (file == NULL) return TREXIO_INVALID_ARG_1; - - if (file->parent.mode == 'r') return TREXIO_READONLY; - - rdm_t* const rdm = file->rdm; - if (rdm == NULL) return TREXIO_SUCCESS; - - if (rdm->to_flush == 0) return TREXIO_SUCCESS; - - FILE* f = fopen(rdm->file_name,"w"); - assert (f != NULL); - - /* Write the 
dimensioning variables */ - fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e); - - /* Write arrays */ - fprintf(f, "one_e\n"); - for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) { - fprintf(f, "%lf\n", rdm->one_e[i]); - } - - fprintf(f, "two_e_file_name\n"); - fprintf(f, "%s\n", rdm->two_e_file_name); - - fclose(f); - rdm->to_flush = 0; - return TREXIO_SUCCESS; -} - #+end_src - -*** Free memory - - Memory is allocated when reading. The followig function frees memory. - - #+begin_src c :tangle rdm_text.h -trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file); - #+end_src - - #+begin_src c :tangle rdm_text.c -trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) { - if (file == NULL) return TREXIO_INVALID_ARG_1; - - if (file->parent.mode != 'r') { - trexio_exit_code rc = trexio_text_flush_rdm(file); - if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE; - } - - rdm_t* const rdm = file->rdm; - if (rdm == NULL) return TREXIO_SUCCESS; - - if (rdm->one_e != NULL) { - FREE (rdm->one_e); - } - - free (rdm); - file->rdm = NULL; - return TREXIO_SUCCESS; -} - #+end_src - -*** Read/Write the one_e attribute - - The ~one_e~ array is assumed allocated with the appropriate size. 
- - #+begin_src c :tangle rdm_text.h -trexio_exit_code -trexio_text_read_rdm_one_e(trexio_t* const file, - double* const one_e, - const uint64_t dim_one_e); - -trexio_exit_code -trexio_text_write_rdm_one_e(trexio_t* const file, - const double* one_e, - const uint64_t dim_one_e); - #+end_src - - #+begin_src c :tangle rdm_text.c -trexio_exit_code -trexio_text_read_rdm_one_e(trexio_t* const file, - double* const one_e, - const uint64_t dim_one_e) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (one_e == NULL) return TREXIO_INVALID_ARG_2; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3; - - for (uint64_t i=0 ; ione_e[i]; - } - - return TREXIO_SUCCESS; -} - - -trexio_exit_code -trexio_text_write_rdm_one_e(trexio_t* const file, - const double* one_e, - const uint64_t dim_one_e) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (one_e == NULL) return TREXIO_INVALID_ARG_2; - if (file->mode != 'r') return TREXIO_READONLY; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - rdm->dim_one_e = dim_one_e; - for (uint64_t i=0 ; ione_e[i] = one_e[i]; - } - - rdm->to_flush = 1; - return TREXIO_SUCCESS; -} - #+end_src - -*** Read/Write the two_e attribute - - ~two_e~ is a sparse data structure, which can be too large to fit - in memory. So we provide functions to read and write it by - chunks. - In the text back end, the easiest way to do it is to create a - file for each sparse float structure. 
- - #+begin_src c :tangle rdm_text.h -trexio_exit_code -trexio_text_buffered_read_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - int64_t* const index, - double* const value); - -trexio_exit_code -trexio_text_buffered_write_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - const int64_t* index, - const double* value); - #+end_src - - #+begin_src c :tangle rdm_text.c -trexio_exit_code -trexio_text_buffered_read_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - int64_t* const index, - double* const value) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (index == NULL) return TREXIO_INVALID_ARG_4; - if (value == NULL) return TREXIO_INVALID_ARG_5; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - FILE* f = fopen(rdm->two_e_file_name, "r"); - if (f == NULL) return TREXIO_END; - - const uint64_t line_length = 64L; - fseek(f, (long) offset * line_length, SEEK_SET); - - for (uint64_t i=0 ; imode != 'r') return TREXIO_READONLY; + /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */ + FILE *f_wSize = fopen(file_full_path, "a"); + if (f_wSize == NULL) return TREXIO_FILE_ERROR; - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - FILE* f = fopen(rdm->two_e_file_name, "w"); - if (f == NULL) return TREXIO_FAILURE; - - const uint64_t line_length = 64L; - fseek(f, (long) offset * line_length, SEEK_SET); - - for (uint64_t i=0 ; ifile_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, $group_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name)); + + /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ + FILE* f = fopen(file_full_path, "r"); + if(f == NULL) return 
TREXIO_FILE_ERROR; + + /* Specify the line length in order to offset properly. For example, for 4-index quantities + the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char + ,*/ + uint64_t line_length = 0UL; + /* Determine the line length depending on the size_max (usually mo_num or ao_num) */ + if (size_max < UINT8_MAX) { + line_length = $sparse_line_length_8$; // 41 for 4 indices + } else if (size_max < UINT16_MAX) { + line_length = $sparse_line_length_16$; // 49 for 4 indices + } else { + line_length = $sparse_line_length_32$; //69 for 4 indices + } + + /* Offset in the file according to the provided value of offset_file and optimal line_length */ + fseek(f, (long) offset_file * line_length, SEEK_SET); + + /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ + int rc; + char buffer[1024]; + uint64_t count = 0UL; + for (uint64_t i=0UL; ifile_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, $group_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name)); + + /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */ + FILE* f = fopen(file_full_path, "r"); + if(f == NULL) return TREXIO_FILE_ERROR; + + + /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */ + int rc; + int64_t size_item, offset_item, size_accum=0L; + + /* Read the values from the file. BEWARE OF POSSIBLE MAX_INT64 OVERFLOW ! 
*/ + while(fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item) != EOF) { + /* Check that summation will not overflow the int64_t value */ + if (INT64_MAX - size_accum > size_item) { + size_accum += size_item; + } else { + fclose(f); + *size_max = -1L; + return TREXIO_INT_SIZE_OVERFLOW; + } + } + + /* Close the TXT file */ + rc = fclose(f); + if(rc != 0) return TREXIO_FILE_ERROR; + + /* Overwrite the value at the input address and return TREXIO_SUCCESS */ + *size_max = size_accum; + return TREXIO_SUCCESS; + +} + #+end_src + + #+begin_src c :tangle has_dset_sparse_text.c +trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + + /* Build the name of the file with sparse data. + The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? + */ + const char $group_dset$_file_name[256] = "/$group_dset$.txt"; + /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. 
*/ + char file_full_path[TREXIO_MAX_FILENAME_LENGTH]; + + /* Copy directory name in file_full_path */ + strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + /* Append name of the file with sparse data */ + strncat (file_full_path, $group_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name)); + + /* Check the return code of access function to determine whether the file with sparse data exists or not */ + if (access(file_full_path, F_OK) == 0){ + return TREXIO_SUCCESS; + } else { + return TREXIO_HAS_NOT; + } +} + #+end_src * Constant file suffixes (not used by the generator) :noexport: #+begin_src c :tangle suffix_text.h #endif #+end_src - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cf2d896..94fe414 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,11 +1,12 @@ -# ================= TESTING ================= +# ================= TESTING ================= # Create a list of tests for TEXT back end. set(Tests_text open_text io_dset_float_text io_dset_str_text + io_dset_sparse_text io_safe_dset_float_text io_dset_int_text io_num_text @@ -19,6 +20,7 @@ if(ENABLE_HDF5) open_hdf5 io_dset_float_hdf5 io_dset_str_hdf5 + io_dset_sparse_hdf5 io_safe_dset_float_hdf5 io_dset_int_hdf5 io_num_hdf5 @@ -43,4 +45,3 @@ endforeach() add_executable(test_f test_f.f90) target_link_libraries(test_f PRIVATE trexio_f) add_test(NAME test_f COMMAND $) - diff --git a/tests/io_dset_sparse_hdf5.c b/tests/io_dset_sparse_hdf5.c new file mode 100644 index 0000000..67b8357 --- /dev/null +++ b/tests/io_dset_sparse_hdf5.c @@ -0,0 +1,235 @@ +#include "trexio.h" +#include +#include +#include +#include + +#define TEST_BACKEND TREXIO_HDF5 +#define TREXIO_FILE "test_dset_sparse.h5" +#define RM_COMMAND "rm -f -- " TREXIO_FILE +#define SIZE 100 +#define N_CHUNKS 5 + +static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) { + +/* Try to write an array of sparse data into the TREXIO 
file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + +/*================= START OF TEST ==================*/ + + // open file in 'write' mode + file = trexio_open(file_name, 'w', backend, &rc); + assert (file != NULL); + assert (rc == TREXIO_SUCCESS); + + // parameters to be written + int32_t* index; + double* value; + + index = calloc(4L*SIZE, sizeof(int32_t)); + value = calloc(SIZE, sizeof(double)); + + for(int i=0; i size_max) + offset_file_read = 97; + offset_data_read = 1; + int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100 + + if (offset != 0L) offset_file_read += offset; + + // read one chunk that will reach EOF and return TREXIO_END code + rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]); + assert(rc == TREXIO_END); + assert(chunk_read == eof_read_size_check); + assert(index_read[4*size_r-1] == 0); + assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset)); + /* + for(int i=0; i +#include +#include +#include + +#define TEST_BACKEND TREXIO_TEXT +#define TREXIO_FILE "test_dset_sparse.dir" +#define RM_COMMAND "rm -rf " TREXIO_FILE +#define SIZE 100 +#define N_CHUNKS 5 + +static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) { + +/* Try to write an array of sparse data into the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + +/*================= START OF TEST ==================*/ + + // open file in 'write' mode + file = trexio_open(file_name, 'w', backend, &rc); + assert (file != NULL); + assert (rc == TREXIO_SUCCESS); + + // parameters to be written + int32_t* index; + double* value; + + index = calloc(4L*SIZE, sizeof(int32_t)); + value = calloc(SIZE, sizeof(double)); + + for(int i=0; i size_max) + offset_file_read = 97L; + offset_data_read = 1; + int64_t eof_read_size_check = SIZE - 
offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100 + + if (offset != 0L) offset_file_read += offset; + + // read one chunk that will reach EOF and return TREXIO_END code + rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]); + assert(rc == TREXIO_END); + assert(chunk_read == eof_read_size_check); + assert(index_read[4*size_r-1] == 0); + assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset)); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + + // free the memory + free(index_read); + free(value_read); + +/*================= END OF TEST ==================*/ + + return 0; +} + +static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) { + +/* Try to read a size of the dataset of sparse data in the TREXIO file */ + + trexio_t* file = NULL; + trexio_exit_code rc; + +/*================= START OF TEST ==================*/ + + // open file + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + assert (rc == TREXIO_SUCCESS); + + // define the variable to read into + int64_t size_written; + + // read one chunk using the aforementioned parameters + rc = trexio_read_mo_2e_int_eri_size(file, &size_written); + assert(rc == TREXIO_SUCCESS); + assert(size_written == size_check); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + +int main(){ + +/*============== Test launcher ================*/ + + int rc; + rc = system(RM_COMMAND); + assert (rc == 0); + + // check the first write attempt (SIZE elements written in N_CHUNKS chunks) + test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0); + test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND); + test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0); + 
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE); + + // check the second write attempt (SIZE elements written in N_CHUNKS chunks) + test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE); + test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE); + test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE*2); + + rc = system(RM_COMMAND); + assert (rc == 0); + + return 0; +} diff --git a/tests/io_num_hdf5.c b/tests/io_num_hdf5.c index e057236..32de679 100644 --- a/tests/io_num_hdf5.c +++ b/tests/io_num_hdf5.c @@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) { rc = trexio_write_nucleus_num(file, num); assert (rc == TREXIO_SUCCESS); + rc = trexio_write_nucleus_repulsion(file, 2.14171677); + assert (rc == TREXIO_SUCCESS); + // attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_ARG_2 rc = trexio_write_mo_num(file, 0); assert (rc == TREXIO_INVALID_NUM); @@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) { rc = trexio_has_nucleus_num(file); assert (rc == TREXIO_SUCCESS); + rc = trexio_has_nucleus_repulsion(file); + assert (rc == TREXIO_SUCCESS); + // check that the num variable does not exist rc = trexio_has_mo_num(file); assert (rc == TREXIO_HAS_NOT); @@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) { // parameters to be read int num; int cartesian; + float repulsion_32; + double repulsion_64, d; /*================= START OF TEST ==================*/ @@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) { assert (rc == TREXIO_SUCCESS); assert (num == 12); + rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32); + assert (rc == TREXIO_SUCCESS); + d = repulsion_32 - 2.14171677; + assert( d*d < 1.e-8 ); + + rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64); + assert (rc == TREXIO_SUCCESS); + d = repulsion_64 - 2.14171677; + 
assert( d*d < 1.e-14 ); + // read non-existing numerical attribute from the file rc = trexio_read_mo_num(file, &num); assert (rc == TREXIO_ATTR_MISSING); @@ -134,5 +152,3 @@ int main(void) { return 0; } - - diff --git a/tests/io_num_text.c b/tests/io_num_text.c index 3c299aa..22c110c 100644 --- a/tests/io_num_text.c +++ b/tests/io_num_text.c @@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) { rc = trexio_write_nucleus_num(file, num); assert (rc == TREXIO_SUCCESS); + rc = trexio_write_nucleus_repulsion(file, 2.14171677); + assert (rc == TREXIO_SUCCESS); + // attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_ARG_2 rc = trexio_write_mo_num(file, 0); assert (rc == TREXIO_INVALID_NUM); @@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) { rc = trexio_has_nucleus_num(file); assert (rc == TREXIO_SUCCESS); + rc = trexio_has_nucleus_repulsion(file); + assert (rc == TREXIO_SUCCESS); + // check that the num variable does not exist rc = trexio_has_mo_num(file); assert (rc == TREXIO_HAS_NOT); @@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) { // parameters to be read int num; int cartesian; + float repulsion_32; + double repulsion_64, d; /*================= START OF TEST ==================*/ @@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) { assert (rc == TREXIO_SUCCESS); assert (num == 12); + rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32); + assert (rc == TREXIO_SUCCESS); + d = repulsion_32 - 2.14171677; + assert( d*d < 1.e-8 ); + + rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64); + assert (rc == TREXIO_SUCCESS); + d = repulsion_64 - 2.14171677; + assert( d*d < 1.e-14 ); + // read non-existing numerical attribute from the file rc = trexio_read_mo_num(file, &num); assert (rc == TREXIO_ATTR_MISSING); @@ -134,5 +152,3 @@ int main(void) { 
return 0; } - - diff --git a/tests/test_f.f90 b/tests/test_f.f90 index 7c4b141..dcadec8 100644 --- a/tests/test_f.f90 +++ b/tests/test_f.f90 @@ -2,25 +2,25 @@ program test_trexio use trexio use, intrinsic :: iso_c_binding implicit none - + logical :: have_hdf5 - print * , "============================================" - print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION + print * , "============================================" + print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION print'(a,i3)', " TREXIO MAJOR VERSION : ", TREXIO_VERSION_MAJOR print'(a,i3)', " TREXIO MINOR VERSION : ", TREXIO_VERSION_MINOR - print * , "============================================" + print * , "============================================" - call system('rm -rf test_write_f.dir') + call system('rm -rf -- test_write_f.dir') print *, 'call test_write(''test_write_f.dir'', TREXIO_TEXT)' call test_write('test_write_f.dir', TREXIO_TEXT) print *, 'call test_read(''test_write_f.dir'', TREXIO_TEXT)' call test_read('test_write_f.dir', TREXIO_TEXT) - call system('rm -rf test_write_f.dir') + call system('rm -rf -- test_write_f.dir') call test_read_void('test_write_f.dir', TREXIO_TEXT) - ! No way to conditionally check whether compilation was done with HDF5 + ! No way to conditionally check whether compilation was done with HDF5 ! So temporarily disable the test for HDF5 back end at the moment have_hdf5 = trexio_has_backend(TREXIO_HDF5) if (have_hdf5) then @@ -30,7 +30,7 @@ program test_trexio print *, 'call test_read(''test_write_f.h5'', TREXIO_HDF5)' call test_read('test_write_f.h5', TREXIO_HDF5) call system('rm -f -- test_write_f.h5') - + call test_read_void('test_write_f.h5', TREXIO_HDF5) endif @@ -61,6 +61,22 @@ subroutine test_write(file_name, back_end) character(len=:), allocatable :: sym_str character(len=:), allocatable :: label(:) + ! 
sparse data + integer(4) :: index_sparse_mo_2e_int_eri(4,100) + double precision :: value_sparse_mo_2e_int_eri(100) + + integer :: i, n_buffers = 5 + integer(8) :: buf_size, offset + buf_size = 100/n_buffers + + do i = 1, 100 + index_sparse_mo_2e_int_eri(1,i) = 4*i - 3 + index_sparse_mo_2e_int_eri(2,i) = 4*i+1 - 3 + index_sparse_mo_2e_int_eri(3,i) = 4*i+2 - 3 + index_sparse_mo_2e_int_eri(4,i) = 4*i+3 - 3 + value_sparse_mo_2e_int_eri(i) = 3.14 + float(i) + enddo + ! parameters to be written num = 12 charge = (/ 6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1. /) @@ -96,6 +112,9 @@ subroutine test_write(file_name, back_end) rc = trexio_has_nucleus_charge(trex_file) call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 2') + rc = trexio_has_mo_2e_int_eri(trex_file) + call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 3') + rc = trexio_write_nucleus_num(trex_file, num) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE NUM') @@ -106,8 +125,8 @@ subroutine test_write(file_name, back_end) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE COORD') rc = trexio_write_nucleus_label(trex_file, label, 5) - call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL') deallocate(label) + call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL') rc = trexio_write_nucleus_point_group(trex_file, sym_str, 32) deallocate(sym_str) @@ -119,6 +138,20 @@ subroutine test_write(file_name, back_end) rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX') + ! 
write mo_num which will be used to determine the optimal size of int indices + if (trexio_has_mo_num(trex_file) == TREXIO_HAS_NOT) then + rc = trexio_write_mo_num(trex_file, 1000) + call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE MO NUM') + endif + + offset = 0 + do i = 1,n_buffers + rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, & + index_sparse_mo_2e_int_eri(1,offset+1), & + value_sparse_mo_2e_int_eri(offset+1)) + call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE SPARSE') + offset = offset + buf_size + enddo rc = trexio_has_nucleus_num(trex_file) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 1') @@ -126,6 +159,9 @@ subroutine test_write(file_name, back_end) rc = trexio_has_nucleus_coord(trex_file) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 2') + rc = trexio_has_mo_2e_int_eri(trex_file) + call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 3') + rc = trexio_close(trex_file) call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS CLOSE') @@ -160,11 +196,25 @@ subroutine test_read(file_name, back_end) character(len=32) :: sym_str + ! sparse data + integer(4) :: index_sparse_mo_2e_int_eri(4,20) + double precision :: value_sparse_mo_2e_int_eri(20) + integer(8) :: read_buf_size = 10 + integer(8) :: read_buf_size_save = 10 + integer(8) :: offset_read = 40 + integer(8) :: offset_data_read = 5 + integer(8) :: offset_eof = 97 + integer(8) :: offset_data_eof = 1 + integer(8) :: size_toread = 0 + character*(128) :: str num = 12 basis_shell_num = 24 + index_sparse_mo_2e_int_eri = 0 + value_sparse_mo_2e_int_eri = 0.0d0 + ! ================= START OF TEST ===================== ! 
trex_file = trexio_open(file_name, 'r', back_end, rc) @@ -199,7 +249,7 @@ subroutine test_read(file_name, back_end) call exit(-1) endif - + rc = trexio_read_nucleus_label(trex_file, label, 2) call trexio_assert(rc, TREXIO_SUCCESS) if (trim(label(2)) == 'Na') then @@ -230,6 +280,52 @@ subroutine test_read(file_name, back_end) endif + rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, & + index_sparse_mo_2e_int_eri(1, offset_data_read + 1), & + value_sparse_mo_2e_int_eri(offset_data_read + 1)) + !do i = 1,20 + ! write(*,*) index_sparse_mo_2e_int_eri(1,i) + !enddo + call trexio_assert(rc, TREXIO_SUCCESS) + if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. & + index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then + write(*,*) 'SUCCESS READ SPARSE DATA' + else + print *, 'FAILURE SPARSE DATA CHECK' + call exit(-1) + endif + + + ! attempt to read reaching EOF: should return TREXIO_END and + ! NOT increment the existing values in the buffer (only upd with what has been read) + rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, & + index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), & + value_sparse_mo_2e_int_eri(offset_data_eof + 1)) + !do i = 1,20 + ! write(*,*) index_sparse_mo_2e_int_eri(1,i) + !enddo + call trexio_assert(rc, TREXIO_END) + if (read_buf_size == 3 .and. & + index_sparse_mo_2e_int_eri(1, 1) == 0 .and. & + index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. 
& + index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then + write(*,*) 'SUCCESS READ SPARSE DATA EOF' + read_buf_size = read_buf_size_save + else + print *, 'FAILURE SPARSE DATA EOF CHECK' + call exit(-1) + endif + + rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread) + call trexio_assert(rc, TREXIO_SUCCESS) + if (size_toread == 100) then + write(*,*) 'SUCCESS READ SPARSE SIZE' + else + print *, 'FAILURE SPARSE SIZE CHECK' + call exit(-1) + endif + + rc = trexio_close(trex_file) call trexio_assert(rc, TREXIO_SUCCESS) @@ -254,6 +350,9 @@ subroutine test_read_void(file_name, back_end) ! ================= START OF TEST ===================== ! trex_file = trexio_open(file_name, 'r', back_end, rc) + if (rc /= TREXIO_OPEN_ERROR) then + rc = trexio_close(trex_file) + endif call trexio_assert(rc, TREXIO_OPEN_ERROR) call trexio_string_of_error(rc, str) @@ -262,4 +361,3 @@ subroutine test_read_void(file_name, back_end) ! ================= END OF TEST ===================== ! 
end subroutine test_read_void - diff --git a/tools/generator.py b/tools/generator.py index 30e06ad..fae24d4 100644 --- a/tools/generator.py +++ b/tools/generator.py @@ -6,29 +6,31 @@ config_file = 'trex.json' trex_config = read_json(config_file) # --------------------------------------------------------------------------- # -# -------------------------------- [WIP] ------------------------------------ # -# for now remove rdm from config because it functions are hardcoded -del trex_config['rdm'] -# --------------------------------------------------------------------------- # - # -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- # group_dict = get_group_dict(trex_config) detailed_nums = get_detailed_num_dict(trex_config) detailed_strs = get_detailed_str_dict(trex_config) # helper dictionaries that contain names of groups, nums or dsets as keys dsets = get_dset_dict(trex_config) -detailed_dsets_nostr, detailed_dsets_str = split_dset_dict_detailed(dsets) +detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets) detailed_dsets = detailed_dsets_nostr.copy() detailed_dsets.update(detailed_dsets_str) +# build a big dictionary with all pre-processed data +detailed_all = { + 'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse), + 'groups' : group_dict, + 'numbers' : detailed_nums, + 'strings' : detailed_strs +} # consistency check for dimensioning variables check_dim_consistency(detailed_nums, dsets) # --------------------------------------------------------------------------- # # -------------------- GET TEMPLATED FILES TO BE POPULATED ------------------ # source = ['front', 'text', 'hdf5'] -# build helper dictionaries with paths per source directory +# build helper dictionaries with paths per source directory template_paths = get_template_paths(source) -# build helper dictionaries with source files per source directory +# build helper dictionaries with source files per 
source directory source_files = get_source_files(template_paths) # build helper dictionaries with templated files files_todo = get_files_todo(source_files) @@ -38,7 +40,7 @@ files_todo = get_files_todo(source_files) # populate files with iterative scheme, i.e. for unique functions for fname in files_todo['auxiliary']: - iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs) + iterative_populate_file(fname, template_paths, detailed_all) # populate has/read/write_num functions with recursive scheme for fname in files_todo['attr_num']: @@ -48,14 +50,18 @@ for fname in files_todo['attr_num']: for fname in files_todo['attr_str']: recursive_populate_file(fname, template_paths, detailed_strs) -# populate has/read/write_dset (numerical) functions with recursive scheme +# populate has/read/write_dset (numerical) functions with recursive scheme for fname in files_todo['dset_data']: recursive_populate_file(fname, template_paths, detailed_dsets_nostr) -# populate has/read/write_dset (strings) functions with recursive scheme +# populate has/read/write_dset (strings) functions with recursive scheme for fname in files_todo['dset_str']: recursive_populate_file(fname, template_paths, detailed_dsets_str) +# populate has/read/write_dset (sparse) functions with recursive scheme +for fname in files_todo['dset_sparse']: + recursive_populate_file(fname, template_paths, detailed_dsets_sparse) + # populate group-related functions with mixed (iterative+recursive) scheme [text backend] for fname in files_todo['group']: special_populate_text_group(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs) diff --git a/tools/generator_tools.py b/tools/generator_tools.py index d7cb9a0..420b3c0 100644 --- a/tools/generator_tools.py +++ b/tools/generator_tools.py @@ -4,7 +4,7 @@ from json import load as json_load def read_json(fname: str) -> dict: - """ + """ Read configuration from the input `fname` JSON file. 
Parameters: @@ -23,7 +23,7 @@ def read_json(fname: str) -> dict: def get_files_todo(source_files: dict) -> dict: - """ + """ Build dictionaries of templated files per objective. Parameters: @@ -36,21 +36,21 @@ def get_files_todo(source_files: dict) -> dict: for key in source_files.keys(): all_files += source_files[key] - files_todo = {} + files_todo = {} #files_todo['all'] = list(filter(lambda x: 'read' in x or 'write' in x or 'has' in x or 'hrw' in x or 'flush' in x or 'free' in x, all_files)) files_todo['all'] = [f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f] - for key in ['dset_data', 'dset_str', 'attr_num', 'attr_str', 'group']: + for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']: files_todo[key] = list(filter(lambda x: key in x, files_todo['all'])) files_todo['group'].append('struct_text_group_dset.h') # files that correspond to iterative population (e.g. the code is repeated within the function body but the function itself is unique) - files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h'] + files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h'] return files_todo def get_source_files(paths: dict) -> dict: - """ + """ Build dictionaries of all files per source directory. Parameters: @@ -67,7 +67,7 @@ def get_source_files(paths: dict) -> dict: def get_template_paths(source: list) -> dict: - """ + """ Build dictionary of the absolute paths to directory with templates per source. Parameters: @@ -86,7 +86,7 @@ def get_template_paths(source: list) -> dict: def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> None: - """ + """ Populate files containing basic read/write/has functions. 
Parameters: @@ -107,6 +107,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N 'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single', 'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single', 'group_num_h5_dtype', 'group_num_py_dtype', + 'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim', + 'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf', + 'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32', + 'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32', 'group_dset', 'group_num', 'group_str', 'group'] for item in detailed_source.keys(): @@ -133,9 +137,9 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N elif 'uncommented by the generator for dimensioning' in line: # only uncomment and write the line if `num` is in the name if 'dim' in detailed_source[item]['trex_json_int_type']: - templine = line.replace('//', '') + templine = line.replace('//', '') f_out.write(templine) - # general case of recursive replacement of inline triggers + # general case of recursive replacement of inline triggers else: populated_line = recursive_replace_line(line, triggers, detailed_source[item]) f_out.write(populated_line) @@ -144,8 +148,8 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N def recursive_replace_line (input_line: str, triggers: list, source: dict) -> str: - """ - Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`. + """ + Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`. 
Parameters: input_line (str) : input line @@ -154,10 +158,10 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st Returns: output_line (str) : processed (replaced) line - """ + """ is_triggered = False output_line = input_line - + if '$' in input_line: for case in triggers: test_case = f'${case}$' @@ -175,21 +179,22 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st else: print(output_line) raise ValueError('Recursion went wrong, not all cases considered') - + return output_line -def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None: - """ +def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None: + """ Iteratively populate files with unique functions that contain templated variables. Parameters: filename (str) : template file to be populated paths (dict) : dictionary of paths per source directory - groups (dict) : dictionary of groups - datasets (dict) : dictionary of datasets with substitution details - numbers (dict) : dictionary of numbers with substitution details - strings (dict) : dictionary of strings with substitution details + detailed_all(dict) : dictionary with substitution details with the following keys: + 'groups' : dictionary of groups with substitution details + 'datasets' : dictionary of datasets with substitution details + 'numbers' : dictionary of numbers with substitution details + 'strings' : dictionary of strings with substitution details Returns: None @@ -200,7 +205,7 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: templ_path = get_template_path(filename, paths) filename_out = join('populated',f'pop_{filename}') # Note: it is important that special conditions like add_trigger above will be checked before standard triggers -# that contain only basic $-ed variable (like $group$). 
Otherwise, the standard triggers will be removed +# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed # from the template and the special condition will never be met. with open(join(templ_path,filename), 'r') as f_in : with open(join(templ_path,filename_out), 'a') as f_out : @@ -209,29 +214,29 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: if id == 0: # special case for proper error handling when deallocting text groups error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n' - populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler) + populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler) f_out.write(populated_line) elif id == 1: - populated_line = iterative_replace_line(line, triggers[id], datasets, None) + populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None) f_out.write(populated_line) elif id == 2: - populated_line = iterative_replace_line(line, triggers[id], numbers, None) + populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None) f_out.write(populated_line) elif id == 3: - populated_line = iterative_replace_line(line, triggers[id], strings, None) + populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None) f_out.write(populated_line) elif id == 4: - populated_line = iterative_replace_line(line, triggers[id], groups, None) + populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None) f_out.write(populated_line) else: f_out.write(line) - + f_out.write("\n") def iterative_replace_line (input_line: str, case: str, source: dict, add_line: str) -> str: - """ - Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`. + """ + Iterative replacer. 
Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`. Parameters: input_line (str) : input line @@ -241,7 +246,7 @@ def iterative_replace_line (input_line: str, case: str, source: dict, add_line: Returns: output_block (str) : processed (replaced) block of text - """ + """ output_block = "" for item in source.keys(): templine1 = input_line.replace(case.upper(), item.upper()) @@ -270,12 +275,12 @@ def check_triggers (input_line: str, triggers: list) -> int: if trig in input_line or trig.upper() in input_line: out_id = id return out_id - + return out_id def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detailed_dset: dict, detailed_numbers: dict, detailed_strings: dict) -> None: - """ + """ Special population for group-related functions in the TEXT back end. Parameters: @@ -292,8 +297,8 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai fname_new = join('populated',f'pop_{fname}') templ_path = get_template_path(fname, paths) - triggers = ['group_dset_dtype', 'group_dset_std_dtype_out', 'group_dset_std_dtype_in', - 'group_num_dtype_double', 'group_num_std_dtype_out', 'group_num_std_dtype_in', + triggers = ['group_dset_dtype', 'group_dset_format_printf', 'group_dset_format_scanf', + 'group_num_dtype_double', 'group_num_format_printf', 'group_num_format_scanf', 'group_dset', 'group_num', 'group_str', 'group'] for group in group_dict.keys(): @@ -316,16 +321,16 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai elif 'START REPEAT GROUP_NUM' in line or 'START REPEAT GROUP_ATTR_STR' in line: subloop_num = True continue - + if 'END REPEAT GROUP_DSET' in line: for dset in detailed_dset.keys(): - if group != detailed_dset[dset]['group']: + if group != detailed_dset[dset]['group']: continue - if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['dtype'] != 'char*'): + if ('REPEAT GROUP_DSET_STR' in line) and 
(detailed_dset[dset]['group_dset_dtype'] != 'char*'): continue - if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['dtype'] == 'char*'): + if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['group_dset_dtype'] == 'char*'): continue dset_allocated.append(dset) @@ -351,7 +356,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai elif 'END REPEAT GROUP_NUM' in line: for dim in detailed_numbers.keys(): - if group != detailed_numbers[dim]['group']: + if group != detailed_numbers[dim]['group']: continue save_body = loop_body @@ -364,7 +369,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai elif 'END REPEAT GROUP_ATTR_STR' in line: for str in detailed_strings.keys(): - if group != detailed_strings[str]['group']: + if group != detailed_strings[str]['group']: continue str_allocated.append(str) @@ -390,22 +395,22 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai continue if not subloop_num and not subloop_dset: - # NORMAL CASE WITHOUT SUBLOOPS + # NORMAL CASE WITHOUT SUBLOOPS if '$group_dset' in line: for dset in detailed_dset.keys(): - if group != detailed_dset[dset]['group']: + if group != detailed_dset[dset]['group']: continue populated_line = recursive_replace_line(line, triggers, detailed_dset[dset]) f_out.write(populated_line) elif '$group_str' in line: for str in detailed_strings.keys(): - if group != detailed_strings[str]['group']: + if group != detailed_strings[str]['group']: continue populated_line = recursive_replace_line(line, triggers, detailed_strings[str]) f_out.write(populated_line) elif '$group_num$' in line: for dim in detailed_numbers.keys(): - if group != detailed_numbers[dim]['group']: + if group != detailed_numbers[dim]['group']: continue populated_line = recursive_replace_line(line, triggers, detailed_numbers[dim]) f_out.write(populated_line) @@ -421,7 +426,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: 
dict, detai def get_template_path (filename: str, path_dict: dict) -> str: - """ + """ Returns the absolute path to the directory with indicated `filename` template. Parameters: @@ -435,12 +440,12 @@ def get_template_path (filename: str, path_dict: dict) -> str: if dir_type in filename: path = path_dict[dir_type] return path - + raise ValueError('Filename should contain one of the keywords') def get_group_dict (configuration: dict) -> dict: - """ + """ Returns the dictionary of all groups. Parameters: @@ -456,10 +461,126 @@ def get_group_dict (configuration: dict) -> dict: return group_dict +def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) -> dict: + """ + Returns the dictionary of dtype-related templated variables set for a given `dtype`. + Keys are names of templated variables, values are strings to be used by the generator. + + Parameters: + dtype (str) : dtype corresponding to the trex.json (i.e. int/dim/float/float sparse/str) + target (str) : `num` or `dset` + rank (int) : [optional] value of n in n-index (sparse) dset; needed to build the printf/scanf format string + int_len_printf(dict): [optional] + keys: precision (e.g. 32 for int32_t) + values: lengths reserved for one index when printing n-index (sparse) dset (e.g. 
10 for int32_t) + + Returns: + dtype_dict (dict) : dictionary dtype-related substitutions + """ + if not target in ['num', 'dset']: + raise Exception('Only num or dset target can be set.') + if 'sparse' in dtype: + if rank is None or int_len_printf is None: + raise Exception("Both rank and int_len_printf arguments has to be provided to build the dtype_dict for sparse data.") + if rank is not None and rank <= 1: + raise Exception('Rank of sparse quantity cannot be lower than 2.') + if int_len_printf is not None and not isinstance(int_len_printf, dict): + raise Exception('int_len_printf has to be a dictionary of lengths for different precisions.') + + dtype_dict = {} + # set up the key-value pairs dependending on the dtype + if dtype == 'float': + dtype_dict.update({ + 'default_prec' : '64', + f'group_{target}_dtype' : 'double', + f'group_{target}_h5_dtype' : 'native_double', + f'group_{target}_f_dtype_default' : 'real(8)', + f'group_{target}_f_dtype_double' : 'real(8)', + f'group_{target}_f_dtype_single' : 'real(4)', + f'group_{target}_dtype_default' : 'double', + f'group_{target}_dtype_double' : 'double', + f'group_{target}_dtype_single' : 'float', + f'group_{target}_format_printf' : '24.16e', + f'group_{target}_format_scanf' : 'lf', + f'group_{target}_py_dtype' : 'float' + }) + elif dtype in ['int', 'dim', 'index']: + dtype_dict.update({ + 'default_prec' : '32', + f'group_{target}_dtype' : 'int64_t', + f'group_{target}_h5_dtype' : 'native_int64', + f'group_{target}_f_dtype_default' : 'integer(4)', + f'group_{target}_f_dtype_double' : 'integer(8)', + f'group_{target}_f_dtype_single' : 'integer(4)', + f'group_{target}_dtype_default' : 'int32_t', + f'group_{target}_dtype_double' : 'int64_t', + f'group_{target}_dtype_single' : 'int32_t', + f'group_{target}_format_printf' : '" PRId64 "', + f'group_{target}_format_scanf' : '" SCNd64 "', + f'group_{target}_py_dtype' : 'int' + }) + elif dtype == 'str': + dtype_dict.update({ + 'default_prec' : '', + f'group_{target}_dtype' 
: 'char*', + f'group_{target}_h5_dtype' : '', + f'group_{target}_f_dtype_default': '', + f'group_{target}_f_dtype_double' : '', + f'group_{target}_f_dtype_single' : '', + f'group_{target}_dtype_default' : 'char*', + f'group_{target}_dtype_double' : '', + f'group_{target}_dtype_single' : '', + f'group_{target}_format_printf' : 's', + f'group_{target}_format_scanf' : 's', + f'group_{target}_py_dtype' : 'str' + }) + elif 'sparse' in dtype: + # build format string for n-index sparse quantity + item_printf_8 = f'%{int_len_printf[8]}" PRIu8 " ' + item_printf_16 = f'%{int_len_printf[16]}" PRIu16 " ' + item_printf_32 = f'%{int_len_printf[32]}" PRId32 " ' + item_scanf = '%" SCNd32 " ' + group_dset_format_printf_8 = '"' + group_dset_format_printf_16 = '"' + group_dset_format_printf_32 = '"' + group_dset_format_scanf = '' + for i in range(rank): + group_dset_format_printf_8 += item_printf_8 + group_dset_format_printf_16 += item_printf_16 + group_dset_format_printf_32 += item_printf_32 + group_dset_format_scanf += item_scanf + # append the format string for float values + group_dset_format_printf_8 += '%24.16e" ' + group_dset_format_printf_16 += '%24.16e" ' + group_dset_format_printf_32 += '%24.16e" ' + group_dset_format_scanf += '%lf' + + # set up the dictionary for sparse + dtype_dict.update({ + 'default_prec' : '', + f'group_{target}_dtype' : 'double', + f'group_{target}_h5_dtype' : '', + f'group_{target}_f_dtype_default': '', + f'group_{target}_f_dtype_double' : '', + f'group_{target}_f_dtype_single' : '', + f'group_{target}_dtype_default' : '', + f'group_{target}_dtype_double' : '', + f'group_{target}_dtype_single' : '', + f'sparse_format_printf_8' : group_dset_format_printf_8, + f'sparse_format_printf_16' : group_dset_format_printf_16, + f'sparse_format_printf_32' : group_dset_format_printf_32, + f'group_{target}_format_scanf' : group_dset_format_scanf, + f'group_{target}_py_dtype' : '' + }) + + return dtype_dict + + + def get_detailed_num_dict (configuration: dict) -> 
dict: - """ + """ Returns the dictionary of all `num`-suffixed variables. - Keys are names, values are subdictionaries containing corresponding group and group_num names. + Keys are names, values are subdictionaries containing corresponding group and group_num names. Parameters: configuration (dict) : configuration from `trex.json` @@ -472,40 +593,17 @@ def get_detailed_num_dict (configuration: dict) -> dict: for k2,v2 in v1.items(): if len(v2[1]) == 0: tmp_num = f'{k1}_{k2}' - if 'str' not in v2[0]: + if not 'str' in v2[0]: tmp_dict = {} tmp_dict['group'] = k1 tmp_dict['group_num'] = tmp_num num_dict[tmp_num] = tmp_dict - # TODO the arguments below are almost the same as for group_dset (except for trex_json_int_type) and can be exported from somewhere - if v2[0] == 'float': - tmp_dict['datatype'] = 'double' - tmp_dict['group_num_h5_dtype'] = 'native_double' - tmp_dict['group_num_f_dtype_default']= 'real(8)' - tmp_dict['group_num_f_dtype_double'] = 'real(8)' - tmp_dict['group_num_f_dtype_single'] = 'real(4)' - tmp_dict['group_num_dtype_default']= 'double' - tmp_dict['group_num_dtype_double'] = 'double' - tmp_dict['group_num_dtype_single'] = 'float' - tmp_dict['default_prec'] = '64' - tmp_dict['group_num_std_dtype_out'] = '24.16e' - tmp_dict['group_num_std_dtype_in'] = 'lf' - tmp_dict['group_num_py_dtype'] = 'float' - elif v2[0] in ['int', 'dim']: - tmp_dict['datatype'] = 'int64_t' - tmp_dict['group_num_h5_dtype'] = 'native_int64' - tmp_dict['group_num_f_dtype_default']= 'integer(4)' - tmp_dict['group_num_f_dtype_double'] = 'integer(8)' - tmp_dict['group_num_f_dtype_single'] = 'integer(4)' - tmp_dict['group_num_dtype_default']= 'int32_t' - tmp_dict['group_num_dtype_double'] = 'int64_t' - tmp_dict['group_num_dtype_single'] = 'int32_t' - tmp_dict['default_prec'] = '32' - tmp_dict['group_num_std_dtype_out'] = '" PRId64 "' - tmp_dict['group_num_std_dtype_in'] = '" SCNd64 "' - tmp_dict['group_num_py_dtype'] = 'int' + tmp_dict.update(get_dtype_dict(v2[0], 'num')) + if 
v2[0] in ['int', 'dim']: tmp_dict['trex_json_int_type'] = v2[0] + else: + tmp_dict['trex_json_int_type'] = '' return num_dict @@ -536,8 +634,8 @@ def get_detailed_str_dict (configuration: dict) -> dict: def get_dset_dict (configuration: dict) -> dict: - """ - Returns the dictionary of datasets. + """ + Returns the dictionary of datasets. Keys are names, values are lists containing datatype, list of dimensions and group name Parameters: @@ -559,8 +657,8 @@ def get_dset_dict (configuration: dict) -> dict: def split_dset_dict_detailed (datasets: dict) -> tuple: - """ - Returns the detailed dictionary of datasets. + """ + Returns the detailed dictionary of datasets. Keys are names, values are subdictionaries containing substitutes for templated variables Parameters: @@ -571,106 +669,106 @@ def split_dset_dict_detailed (datasets: dict) -> tuple: """ dset_numeric_dict = {} dset_string_dict = {} + dset_sparse_dict = {} for k,v in datasets.items(): # create a temp dictionary tmp_dict = {} - # specify details required to replace templated variables later - if v[0] == 'float': - datatype = 'double' - group_dset_h5_dtype = 'native_double' - group_dset_f_dtype_default= 'real(8)' - group_dset_f_dtype_double = 'real(8)' - group_dset_f_dtype_single = 'real(4)' - group_dset_dtype_default= 'double' - group_dset_dtype_double = 'double' - group_dset_dtype_single = 'float' - default_prec = '64' - group_dset_std_dtype_out = '24.16e' - group_dset_std_dtype_in = 'lf' - group_dset_py_dtype = 'float' - elif v[0] in ['int', 'index']: - datatype = 'int64_t' - group_dset_h5_dtype = 'native_int64' - group_dset_f_dtype_default= 'integer(4)' - group_dset_f_dtype_double = 'integer(8)' - group_dset_f_dtype_single = 'integer(4)' - group_dset_dtype_default= 'int32_t' - group_dset_dtype_double = 'int64_t' - group_dset_dtype_single = 'int32_t' - default_prec = '32' - group_dset_std_dtype_out = '" PRId64 "' - group_dset_std_dtype_in = '" SCNd64 "' - group_dset_py_dtype = 'int' - elif v[0] == 'str': - 
datatype = 'char*' - group_dset_h5_dtype = '' - group_dset_f_dtype_default = '' - group_dset_f_dtype_double = '' - group_dset_f_dtype_single = '' - group_dset_dtype_default = 'char*' - group_dset_dtype_double = '' - group_dset_dtype_single = '' - default_prec = '' - group_dset_std_dtype_out = 's' - group_dset_std_dtype_in = 's' - group_dset_py_dtype = 'str' - - # add the dset name for templates + rank = len(v[1]) + datatype = v[0] + + # define whether the dset is sparse + is_sparse = False + int_len_printf = {} + if 'sparse' in datatype: + is_sparse = True + int_len_printf[32] = 10 + int_len_printf[16] = 5 + int_len_printf[8] = 3 + + # get the dtype-related substitutions required to replace templated variables later + if not is_sparse: + dtype_dict = get_dtype_dict(datatype, 'dset') + else: + dtype_dict = get_dtype_dict(datatype, 'dset', rank, int_len_printf) + + tmp_dict.update(dtype_dict) + + # set the group_dset key to the full name of the dset tmp_dict['group_dset'] = k # add flag to detect index types - if 'index' == v[0]: + if 'index' in datatype: tmp_dict['is_index'] = 'file->one_based' else: tmp_dict['is_index'] = 'false' - # add the datatypes for templates - tmp_dict['dtype'] = datatype - tmp_dict['group_dset_dtype'] = datatype - tmp_dict['group_dset_h5_dtype'] = group_dset_h5_dtype - tmp_dict['group_dset_f_dtype_default'] = group_dset_f_dtype_default - tmp_dict['group_dset_f_dtype_double'] = group_dset_f_dtype_double - tmp_dict['group_dset_f_dtype_single'] = group_dset_f_dtype_single - tmp_dict['group_dset_dtype_default'] = group_dset_dtype_default - tmp_dict['group_dset_dtype_double'] = group_dset_dtype_double - tmp_dict['group_dset_dtype_single'] = group_dset_dtype_single - tmp_dict['default_prec'] = default_prec - tmp_dict['group_dset_std_dtype_in'] = group_dset_std_dtype_in - tmp_dict['group_dset_std_dtype_out'] = group_dset_std_dtype_out - tmp_dict['group_dset_py_dtype'] = group_dset_py_dtype + # add the rank - tmp_dict['rank'] = len(v[1]) - 
tmp_dict['group_dset_rank'] = str(tmp_dict['rank']) + tmp_dict['rank'] = rank + tmp_dict['group_dset_rank'] = str(rank) # add the list of dimensions tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]] # build a list of dimensions to be inserted in the dims array initialization, e.g. {ao_num, ao_num} dim_list = tmp_dict['dims'][0] - if tmp_dict['rank'] > 1: - for i in range(1, tmp_dict['rank']): + if rank > 1: + for i in range(1, rank): dim_toadd = tmp_dict['dims'][i] dim_list += f', {dim_toadd}' - + tmp_dict['group_dset_dim_list'] = dim_list - if tmp_dict['rank'] == 0: + if rank == 0: dim_f_list = "" else: dim_f_list = "(*)" tmp_dict['group_dset_f_dims'] = dim_f_list + if is_sparse: + # store the max possible dim of the sparse dset (e.g. mo_num) + tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0] + # build printf/scanf sequence and compute line length for n-index sparse quantity + index_printf = f'*(index_sparse + {str(rank)}*i' + index_scanf = f'index_sparse + {str(rank)}*i' + # one index item consumes up to index_length characters (int32_len_printf for int32 + 1 for space) + group_dset_sparse_indices_printf = index_printf + ')' + group_dset_sparse_indices_scanf = index_scanf + sparse_line_length_32 = int_len_printf[32] + 1 + sparse_line_length_16 = int_len_printf[16] + 1 + sparse_line_length_8 = int_len_printf[8] + 1 + # loop from 1 because we already have stored one index + for index_count in range(1,rank): + group_dset_sparse_indices_printf += f', {index_printf} + {index_count})' + group_dset_sparse_indices_scanf += f', {index_scanf} + {index_count}' + sparse_line_length_32 += int_len_printf[32] + 1 + sparse_line_length_16 += int_len_printf[16] + 1 + sparse_line_length_8 += int_len_printf[8] + 1 + + # add 24 chars occupied by the floating point value of sparse dataset + 1 char for "\n" + sparse_line_length_32 += 24 + 1 + sparse_line_length_16 += 24 + 1 + sparse_line_length_8 += 24 + 1 + + tmp_dict['sparse_line_length_32'] = 
str(sparse_line_length_32) + tmp_dict['sparse_line_length_16'] = str(sparse_line_length_16) + tmp_dict['sparse_line_length_8'] = str(sparse_line_length_8) + tmp_dict['group_dset_sparse_indices_printf'] = group_dset_sparse_indices_printf + tmp_dict['group_dset_sparse_indices_scanf'] = group_dset_sparse_indices_scanf + # add group name as a key-value pair to the dset dict tmp_dict['group'] = v[2] # split datasets in numeric- and string- based - if (datatype == 'char*'): + if 'str' in datatype: dset_string_dict[k] = tmp_dict + elif is_sparse: + dset_sparse_dict[k] = tmp_dict else: dset_numeric_dict[k] = tmp_dict - return (dset_numeric_dict, dset_string_dict) + return (dset_numeric_dict, dset_string_dict, dset_sparse_dict) def check_dim_consistency(num: dict, dset: dict) -> None: - """ - Consistency check to make sure that each dimensioning variable exists as a num attribute of some group. + """ + Consistency check to make sure that each dimensioning variable exists as a num attribute of some group. Parameters: num (dict) : dictionary of numerical attributes diff --git a/trex.org b/trex.org index b056db0..3c57e82 100644 --- a/trex.org +++ b/trex.org @@ -2,32 +2,40 @@ #+STARTUP: latexpreview #+SETUPFILE: docs/theme.setup -This page contains information about the general structure of the -TREXIO library. The source code of the library can be automatically -generated based on the contents of the ~trex.json~ configuration file, -which itself is compiled from different sections (groups) presented below. +This page contains information about the general structure of the +TREXIO library. The source code of the library can be automatically +generated based on the contents of the ~trex.json~ configuration file, +which itself is compiled from different sections (groups) presented +below. -For more information about the automatic generation on the source code -or regarding possible modifications, please contact the TREXIO developers. 
+For more information about the automatic generation of the source code +or regarding possible modifications, please contact the TREXIO +developers. -All quantities are saved in TREXIO file in atomic units. -The dimensions of the arrays in the tables below are given in -column-major order (as in Fortran), and the ordering of the dimensions -is reversed in the produced ~trex.json~ configuration file as the library is +All quantities are saved in the TREXIO file in atomic units. The +dimensions of the arrays in the tables below are given in column-major +order (as in Fortran), and the ordering of the dimensions is reversed +in the produced ~trex.json~ configuration file as the library is written in C. -TREXIO currently supports ~int~, ~float~ and ~str~ types for both single attributes and arrays. -Note, that some attributes might have ~dim~ type (e.g. ~num~ of the ~nucleus~ group). -This type is treated exactly the same as ~int~ with the only difference that ~dim~ variables -cannot be negative or zero. This additional constraint is required because ~dim~ attributes -are used internally to allocate memory and to check array boundaries in the memory-safe API. -Most of the times, the ~dim~ variables contain ~num~ suffix. - +TREXIO currently supports ~int~, ~float~ and ~str~ types for both +single attributes and arrays. Note that some attributes might have +~dim~ type (e.g. ~num~ of the ~nucleus~ group). This type is treated +exactly the same as ~int~ with the only difference that ~dim~ +variables cannot be negative. This additional constraint is required +because ~dim~ attributes are used internally to allocate memory and to +check array boundaries in the memory-safe API. Most of the time, the +~dim~ variables contain the ~num~ suffix. In Fortran, the arrays are 1-based and in most other languages the arrays are 0-based. Hence, we introduce the ~index~ type which is an 1-based ~int~ in the Fortran interface and 0-based otherwise. 
+For sparse data structures such as electron repulsion integrals, +the data can be too large to fit in memory and the data needs to be +fetched using multiple function calls to perform I/O on buffers. + + #+begin_src python :tangle trex.json :exports none { #+end_src @@ -78,14 +86,14 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an #+CALL: json(data=electron, title="electron") #+RESULTS: - :RESULTS: + :results: #+begin_src python :tangle trex.json "electron": { - "up_num" : [ "int", [] ] - , "dn_num" : [ "int", [] ] + "up_num" : [ "int", [] ] + , "dn_num" : [ "int", [] ] } , #+end_src - :END: + :end: * Nucleus (nucleus group) @@ -100,20 +108,22 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an | ~coord~ | ~float~ | ~(3,nucleus.num)~ | Coordinates of the atoms | | ~label~ | ~str~ | ~(nucleus.num)~ | Atom labels | | ~point_group~ | ~str~ | | Symmetry point group | + | ~repulsion~ | ~float~ | | Nuclear repulsion energy | #+CALL: json(data=nucleus, title="nucleus") #+RESULTS: - :RESULTS: + :results: #+begin_src python :tangle trex.json "nucleus": { - "num" : [ "dim" , [] ] - , "charge" : [ "float", [ "nucleus.num" ] ] - , "coord" : [ "float", [ "nucleus.num", "3" ] ] - , "label" : [ "str" , [ "nucleus.num" ] ] - , "point_group" : [ "str" , [] ] + "num" : [ "dim" , [] ] + , "charge" : [ "float", [ "nucleus.num" ] ] + , "coord" : [ "float", [ "nucleus.num", "3" ] ] + , "label" : [ "str" , [ "nucleus.num" ] ] + , "point_group" : [ "str" , [] ] + , "repulsion" : [ "float", [] ] } , #+end_src - :END: + :end: * Effective core potentials (ecp group) @@ -617,15 +627,18 @@ prim_factor = :end: * TODO Slater determinants -* TODO Reduced density matrices (rdm group) +* Reduced density matrices (rdm group) #+NAME: rdm - | Variable | Type | Dimensions | Description | - |------------+----------------+------------------------------------+-------------| - | ~one_e~ | ~float~ | ~(mo.num, mo.num)~ | | - | ~one_e_up~ | ~float~ | ~(mo.num, mo.num)~ | | - | ~one_e_dn~ | ~float~ | ~(mo.num, 
mo.num)~ | | - | ~one_e_dn~ | ~float~ | ~(mo.num, mo.num)~ | | - | ~two_e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | | + | Variable | Type | Dimensions | Description | + |-----------+----------------+------------------------------------+-----------------------------------------------------------------------| + | ~1e~ | ~float~ | ~(mo.num, mo.num)~ | One body density matrix | + | ~1e_up~ | ~float~ | ~(mo.num, mo.num)~ | \uparrow-spin component of the one body density matrix | + | ~1e_dn~ | ~float~ | ~(mo.num, mo.num)~ | \downarrow-spin component of the one body density matrix | + | ~2e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | Two-body reduced density matrix (spin trace) | + | ~2e_upup~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\uparrow component of the two-body reduced density matrix | + | ~2e_dndn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \downarrow\downarrow component of the two-body reduced density matrix | + | ~2e_updn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\downarrow component of the two-body reduced density matrix | #+CALL: json(data=rdm, title="rdm", last=1) @@ -633,10 +646,13 @@ prim_factor = :results: #+begin_src python :tangle trex.json "rdm": { - "one_e" : [ "float" , [ "mo.num", "mo.num" ] ] - , "one_e_up" : [ "float" , [ "mo.num", "mo.num" ] ] - , "one_e_dn" : [ "float" , [ "mo.num", "mo.num" ] ] - , "two_e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ] + "1e" : [ "float" , [ "mo.num", "mo.num" ] ] + , "1e_up" : [ "float" , [ "mo.num", "mo.num" ] ] + , "1e_dn" : [ "float" , [ "mo.num", "mo.num" ] ] + , "2e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ] + , "2e_upup" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ] + , "2e_dndn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ] + , "2e_updn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ] } #+end_src :end: