Merge pull request #70 from TREX-CoE/add-sparse-datasets

- Add sparse datasets - Modularize generator_tools.py
2024-11-03 20:54:07 +01:00 · 2021-12-17 17:13:22 +01:00 · 2021-12-17 17:13:22 +01:00 · 8ca74ffef1
commit 8ca74ffef1
parent 6705f199b8 31ccd27a0a
18 changed files with 2349 additions and 783 deletions
--- a/.gitignore
+++ b/.gitignore
@ -11,6 +11,8 @@ m4/ltsugar.m4
 m4/ltversion.m4
 m4/lt~obsolete.m4
 autom4te.cache/
+build-config/
+ar-lib
 compile
 config.guess
 config.log
@ -38,5 +40,3 @@ test-suite.log
 *.h5
 trexio-*.tar.gz
 trex.json
-
-
--- a/Makefile.am
+++ b/Makefile.am
@ -90,6 +90,7 @@ TESTS_C = \
  tests/io_num_text \
  tests/io_dset_float_text \
  tests/io_dset_int_text \
+  tests/io_dset_sparse_text \
  tests/io_safe_dset_float_text \
  tests/io_str_text \
  tests/io_dset_str_text \
@ -102,6 +103,7 @@ TESTS_C += \
  tests/io_num_hdf5 \
  tests/io_dset_float_hdf5 \
  tests/io_dset_int_hdf5 \
+  tests/io_dset_sparse_hdf5 \
  tests/io_safe_dset_float_hdf5 \
  tests/io_str_hdf5 \
  tests/io_dset_str_hdf5 \
@ -117,8 +119,8 @@ check_PROGRAMS = $(TESTS)
 # specify common LDADD options for all tests
 LDADD = src/libtrexio.la

-
 test_trexio_f = $(srcdir)/tests/trexio_f.f90
+CLEANFILES += $(test_trexio_f)

 $(test_trexio_f): $(trexio_f)
 	cp $(trexio_f) $(test_trexio_f)
@ -126,7 +128,6 @@ $(test_trexio_f): $(trexio_f)
 trexio.mod: tests/trexio_f.o

 tests_test_f_SOURCES = $(test_trexio_f) tests/test_f.f90
-tests_test_f_LDFLAGS = -no-install

 clean-local:
 	-rm -rf -- *.dir/ *.h5 __pycache__/
@ -134,7 +135,7 @@ clean-local:
 # =============== DOCUMENTATION =============== #

 HTML_TANGLED = docs/index.html            \
-               docs/Sparse.html           \
+               docs/examples.html         \
               docs/templator_hdf5.html   \
               docs/trex.html             \
               docs/README.html           \
@ -179,11 +180,13 @@ BUILT_SOURCES += $(SOURCES) $(trexio_f) $(test_trexio_f)

 all: .git_hash

+GENERATOR_FILES = $(srcdir)/tools/generator.py \
+		  $(srcdir)/tools/generator_tools.py

 $(SOURCES): $(trexio_f)
 src/trexio.c: $(trexio_h)

-$(trexio_f): $(ORG_FILES)
+$(trexio_f): $(ORG_FILES) $(GENERATOR_FILES)
 	cd $(srcdir)/tools && ./build_trexio.sh

 $(htmlizer): $(ORG_FILES) $(srcdir)/src/README.org
@ -227,7 +230,7 @@ $(pytrexio_py): $(pytrexio_c)

 # Build Python module and C wrapper code for TREXIO using SWIG
 # [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface
-$(pytrexio_c): $(ORG_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
+$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
 	cp $(trexio_h) src/
 	cd src/ && \
 		$(SWIG) -python -py3 -o pytrexio_wrap.c pytrexio.i
@ -248,4 +251,3 @@ CLEANFILES += $(pytrexio_c) \
 .PHONY: cppcheck python-test python-install python-sdist check-numpy FORCE

 endif
-
--- a/Sparse.org
+++ b/Sparse.org
@ -1,22 +0,0 @@
-See templator_front.org
-
-* Text back end
-  As the size of the dataset should be extensible, the simplest
-  solution is to use one file for each sparse data set, and store a
-  the name of this file in the group.
-  Each integral can be a line in the file:
-  i  j  k  l  x
-  which can be read with "%10ld %10ld %10ld %10ld %24.16e".
-  The offset can be used with ~fseek(69L*offset, SEEK_SET)~
-  
-* HDF5 Back end
-
-  We need to declare the number of rows of the dataset as
- ~UNLIMITED~. This requires to use the ~Chunked~ storage, and the
-  chunks should absolutely not be larger than 1MB.
-
-  To extend the storage, see :
-  https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
-  (figure 17)
-
-  If the offset+num > nmax, we need to extend the dataset.
--- a/examples.org
+++ b/examples.org
@ -0,0 +1,262 @@
+#+TITLE: Examples
+#+STARTUP: latexpreview
+#+SETUPFILE: docs/theme.setup
+
+  
+* Accessing sparse quantities
+** Fortran
+  :PROPERTIES:
+  :header-args:    :tangle  print_energy.f90
+  :END:
+   
+   #+begin_src f90
+program print_energy
+  use trexio
+  implicit none
+
+  character*(128)  :: filename   ! Name of the input file
+  integer          :: rc         ! Return code for error checking
+  integer(8)       :: f          ! TREXIO file handle
+  character*(128)  :: err_msg    ! Error message
+   #+end_src
+
+   This program computes the energy as:
+
+   \[
+   E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\,
+   +\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l
+   \rangle\; \textrm{ with } \; 0 < i,j,k,l \le n
+   \]
+
+   One needs to read from the TREXIO file:
+
+   - $n$ :: The number of molecular orbitals
+   - $E_{\text{NN}}$ :: The nuclear repulsion energy 
+   - $D_{ij}$ :: The one-body reduced density matrix 
+   - $\langle i |h| j \rangle$ :: The one-electron Hamiltonian integrals 
+   - $\Gamma_{ijkl}$ :: The two-body reduced density matrix 
+   - $\langle i j | k l \rangle$ :: The electron repulsion integrals
+
+   #+begin_src f90
+  integer                       :: n
+  double precision              :: E, E_nn
+  double precision, allocatable :: D(:,:), h0(:,:)
+  double precision, allocatable :: G(:,:,:,:), W(:,:,:,:)
+   #+end_src
+
+*** Declare Temporary variables
+
+   #+begin_src f90
+  integer                       :: i, j, k, l, m
+  integer(8), parameter         :: BUFSIZE = 100000_8
+  integer(8)                    :: offset, icount, size_max
+  integer                       :: buffer_index(4,BUFSIZE)
+  double precision              :: buffer_values(BUFSIZE)
+
+  double precision, external    :: ddot   ! BLAS dot product
+   #+end_src
+
+*** Obtain the name of the TREXIO file from the command line, and open it for reading
+
+   #+begin_src f90
+  call getarg(1, filename)
+
+  f = trexio_open (filename, 'r', TREXIO_HDF5, rc)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error opening TREXIO file: '//trim(err_msg)
+     stop
+  end if
+   #+end_src
+
+*** Read the nuclear repulsion energy
+
+   #+begin_src f90
+  rc = trexio_read_nucleus_repulsion(f, E_nn)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading nuclear repulsion: '//trim(err_msg)
+     stop
+  end if
+   #+end_src
+
+*** Read the number of molecular orbitals
+
+    #+begin_src f90
+  rc = trexio_read_mo_num(f, n)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading number of MOs: '//trim(err_msg)
+     stop
+  end if
+    #+end_src
+
+*** Allocate memory
+
+    #+begin_src f90
+  allocate( D(n,n), h0(n,n) )
+  allocate( G(n,n,n,n), W(n,n,n,n) )
+  G(:,:,:,:) = 0.d0
+  W(:,:,:,:) = 0.d0
+    #+end_src
+
+*** Read one-electron quantities
+    
+    #+begin_src f90
+  rc = trexio_has_mo_1e_int_core_hamiltonian(f)
+  if (rc /= TREXIO_SUCCESS) then
+     stop 'No core hamiltonian in file'
+  end if
+  
+  rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading core Hamiltonian: '//trim(err_msg)
+     stop
+  end if
+  
+  
+  rc = trexio_has_rdm_1e(f)
+  if (rc /= TREXIO_SUCCESS) then
+     stop 'No 1e RDM in file'
+  end if
+  
+  rc = trexio_read_rdm_1e(f, D)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading one-body RDM: '//trim(err_msg)
+     stop
+  end if
+    #+end_src
+  
+*** Read two-electron quantities
+    
+    Reading is done with OpenMP. Each thread reads its own buffer, and
+    the buffers are then processed in parallel.
+
+    Reading the file requires a lock, so it is done in a critical
+    section. The ~offset~ variable is shared, and it is incremented in
+    the critical section. For each read, the function returns in
+ ~icount~ the number of read integrals, so this variable needs also
+    to be protected in the critical section when modified.
+    
+**** Electron repulsion integrals
+
+     #+begin_src f90
+  rc = trexio_has_mo_2e_int_eri(f)
+  if (rc /= TREXIO_SUCCESS) then
+     stop 'No electron repulsion integrals in file'
+  end if
+
+  rc = trexio_read_mo_2e_int_eri_size (f, size_max)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading number of ERIs: '//trim(err_msg)
+     stop
+  end if
+
+  offset = 0_8
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
+  !$OMP   buffer_index, buffer_values, m)
+  icount = BUFSIZE
+  do while (icount == BUFSIZE)
+    !$OMP CRITICAL
+    if (offset < size_max) then
+      rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values)
+      offset = offset + icount
+    else
+      icount = 0
+    end if
+    !$OMP END CRITICAL
+    do m=1,icount
+      i = buffer_index(1,m)
+      j = buffer_index(2,m)
+      k = buffer_index(3,m)
+      l = buffer_index(4,m)
+      W(i,j,k,l) = buffer_values(m)
+      W(k,j,i,l) = buffer_values(m)
+      W(i,l,k,j) = buffer_values(m)
+      W(k,l,i,j) = buffer_values(m)
+      W(j,i,l,k) = buffer_values(m)
+      W(j,k,l,i) = buffer_values(m)
+      W(l,i,j,k) = buffer_values(m)
+      W(l,k,j,i) = buffer_values(m)
+    end do
+  end do
+  !$OMP END PARALLEL
+     #+end_src
+
+**** Reduced density matrix
+     
+     #+begin_src f90
+  rc = trexio_has_rdm_2e(f)
+  if (rc /= TREXIO_SUCCESS) then
+     stop 'No two-body density matrix in file'
+  end if
+
+  rc = trexio_read_rdm_2e_size (f, size_max)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error reading number of 2-RDM elements: '//trim(err_msg)
+     stop
+  end if
+
+  ! Read the sparse 2-RDM by chunks of at most BUFSIZE elements.
+  ! `offset` is shared between threads and is only read or modified inside
+  ! the critical section; each thread decides whether to continue from its
+  ! own private `icount`, exactly as in the ERI loop.
+  offset = 0_8
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
+  !$OMP   buffer_index, buffer_values, m)
+  icount = BUFSIZE
+  ! A chunk shorter than BUFSIZE (or an empty one) means that the end of the
+  ! data set has been reached, so this thread can stop.
+  do while (icount == BUFSIZE)
+    !$OMP CRITICAL
+    if (offset < size_max) then
+      ! On return, icount holds the number of elements actually read
+      rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values)
+      offset = offset + icount
+    else
+      ! Another thread has already consumed the last chunk
+      icount = 0
+    end if
+    !$OMP END CRITICAL
+    ! Scatter the private buffer into the dense 4-index array
+    do m=1,icount
+      i = buffer_index(1,m)
+      j = buffer_index(2,m)
+      k = buffer_index(3,m)
+      l = buffer_index(4,m)
+      G(i,j,k,l) = buffer_values(m)
+    end do
+  end do
+  !$OMP END PARALLEL
+
+     #+end_src
+    
+*** Compute the energy
+    
+    As $(n,m)$ 2D arrays are stored in memory as $(n \times m)$ 1D
+    arrays, we could pass the matrices to the ~ddot~ BLAS function to
+    perform the summations in a single call for the 1-electron quantities.
+    Instead, we prefer to interleave the 1-electron (negative) and
+    2-electron (positive) summations to have a better cancellation of
+    numerical errors.
+    
+    Here $n^4$ can be larger than the largest possible 32-bit integer,
+    so it is not safe to pass $n^4$ to the ~ddot~ BLAS
+    function. Hence, we perform $n^2$ loops, using vectors of size $n^2$.
+    
+    #+begin_src f90
+
+  E = 0.d0
+  do l=1,n
+    E = E + ddot( n, D(1,l), 1, h0(1,l), 1 ) 
+    do k=1,n
+       E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l),  1 )
+    end do
+  end do
+  E = E + E_nn
+
+  print *, 'Energy: ', E
+    #+end_src
+
+*** Terminate
+    
+    #+begin_src f90
+  deallocate( D, h0, G, W )
+
+end program
+    #+end_src
--- a/src/templates_front/templator_front.org
+++ b/src/templates_front/templator_front.org
--- a/src/templates_hdf5/build.sh
+++ b/src/templates_hdf5/build.sh
@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c
 cat populated/pop_write_*.c >> trexio_hdf5.c
 cat populated/pop_hrw_*.h >> trexio_hdf5.h

+cat helpers_hdf5.c  >> trexio_hdf5.c
 cat suffix_hdf5.h   >> trexio_hdf5.h
-
--- a/src/templates_hdf5/templator_hdf5.org
+++ b/src/templates_hdf5/templator_hdf5.org
@ -201,15 +201,15 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
 /* Write the dimensioning variables */
  const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
  const hid_t dspace = H5Screate(H5S_SCALAR);
-  
-  const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, 
+
+  const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
                                 dtype, dspace, H5P_DEFAULT, H5P_DEFAULT);
  if (num_id <= 0) {
    H5Sclose(dspace);
    H5Tclose(dtype);
    return TREXIO_INVALID_ID;
  }
-  
+
  const herr_t status = H5Awrite(num_id, dtype, &(num));
  if (status < 0) {
    H5Aclose(num_id);
@ -217,7 +217,7 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
    H5Tclose(dtype);
    return TREXIO_FAILURE;
  }
-  
+
  H5Sclose(dspace);
  H5Aclose(num_id);
  H5Tclose(dtype);
@ -262,7 +262,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $
 {

  if (file == NULL) return TREXIO_INVALID_ARG_1;
-  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;
+  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

@ -317,7 +317,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$*
 {

  if (file == NULL) return TREXIO_INVALID_ARG_1;
-  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;
+  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

@ -372,6 +372,207 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
 }
   #+end_src

+** Template for HDF5 has/read/write the dataset of sparse data
+
+  Sparse data is stored using extensible datasets of HDF5. Extensibility is required
+  due to the fact that the sparse data will be written in chunks of user-defined size.
+
+   #+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
+trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
+trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
+trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
+trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
+   #+end_src
+
+
+   #+begin_src c :tangle write_dset_sparse_hdf5.c
+/* Write one chunk of `size` sparse elements (indices + values) to the HDF5 file.
+   Indices and values go to two separate extensible datasets named
+   <NAME>_indices and <NAME>_values. Indices are stored with the narrowest of
+   uint8 / uint16 / int32 that can hold values below `size_max`.
+   If the datasets do not exist yet they are created (and `offset_file` is not
+   used); otherwise they are extended and the chunk is written at `offset_file`.
+   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1 for a NULL file,
+   TREXIO_ALLOCATION_FAILED, or the error code of the failing helper. */
+trexio_exit_code
+trexio_hdf5_write_$group_dset$ (trexio_t* const file,
+                                const int64_t offset_file,
+                                const int64_t size,
+                                const int64_t size_max,
+                                const int32_t* index_sparse,
+                                const double* value_sparse)
+{
+
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  trexio_hdf5_t* f = (trexio_hdf5_t*) file;
+
+  hid_t index_dtype;
+  void* index_p;
+ /* Total number of integers to store: `size` elements times the rank of the dataset */
+  uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;
+ /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    uint8_t* index = CALLOC(size_ranked, uint8_t);
+    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
+ /* The loop counter has the same unsigned type as its bound */
+    for (uint64_t i=0; i<size_ranked; ++i){
+      index[i] = (uint8_t) index_sparse[i];
+    }
+    index_p = index;
+    index_dtype = H5T_NATIVE_UINT8;
+  } else if (size_max < UINT16_MAX) {
+    uint16_t* index = CALLOC(size_ranked, uint16_t);
+    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
+    for (uint64_t i=0; i<size_ranked; ++i){
+      index[i] = (uint16_t) index_sparse[i];
+    }
+    index_p = index;
+    index_dtype = H5T_NATIVE_UINT16;
+  } else {
+ /* No conversion needed: write directly from the user buffer (it is only read) */
+    index_p = (int32_t*) index_sparse;
+    index_dtype = H5T_NATIVE_INT32;
+  }
+
+ /* Store float values in double precision */
+  hid_t value_dtype = H5T_NATIVE_DOUBLE;
+ /* Arrays of chunk dims that will be used for chunking the dataset */
+  const hsize_t chunk_i_dims[1] = {size_ranked};
+  const hsize_t chunk_v_dims[1] = {size};
+
+ /* Indices and values are stored as 2 independent datasets in the HDF5 file */
+  char dset_index_name[256] = "\0";
+  char dset_value_name[256] = "\0";
+ /* Build the names of the datasets; copying at most size-1 bytes keeps the
+    zero-initialized buffers NUL-terminated */
+  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", sizeof(dset_index_name) - 1);
+  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", sizeof(dset_value_name) - 1);
+
+  trexio_exit_code rc_write = TREXIO_FAILURE;
+ /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
+  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
+ /* If the dataset does not exist -> create it and write */
+
+ /* Create chunked dataset with index_dtype datatype and write indices into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
+ /* Release the temporary converted buffer, if one was allocated */
+    if (index_p != index_sparse) FREE(index_p);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+ /* Create chunked dataset with value_dtype datatype and write values into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  } else {
+ /* If the dataset exists -> open it, extend it and write */
+    hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
+    hsize_t offset_v[1] = {(hsize_t) offset_file};
+
+ /* Open the existing index dataset, extend it and write the indices at offset_i */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
+    if (index_p != index_sparse) FREE(index_p);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+ /* Open the existing value dataset, extend it and write the values at offset_v */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  }
+
+  return TREXIO_SUCCESS;
+}
+   #+end_src
+
+
+   #+begin_src c :tangle read_dset_sparse_hdf5.c
+/* Read up to `size` sparse elements starting at element `offset_file`.
+   Indices are read from <NAME>_indices into `index_read` and values from
+   <NAME>_values into `value_read`. When the request runs past the end of the
+   stored data, the helper stores the number of values actually read in
+   `*eof_read_size` and TREXIO_END is returned.
+   `size_max` is not used by this back end.
+   Returns TREXIO_SUCCESS, TREXIO_END, TREXIO_INVALID_ARG_1 / _5 for NULL
+   `file` / `eof_read_size`, or the error code of the failing helper. */
+trexio_exit_code
+trexio_hdf5_read_$group_dset$ (trexio_t* const file,
+                               const int64_t offset_file,
+                               const int64_t size,
+                               const int64_t size_max,
+                               int64_t* const eof_read_size,
+                               int32_t* const index_read,
+                               double* const value_read)
+{
+
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
+
+  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
+
+ /* Indices and values are stored as 2 independent datasets in the HDF5 file */
+  char dset_index_name[256] = "\0";
+  char dset_value_name[256] = "\0";
+ /* Build the names of the datasets; copying at most size-1 bytes keeps the
+    zero-initialized buffers NUL-terminated */
+  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", sizeof(dset_index_name) - 1);
+  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", sizeof(dset_value_name) - 1);
+
+ /* Each sparse element carries $group_dset_rank$ indices and one value */
+  hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
+  hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$};
+
+  hsize_t offset_v[1] = {(hsize_t) offset_file};
+  hsize_t count_v[1] = {(hsize_t) size};
+
+  int is_index = 1, is_value = 0;
+  trexio_exit_code rc_read;
+
+  // attempt to read indices (TREXIO_END is not an error: the values still have to be read)
+  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, offset_i, count_i, NULL, is_index, index_read);
+  if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;
+  // attempt to read values
+  // when EOF is encountered - the count_v[0] is modified and contains the number of elements being read
+  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, offset_v, count_v, eof_read_size, is_value, value_read);
+
+  // TREXIO_SUCCESS, TREXIO_END or an error code: forwarded to the caller as is
+  return rc_read;
+}
+   #+end_src
+
+
+   #+begin_src c :tangle read_dset_sparse_hdf5.c
+/* Store in `*size_max` the current number of elements of the <NAME>_values dataset.
+   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1 / _2 for NULL `file` / `size_max`,
+   TREXIO_INVALID_ID if the dataset or its dataspace cannot be opened, or
+   TREXIO_FAILURE if the dataset is not one-dimensional or its extent cannot be queried. */
+trexio_exit_code
+trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
+{
+
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (size_max == NULL) return TREXIO_INVALID_ARG_2;
+
+  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
+
+  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  hid_t fspace_id = H5Dget_space(dset_id);
+  if (fspace_id < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // allocate space for the dimensions to be read
+  hsize_t ddims[1] = {0};
+
+  // get the rank and dimensions of the dataset
+  int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
+
+  H5Dclose(dset_id);
+  H5Sclose(fspace_id);
+
+  // a negative value signals an HDF5 error; any rank other than 1 is not a sparse dataset
+  if (rrank != 1) return TREXIO_FAILURE;
+
+  *size_max = (int64_t) ddims[0];
+
+  return TREXIO_SUCCESS;
+}
+   #+end_src
+
+
+   #+begin_src c :tangle has_dset_sparse_hdf5.c
+/* Tell whether the sparse dataset is present, judged by the existence of its
+   <NAME>_values dataset in the group.
+   Returns TREXIO_SUCCESS if found, TREXIO_HAS_NOT if absent,
+   TREXIO_INVALID_ARG_1 for a NULL file and TREXIO_FAILURE otherwise. */
+trexio_exit_code
+trexio_hdf5_has_$group_dset$ (trexio_t* const file)
+{
+
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  const trexio_hdf5_t* const h5file = (const trexio_hdf5_t*) file;
+
+ /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
+  switch (H5LTfind_dataset(h5file->$group$_group, $GROUP_DSET$_NAME "_values")) {
+    case 1:
+      return TREXIO_SUCCESS;
+    case 0:
+      return TREXIO_HAS_NOT;
+    default:
+      return TREXIO_FAILURE;
+  }
+
+}
+   #+end_src
+
 ** Template for HDF5 has/read/write the dataset of strings

   #+begin_src c :tangle hrw_dset_str_hdf5.h :exports none
@ -403,10 +604,10 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
    return TREXIO_ALLOCATION_FAILED;
  }

-  hid_t dspace = H5Dget_space(dset_id); 
+  hid_t dspace = H5Dget_space(dset_id);
  if (dset_id <= 0) {
    FREE(ddims);
-    H5Dclose(dset_id); 
+    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

@ -442,7 +643,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
  if (rdata == NULL) {
    H5Dclose(dset_id);
    H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
    return TREXIO_ALLOCATION_FAILED;
  }

@ -451,7 +652,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
    FREE(rdata);
    H5Dclose(dset_id);
    H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
    return TREXIO_FAILURE;
  }

@ -474,11 +675,11 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
    FREE(rdata);
    H5Dclose(dset_id);
    H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
    return TREXIO_FAILURE;
  }

-  FREE(rdata); 
+  FREE(rdata);
  H5Dclose(dset_id);
  H5Sclose(dspace);
  H5Tclose(memtype);
@ -509,7 +710,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$,

  if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {

- /* code to create dataset */   
+ /* code to create dataset */
    hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
    if (filetype <= 0) return TREXIO_INVALID_ID;

@ -577,7 +778,7 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)

 }
   #+end_src
-   
+
 ** Template for HDF5 has/read/write the string attribute

  #+begin_src c :tangle hrw_attr_str_hdf5.h :exports none
@ -655,7 +856,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)

  const hid_t dspace_id = H5Screate(H5S_SCALAR);
  if (dspace_id <= 0) return TREXIO_INVALID_ID;
-  
+
 /* Create the $group_str$ attribute of $group$ group */
  const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id,
                       H5P_DEFAULT, H5P_DEFAULT);
@ -665,7 +866,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
    H5Tclose(dtype_id);
    return TREXIO_INVALID_ID;
  }
-  
+
  status = H5Awrite(str_id, dtype_id, str);
  if (status < 0) {
    H5Aclose(str_id);
@ -673,7 +874,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
    H5Tclose(dtype_id);
    return TREXIO_FAILURE;
  }
-  
+
  H5Aclose(str_id);
  H5Sclose(dspace_id);
  H5Tclose(dtype_id);
@ -703,11 +904,256 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)

 }
    #+end_src
+** Helper functions
+
+  #+begin_src c :tangle helpers_hdf5.c
+/* Create a chunked 1D dataset with unlimited maximum size and write the first chunk.
+   The dataset `dset_name` is created in `group_id` with element type `dtype_id`;
+   its initial extent and its chunk size are both chunk_dims[0], and the whole
+   of `data_sparse` is written into it.
+   Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if any HDF5 object cannot be
+   created or configured, or TREXIO_FAILURE if the write fails. */
+trexio_exit_code
+trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
+                                      const char* dset_name,
+                                      const hid_t dtype_id,
+                                      const hsize_t* chunk_dims,
+                                      const void* data_sparse)
+{
+  const int rank = 1;
+  const hsize_t unlimited[1] = {H5S_UNLIMITED};
+
+  /* dataspace: current size = one chunk, maximum size = unlimited */
+  const hid_t space_id = H5Screate_simple(rank, chunk_dims, unlimited);
+  if (space_id < 0) return TREXIO_INVALID_ID;
+
+  /* dataset creation property list carrying the chunk layout */
+  const hid_t dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+  if (dcpl_id < 0) {
+    H5Sclose(space_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  if (H5Pset_chunk(dcpl_id, rank, chunk_dims) < 0) {
+    H5Sclose(space_id);
+    H5Pclose(dcpl_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  const hid_t new_dset = H5Dcreate(group_id, dset_name, dtype_id, space_id,
+                                   H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+  if (new_dset < 0) {
+    H5Sclose(space_id);
+    H5Pclose(dcpl_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  /* write the full buffer, then release every handle before reporting the outcome */
+  const herr_t write_status = H5Dwrite(new_dset, dtype_id,
+                                       H5S_ALL, H5S_ALL, H5P_DEFAULT,
+                                       data_sparse);
+  H5Sclose(space_id);
+  H5Pclose(dcpl_id);
+  H5Dclose(new_dset);
+
+  return (write_status < 0) ? TREXIO_FAILURE : TREXIO_SUCCESS;
+}
+
+
+/* Append one chunk to an existing extensible 1D dataset.
+   The dataset `dset_name` of `group_id` is grown by chunk_dims[0] elements and
+   the chunk_dims[0] elements of `data_sparse` (memory type `dtype_id`) are
+   written starting at position offset_file[0].
+   Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if an HDF5 handle, extension or
+   selection step fails, or TREXIO_FAILURE if the dataset is not
+   one-dimensional or the write itself fails. */
+trexio_exit_code
+trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
+                                    const char* dset_name,
+                                    const hid_t dtype_id,
+                                    const hsize_t* chunk_dims,
+                                    const hsize_t* offset_file,
+                                    const void* data_sparse)
+{
+  const int h5_rank = 1;
+
+  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  hid_t fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // allocate space for the dimensions to be read
+  hsize_t ddims[1] = {0};
+
+  // get the rank and dimensions of the dataset;
+  // a negative value is an HDF5 error and any other rank is not a sparse dataset
+  int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
+  if (rrank != h5_rank) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_FAILURE;
+  }
+  ddims[0] += chunk_dims[0];
+
+  // extend the dset size
+  herr_t status  = H5Dset_extent(dset_id, ddims);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // close and reopen the file dataspace to take into account the extension
+  H5Sclose(fspace);
+  fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // select hyperslab to be written using chunk_dims and offset values
+  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // create memory dataspace to write from
+  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
+  if (dspace < 0) {
+    // dspace is not a valid handle here, so there is nothing to close for it
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  status = H5Dwrite(dset_id,
+                    dtype_id,
+                    dspace, fspace, H5P_DEFAULT,
+                    data_sparse);
+  H5Dclose(dset_id);
+  H5Sclose(dspace);
+  H5Sclose(fspace);
+  if (status < 0) return TREXIO_FAILURE;
+
+  return TREXIO_SUCCESS;
+}
+
+
+/* Read a slice of a 1D sparse dataset (either indices or values).
+   Reads size_read[0] elements of `dset_name` starting at offset_file[0] into
+   `data_sparse`. If the slice runs past the end of the dataset, size_read[0]
+   is reduced to the number of elements available, that number is stored in
+   `*eof_read_size` (when not NULL) and TREXIO_END is returned.
+   When `is_index` is 1, `data_sparse` is filled as an int32_t array: indices
+   stored as uint8 / uint16 are read into a temporary buffer and widened.
+   Otherwise the data is read directly with the datatype stored in the file.
+   Returns TREXIO_SUCCESS, TREXIO_END, TREXIO_INVALID_ID, TREXIO_ALLOCATION_FAILED
+   or TREXIO_FAILURE. */
+trexio_exit_code
+trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
+                                   const char* dset_name,
+                                   const hsize_t* offset_file,
+                                   hsize_t* const size_read,
+                                   int64_t* const eof_read_size,
+                                   const int is_index,
+                                   void* const data_sparse
+                                   )
+{
+  const int h5_rank = 1;
+
+  // get the dataset handle
+  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  // get the dataspace of the dataset
+  hid_t fspace_id = H5Dget_space(dset_id);
+  if (fspace_id < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // get dims of the dset stored in the file to check whether reading with user-provided
+  // chunk size will reach end of the dataset (i.e. EOF in TEXT back end)
+  hsize_t ddims[1] = {0};
+  int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
+  if (rrank != h5_rank) {
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    return TREXIO_FAILURE;
+  }
+
+  // offset beyond the end of the dataset: nothing can be read, and the unsigned
+  // subtraction used below to shrink size_read would wrap around
+  if (offset_file[0] > ddims[0]) {
+    size_read[0] = 0;
+    if (eof_read_size != NULL) *eof_read_size = 0;
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    return TREXIO_END;
+  }
+
+  hsize_t max_offset = offset_file[0] + size_read[0];
+
+  int is_EOF = 0;
+  // if max_offset exceed current dim of the dset => EOF
+  if (max_offset > ddims[0]) {
+    is_EOF = 1;
+    // lower the value of count to reduce the number of elements which will be read
+    size_read[0] -= max_offset - ddims[0];
+    // modified the value of eof_read_size passed by address
+    if (eof_read_size != NULL) *eof_read_size = (int64_t) size_read[0];
+  }
+
+  // read the datatype from the dataset; this handle has to be released with H5Tclose
+  hid_t dtype = H5Dget_type(dset_id);
+  if (dtype < 0) {
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // special case when reading int indices stored with a narrower type
+  int64_t size_ranked = (int64_t) size_read[0];
+  const int is_uint8  = (is_index == 1) && (H5Tequal(dtype, H5T_NATIVE_UINT8)  > 0);
+  const int is_uint16 = (is_index == 1) && (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0);
+
+  // buffer that H5Dread fills: the user buffer unless a conversion is required.
+  // It is always initialized so that the `index_p != data_sparse` tests below are well defined.
+  void* index_p = data_sparse;
+  if (is_uint8) {
+    index_p = CALLOC(size_ranked, uint8_t);
+  } else if (is_uint16) {
+    index_p = CALLOC(size_ranked, uint16_t);
+  }
+  if ((is_uint8 || is_uint16) && index_p == NULL) {
+    H5Tclose(dtype);
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    return TREXIO_ALLOCATION_FAILED;
+  }
+
+  herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL);
+  if (status < 0) {
+    H5Tclose(dtype);
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    if (index_p != data_sparse) FREE(index_p);
+    return TREXIO_INVALID_ID;
+  }
+
+  hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
+  if (memspace_id < 0) {
+    H5Tclose(dtype);
+    H5Sclose(fspace_id);
+    H5Dclose(dset_id);
+    if (index_p != data_sparse) FREE(index_p);
+    return TREXIO_INVALID_ID;
+  }
+
+  // index_p is data_sparse itself whenever no conversion is needed,
+  // so a single call covers both the index and the value case
+  status = H5Dread(dset_id,
+                   dtype,
+                   memspace_id, fspace_id, H5P_DEFAULT,
+                   index_p);
+
+  H5Tclose(dtype);
+  H5Sclose(fspace_id);
+  H5Sclose(memspace_id);
+  H5Dclose(dset_id);
+  if (status < 0) {
+    if (index_p != data_sparse) FREE(index_p);
+    return TREXIO_FAILURE;
+  }
+
+  // widen the narrow indices into the int32_t buffer of the caller
+  if (is_uint8) {
+    const uint8_t* index = (const uint8_t*) index_p;
+    for (int64_t i=0; i<size_ranked; ++i){
+      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
+    }
+    FREE(index_p);
+  } else if (is_uint16) {
+    const uint16_t* index = (const uint16_t*) index_p;
+    for (int64_t i=0; i<size_ranked; ++i){
+      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
+    }
+    FREE(index_p);
+  }
+
+  if (is_EOF == 1) return TREXIO_END;
+
+  return TREXIO_SUCCESS;
+}
+  #+end_src
+
 * Constant file suffixes (not used by the generator)               :noexport:

  #+begin_src c :tangle suffix_hdf5.h
+trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
+trexio_exit_code trexio_hdf5_open_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
+trexio_exit_code trexio_hdf5_open_read_dset_sparse (const hid_t group_id, const char* dset_name, const hsize_t* offset_file, hsize_t* const size_read, int64_t* const eof_read_size, const int is_index, void* const data_sparse);

 #endif
  #+end_src
-
-
--- a/src/templates_text/build.sh
+++ b/src/templates_text/build.sh
@ -19,23 +19,26 @@ cat populated/pop_flush_group_text.h >> trexio_text.h

 cat populated/pop_has_dset_data_text.c >> trexio_text.c
 cat populated/pop_has_dset_str_text.c >> trexio_text.c
+cat populated/pop_has_dset_sparse_text.c >> trexio_text.c
 cat populated/pop_has_attr_num_text.c >> trexio_text.c
 cat populated/pop_has_attr_str_text.c >> trexio_text.c
+
 cat populated/pop_read_dset_data_text.c >> trexio_text.c
 cat populated/pop_read_dset_str_text.c >> trexio_text.c
+cat populated/pop_read_dset_sparse_text.c >> trexio_text.c
 cat populated/pop_read_attr_str_text.c >> trexio_text.c
 cat populated/pop_read_attr_num_text.c >> trexio_text.c
+
 cat populated/pop_write_dset_data_text.c >> trexio_text.c
 cat populated/pop_write_dset_str_text.c >> trexio_text.c
+cat populated/pop_write_dset_sparse_text.c >> trexio_text.c
 cat populated/pop_write_attr_str_text.c >> trexio_text.c
 cat populated/pop_write_attr_num_text.c >> trexio_text.c
+
 cat populated/pop_hrw_dset_data_text.h >> trexio_text.h
 cat populated/pop_hrw_dset_str_text.h >> trexio_text.h
+cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h
 cat populated/pop_hrw_attr_num_text.h >> trexio_text.h
 cat populated/pop_hrw_attr_str_text.h >> trexio_text.h

-cat rdm_text.c >> trexio_text.c
-cat rdm_text.h >> trexio_text.h
-
 cat suffix_text.h   >> trexio_text.h
-
--- a/src/templates_text/templator_text.org
+++ b/src/templates_text/templator_text.org
@ -93,22 +93,10 @@ typedef struct $group$_s {

 ** Template for general structure in text back end

-    #+begin_src c :tangle struct_text_group.h
-typedef struct rdm_s {
-  uint64_t dim_one_e;
-  uint32_t to_flush;
-  uint32_t padding;
-  double*  one_e;
-  char     file_name[TREXIO_MAX_FILENAME_LENGTH];
-  char     two_e_file_name[TREXIO_MAX_FILENAME_LENGTH];
-} rdm_t;
-    #+end_src
-
    #+begin_src c :tangle struct_text_group.h
 typedef struct trexio_text_s {
  trexio_t   parent ;
  $group$_t* $group$;
-  rdm_t*      rdm;
  int        lock_file;
 } trexio_text_t;
    #+end_src
@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file)
 /* Error handling for this call is added by the generator */
  rc = trexio_text_free_$group$( (trexio_text_t*) file);

-  rc = trexio_text_free_rdm( (trexio_text_t*) file);
-  if (rc != TREXIO_SUCCESS) return rc;
-
  return TREXIO_SUCCESS;

 }
@ -411,7 +396,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
        return NULL;
      }

-      rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$));
+      rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
      assert(!(rc != 1));
      if (rc != 1) {
        FREE(buffer);
@ -499,7 +484,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
    }

    for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
-      rc = fscanf(f, "%$group_dset_std_dtype_in$", &($group$->$group_dset$[i]));
+      rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
      assert(!(rc != 1));
      if (rc != 1) {
 	FREE(buffer);
@ -535,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file)
      }

      /* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
-      ,  BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
-      ,*/
+         BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
+       */
      char* tmp_$group_dset$;
      tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);

      for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
        $group$->$group_dset$[i] = tmp_$group_dset$;
        /* conventional fcanf with "%s" only return the string before the first space character
-         ,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
-         ,* Q: depending on what ? */
+         * to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
+         * Q: depending on what ? */
        rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$);
        assert(!(rc != 1));
        if (rc != 1) {
@ -613,7 +598,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)

  // START REPEAT GROUP_NUM
  fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
-  if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$);
+  if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
  // END REPEAT GROUP_NUM

  // START REPEAT GROUP_ATTR_STR
@ -627,7 +612,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)

  fprintf(f, "$group_dset$\n");
  for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
-    fprintf(f, "%$group_dset_std_dtype_out$\n", $group$->$group_dset$[i]);
+    fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
  }
  // END REPEAT GROUP_DSET_ALL

@ -1016,327 +1001,280 @@ trexio_text_has_$group_str$ (trexio_t* const file)

 }
   #+end_src
-** RDM struct (hard-coded)
-*** Read the complete struct
+** Template for has/read/write the dataset of sparse data

-    #+begin_src c :tangle rdm_text.h
-rdm_t* trexio_text_read_rdm(trexio_text_t* const file);
-    #+end_src
+   Each sparse array is stored in a separate =.txt= file because sparse I/O has to be decoupled
+   from the conventional write/read/flush behaviour of the TEXT back end. Sparse data is read and written
+   in chunks to keep the memory usage bounded. Each chunk has a given ~int64_t size~
+   (the size is the number of sparse data items, e.g. integrals, in the chunk).

-    #+begin_src c :tangle rdm_text.c
-rdm_t* trexio_text_read_rdm(trexio_text_t* const file) {
-  if (file  == NULL) return NULL;
+   The user provides the indices and the values of the sparse array as two separate variables.

-  if (file->rdm != NULL) return file->rdm;

- /* Allocate the data structure */
-  rdm_t* rdm = MALLOC(rdm_t);
-  assert (rdm != NULL);
+   #+begin_src c :tangle hrw_dset_sparse_text.h :exports none
+trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file);
+trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
+trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int64_t size_start, const int32_t* index_sparse, const double* value_sparse);
+trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
+   #+end_src

-  rdm->one_e           = NULL;
-  rdm->two_e_file_name[0] = '\0';
-  rdm->to_flush        = 0;

- /* Try to open the file. If the file does not exist, return */
-  const char* rdm_file_name = "/rdm.txt";
+   #+begin_src c :tangle write_dset_sparse_text.c
+trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
+                                                const int64_t offset_file,
+                                                const int64_t size,
+                                                const int64_t size_max,
+                                                const int64_t size_start,
+                                                const int32_t* index_sparse,
+                                                const double* value_sparse)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;

-  strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH);
+ /* Build the name of the file with sparse data*/
+ /* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+ /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

-  strncat (rdm->file_name, rdm_file_name,
-	   TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name));
+ /* Copy directory name in file_full_path */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+ /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+	   TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));

-  if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
-    FREE(rdm);
-    return NULL;
+ /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
+  FILE* f = fopen(file_full_path, "a");
+  if(f == NULL) return TREXIO_FILE_ERROR;
+
+
+  /* Specify the line length in order to offset properly. For example, for 4-index quantities
+     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
+     CURRENTLY NO OFFSET IS USED WHEN WRITING !
+    ,*/
+  int64_t line_length = 0L;
+  char format_str[256] = "\0";
+
+ /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    line_length = $sparse_line_length_8$; // 41 for 4 indices
+    strncpy(format_str, $sparse_format_printf_8$, 256);
+  } else if (size_max < UINT16_MAX) {
+    line_length = $sparse_line_length_16$; // 49 for 4 indices
+    strncpy(format_str, $sparse_format_printf_16$, 256);
+  } else {
+    line_length = $sparse_line_length_32$; //69 for 4 indices
+    strncpy(format_str, $sparse_format_printf_32$, 256);
  }
- /* If the file exists, read it */
-  FILE* f = fopen(rdm->file_name,"r");
-  if (f != NULL) {
+  strncat(format_str, "\n", 2);

- /* Find size of file to allocate the max size of the string buffer */
-    fseek(f, 0L, SEEK_END);
-    size_t sz = ftell(f);
-    fseek(f, 0L, SEEK_SET);
-    sz = (sz < 1024) ? (1024) : (sz);
-    char* buffer = CALLOC(sz, char);
+  /* Get the starting position of the IO stream to be written in the .size file.
+     This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
+     One can use ftello function which is adapted for large files.
+     For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
+   */
+  int64_t io_start_pos = size_start * line_length;

- /* Read the dimensioning variables */
-    int rc;
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "dim_one_e") == 0);
-
-    rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e));
-    assert (rc == 1);
-
- /* Allocate arrays */
-    rdm->one_e = CALLOC(rdm->dim_one_e, double);
-    assert (rdm->one_e != NULL);
-
- /* Read one_e */
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "one_e") == 0);
-
-    for (uint64_t i=0 ; i<rdm->dim_one_e; ++i) {
-      rc = fscanf(f, "%lf", &(rdm->one_e[i]));
-      assert (rc == 1);
-    }
-
- /* Read two_e */
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "two_e_file_name") == 0);
-
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    strncpy(rdm->two_e_file_name, buffer, 1024);
-    if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
-      FREE(buffer);
-      FREE(rdm->one_e);
-      FREE(rdm);
+ /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
+  int rc;
+  for (uint64_t i=0UL; i<size; ++i) {
+    rc = fprintf(f, format_str,
+       $group_dset_sparse_indices_printf$,
+       *(value_sparse + i));
+    if(rc <= 0) {
      fclose(f);
-      return NULL;
-    }
-
-    FREE(buffer);
-    fclose(f);
-    f = NULL;
-  }
-  file->rdm = rdm ;
-  return rdm;
-}
-   #+end_src
-
-*** Flush the complete struct
-
-    #+begin_src c :tangle rdm_text.h
-trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file);
-    #+end_src
-
-    #+begin_src c :tangle rdm_text.c
-trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) {
-  if (file == NULL) return TREXIO_INVALID_ARG_1;
-
-  if (file->parent.mode == 'r') return TREXIO_READONLY;
-
-  rdm_t* const rdm = file->rdm;
-  if (rdm == NULL) return TREXIO_SUCCESS;
-
-  if (rdm->to_flush == 0) return TREXIO_SUCCESS;
-
-  FILE* f = fopen(rdm->file_name,"w");
-  assert (f != NULL);
-
- /* Write the dimensioning variables */
-  fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e);
-
- /* Write arrays */
-  fprintf(f, "one_e\n");
-  for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) {
-    fprintf(f, "%lf\n", rdm->one_e[i]);
-  }
-
-  fprintf(f, "two_e_file_name\n");
-  fprintf(f, "%s\n", rdm->two_e_file_name);
-
-  fclose(f);
-  rdm->to_flush = 0;
-  return TREXIO_SUCCESS;
-}
-   #+end_src
-
-*** Free memory
-
-     Memory is allocated when reading. The followig function frees memory.
-
-    #+begin_src c :tangle rdm_text.h
-trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file);
-    #+end_src
-
-    #+begin_src c :tangle rdm_text.c
-trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) {
-  if (file == NULL) return TREXIO_INVALID_ARG_1;
-
-  if (file->parent.mode != 'r') {
-    trexio_exit_code rc = trexio_text_flush_rdm(file);
-    if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
-  }
-
-  rdm_t* const rdm = file->rdm;
-  if (rdm == NULL) return TREXIO_SUCCESS;
-
-  if (rdm->one_e != NULL) {
-    FREE (rdm->one_e);
-  }
-
-  free (rdm);
-  file->rdm = NULL;
-  return TREXIO_SUCCESS;
-}
-    #+end_src
-
-*** Read/Write the one_e attribute
-
-     The ~one_e~ array is assumed allocated with the appropriate size.
-
-    #+begin_src c :tangle rdm_text.h
-trexio_exit_code
-trexio_text_read_rdm_one_e(trexio_t* const file,
-			   double* const one_e,
-			   const uint64_t dim_one_e);
-
-trexio_exit_code
-trexio_text_write_rdm_one_e(trexio_t* const file,
-			    const double* one_e,
-			    const uint64_t dim_one_e);
-   #+end_src
-
-    #+begin_src c :tangle rdm_text.c
-trexio_exit_code
-trexio_text_read_rdm_one_e(trexio_t* const file,
-			   double* const one_e,
-			   const uint64_t dim_one_e)
-{
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (one_e == NULL) return TREXIO_INVALID_ARG_2;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3;
-
-  for (uint64_t i=0 ; i<dim_one_e ; ++i) {
-    one_e[i] = rdm->one_e[i];
-  }
-
-  return TREXIO_SUCCESS;
-}
-
-
-trexio_exit_code
-trexio_text_write_rdm_one_e(trexio_t* const file,
-			    const double* one_e,
-			    const uint64_t dim_one_e)
-{
-  if (file  == NULL)  return TREXIO_INVALID_ARG_1;
-  if (one_e == NULL)  return TREXIO_INVALID_ARG_2;
-  if (file->mode != 'r') return TREXIO_READONLY;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  rdm->dim_one_e = dim_one_e;
-  for (uint64_t i=0 ; i<dim_one_e ; ++i) {
-    rdm->one_e[i] = one_e[i];
-  }
-
-  rdm->to_flush = 1;
-  return TREXIO_SUCCESS;
-}
-     #+end_src
-
-*** Read/Write the two_e attribute
-
- ~two_e~ is a sparse data structure, which can be too large to fit
-     in memory. So we provide functions to read and write it by
-     chunks.
-     In the text back end, the easiest way to do it is to create a
-     file for each sparse float structure.
-
-    #+begin_src c :tangle rdm_text.h
-trexio_exit_code
-trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
-				    const uint64_t offset,
-				    const uint64_t size,
-				    int64_t* const index,
-				    double* const value);
-
-trexio_exit_code
-trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
-				     const uint64_t offset,
-				     const uint64_t size,
-				     const int64_t* index,
-				     const double* value);
-   #+end_src
-
-    #+begin_src c :tangle rdm_text.c
-trexio_exit_code
-trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
-				    const uint64_t offset,
-				    const uint64_t size,
-				    int64_t* const index,
-				    double* const value)
-{
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (index == NULL) return TREXIO_INVALID_ARG_4;
-  if (value == NULL) return TREXIO_INVALID_ARG_5;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  FILE* f = fopen(rdm->two_e_file_name, "r");
-  if (f == NULL) return TREXIO_END;
-
-  const uint64_t line_length = 64L;
-  fseek(f, (long) offset * line_length, SEEK_SET);
-
-  for (uint64_t i=0 ; i<size ; ++i) {
-    int rc = fscanf(f, "%9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %24le\n",
-	   &index[4*i],
-	   &index[4*i+1],
-	   &index[4*i+2],
-	   &index[4*i+3],
-	   &value[i]);
-    if (rc == 5) {
- /* Do nothing */
-    } else if (rc == EOF) {
-      return TREXIO_END;
+      return TREXIO_FAILURE;
    }
  }

-  return TREXIO_SUCCESS;
-}
+ /* Close the TXT file */
+  rc = fclose(f);
+  if (rc != 0) return TREXIO_FILE_ERROR;

+ /* Append .size to the file_full_path in order to write additional info about the written buffer of data */
+  strncat(file_full_path, ".size", 6);

-trexio_exit_code
-trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
-				     const uint64_t offset,
-				     const uint64_t size,
-				     const int64_t* index,
-				     const double* value)
-{
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (index == NULL) return TREXIO_INVALID_ARG_4;
-  if (value == NULL) return TREXIO_INVALID_ARG_5;
-  if (file->mode != 'r') return TREXIO_READONLY;
+ /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
+  FILE *f_wSize = fopen(file_full_path, "a");
+  if (f_wSize == NULL) return TREXIO_FILE_ERROR;

-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  FILE* f = fopen(rdm->two_e_file_name, "w");
-  if (f == NULL) return TREXIO_FAILURE;
-
-  const uint64_t line_length = 64L;
-  fseek(f, (long) offset * line_length, SEEK_SET);
-
-  for (uint64_t i=0 ; i<size ; ++i) {
-    int rc = fprintf(f, "%9" PRId64 " %9" PRId64 " %9" PRId64 " %9" PRId64 " %24le\n",
-	   index[4*i],
-	   index[4*i+1],
-	   index[4*i+2],
-	   index[4*i+3],
-	   value[i]);
-    if (rc != 5) return TREXIO_FAILURE;
+ /* Write the buffer_size */
+  rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
+  if (rc <= 0) {
+    fclose(f_wSize);
+    return TREXIO_FAILURE;
  }

+ /* Close the TXT file */
+  rc = fclose(f_wSize);
+  if (rc != 0) return TREXIO_FILE_ERROR;
+
+ /* Exit upon success */
  return TREXIO_SUCCESS;
 }
-     #+end_src
+   #+end_src
+
+
+   #+begin_src c :tangle read_dset_sparse_text.c
+/* Read a chunk of the sparse dataset $group_dset$ from the TEXT back end.
+
+   The data file is "<file->file_name>/$group_dset$.txt". The stream is positioned at
+   offset_file * line_length bytes, where the fixed line_length is selected from size_max,
+   and up to size lines are parsed with sscanf. The floating-point value of the i-th
+   parsed line is stored in value_sparse[i]; the index destinations are produced by the
+   generator (presumably locations inside index_sparse -- confirm against the generator).
+
+   Returns:
+     TREXIO_SUCCESS        size lines have been parsed
+     TREXIO_END            the end of file was reached first; eof_read_size receives the number of parsed lines
+     TREXIO_INVALID_ARG_1  file is NULL or its directory name is too long to build the path
+     TREXIO_INVALID_ARG_2  offset_file is negative
+     TREXIO_INVALID_ARG_3  size is negative
+     TREXIO_INVALID_ARG_5  eof_read_size is NULL
+     TREXIO_FILE_ERROR     the file could not be opened, positioned or closed
+     TREXIO_FAILURE        a line could not be parsed */
+trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
+                                               const int64_t offset_file,
+                                               const int64_t size,
+                                               const int64_t size_max,
+                                               int64_t* const eof_read_size,
+                                               int32_t* const index_sparse,
+                                               double* const value_sparse)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
+  if (size < 0L) return TREXIO_INVALID_ARG_3;
+  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
+
+ /* Name of the file with sparse data, relative to the TREXIO directory (limited to 256 symbols for the moment) */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+ /* The full path to the TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
+
+ /* Copy directory name in file_full_path; strncpy does not terminate the string when the source fills the buffer */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
+ /* Refuse to build a path that does not fit: the last argument of strncat bounds the number of appended
+    characters, not the size of the destination, so the remaining room has to be checked explicitly */
+  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= (size_t) TREXIO_MAX_FILENAME_LENGTH) {
+    return TREXIO_INVALID_ARG_1;
+  }
+ /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);
+
+ /* Open the file in "r" (read) mode: reading never truncates or modifies the data */
+  FILE* f = fopen(file_full_path, "r");
+  if(f == NULL) return TREXIO_FILE_ERROR;
+
+ /* Specify the line length in order to offset properly. For example, for 4-index quantities the line_length
+    is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char */
+  uint64_t line_length = 0UL;
+ /* Determine the line length depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    line_length = $sparse_line_length_8$; // 41 for 4 indices
+  } else if (size_max < UINT16_MAX) {
+    line_length = $sparse_line_length_16$; // 49 for 4 indices
+  } else {
+    line_length = $sparse_line_length_32$; //69 for 4 indices
+  }
+
+ /* Offset in the file according to the provided value of offset_file and the line_length.
+    fseek takes a long: reject byte offsets that do not survive the conversion and check the result of fseek */
+  const uint64_t byte_offset = (uint64_t) offset_file * line_length;
+  const long seek_pos = (long) byte_offset;
+  if (seek_pos < 0L || (uint64_t) seek_pos != byte_offset || fseek(f, seek_pos, SEEK_SET) != 0) {
+    fclose(f);
+    return TREXIO_FILE_ERROR;
+  }
+
+ /* Read the data line by line and check the return code of sscanf to verify that something has been parsed.
+    The loop variable has to be named i: the generated index arguments may refer to it (TODO confirm). */
+  int rc;
+  char buffer[1024];
+  uint64_t count = 0UL;
+  for (uint64_t i=0UL; i < (uint64_t) size; ++i) {
+
+    memset(buffer,0,sizeof(buffer));
+
+   /* End of file before size lines have been read: report how many lines were parsed */
+    if(fgets(buffer, 1023, f) == NULL){
+      fclose(f);
+      *eof_read_size = (int64_t) count;
+      return TREXIO_END;
+    }
+
+   /* NOTE(review): a partially parsed line (0 < rc < number of expected fields) is still accepted here,
+      the expected number of fields depends on the generated format string */
+    rc = sscanf(buffer, "$group_dset_format_scanf$",
+                $group_dset_sparse_indices_scanf$,
+                value_sparse + i);
+    if(rc <= 0) {
+      fclose(f);
+      return TREXIO_FAILURE;
+    }
+    count += 1UL;
+
+  }
+
+ /* Close the TXT file */
+  rc = fclose(f);
+  if(rc != 0) return TREXIO_FILE_ERROR;
+
+  return TREXIO_SUCCESS;
+}
+   #+end_src
+
+
+   #+begin_src c :tangle read_dset_sparse_text.c
+/* Compute the total number of sparse items stored for $group_dset$ in the TEXT back end.
+
+   The file "<file->file_name>/$group_dset$.txt.size" holds one "size offset" record per written
+   chunk; the sizes of all records are accumulated and stored at the address size_max.
+
+   Returns:
+     TREXIO_SUCCESS            the accumulated size has been written to size_max
+     TREXIO_INVALID_ARG_1      file is NULL or its directory name is too long to build the path
+     TREXIO_INVALID_ARG_2      size_max is NULL
+     TREXIO_FILE_ERROR         the file could not be opened or closed
+     TREXIO_FAILURE            the file contains a malformed record
+     TREXIO_INT_SIZE_OVERFLOW  the sum does not fit in int64_t (size_max is set to -1) */
+trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (size_max == NULL) return TREXIO_INVALID_ARG_2;
+
+ /* Name of the file with the chunk sizes, relative to the TREXIO directory (limited to 256 symbols for the moment) */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
+ /* The full path to the .size file. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
+
+ /* Copy directory name in file_full_path; strncpy does not terminate the string when the source fills the buffer */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
+ /* Refuse to build a path that does not fit: the last argument of strncat bounds the number of appended
+    characters, not the size of the destination, so the remaining room has to be checked explicitly */
+  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= (size_t) TREXIO_MAX_FILENAME_LENGTH) {
+    return TREXIO_INVALID_ARG_1;
+  }
+ /* Append name of the file with the chunk sizes */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);
+
+ /* Open the file in "r" (read) mode: reading never truncates or modifies the data */
+  FILE* f = fopen(file_full_path, "r");
+  if(f == NULL) return TREXIO_FILE_ERROR;
+
+  int rc;
+  int64_t size_item, offset_item, size_accum=0L;
+
+ /* Read the records one by one. Only a return value of 2 means that a complete record has been converted:
+    on a matching failure fscanf returns 0 without consuming the input, so looping until EOF would never end. */
+  while((rc = fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item)) == 2) {
+ /* Check that summation will not overflow the int64_t value */
+    if (INT64_MAX - size_accum > size_item) {
+      size_accum += size_item;
+    } else {
+      fclose(f);
+      *size_max = -1L;
+      return TREXIO_INT_SIZE_OVERFLOW;
+    }
+  }
+
+ /* Anything but EOF at this point means that a record is incomplete or is not a number */
+  if (rc != EOF) {
+    fclose(f);
+    return TREXIO_FAILURE;
+  }
+
+ /* Close the TXT file */
+  rc = fclose(f);
+  if(rc != 0) return TREXIO_FILE_ERROR;
+
+ /* Overwrite the value at the input address and return TREXIO_SUCCESS */
+  *size_max = size_accum;
+  return TREXIO_SUCCESS;
+
+}
+   #+end_src
+
+   #+begin_src c :tangle has_dset_sparse_text.c
+/* Check whether the sparse dataset $group_dset$ exists in the TEXT back end.
+
+   The dataset is considered present when the file "<file->file_name>/$group_dset$.txt" is
+   reported as existing by access(); the file is neither opened nor created.
+
+   Returns:
+     TREXIO_SUCCESS        the file exists
+     TREXIO_HAS_NOT        the file does not exist (or access() failed)
+     TREXIO_INVALID_ARG_1  file is NULL or its directory name is too long to build the path */
+trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+ /* Name of the file with sparse data, relative to the TREXIO directory (limited to 256 symbols for the moment) */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+ /* The full path to the TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
+
+ /* Copy directory name in file_full_path; strncpy does not terminate the string when the source fills the buffer */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
+ /* Refuse to build a path that does not fit: the last argument of strncat bounds the number of appended
+    characters, not the size of the destination, so the remaining room has to be checked explicitly */
+  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= (size_t) TREXIO_MAX_FILENAME_LENGTH) {
+    return TREXIO_INVALID_ARG_1;
+  }
+ /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);
+
+ /* Check the return code of access function to determine whether the file with sparse data exists or not */
+  if (access(file_full_path, F_OK) == 0){
+    return TREXIO_SUCCESS;
+  } else {
+    return TREXIO_HAS_NOT;
+  }
+}
+   #+end_src

 * Constant file suffixes (not used by the generator)               :noexport:

  #+begin_src c :tangle suffix_text.h
 #endif
  #+end_src
-
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,11 +1,12 @@

-# ================= TESTING ================= 
+# ================= TESTING =================

 # Create a list of tests for TEXT back end.
 set(Tests_text
  open_text
  io_dset_float_text
  io_dset_str_text
+  io_dset_sparse_text
  io_safe_dset_float_text
  io_dset_int_text
  io_num_text
@ -19,6 +20,7 @@ if(ENABLE_HDF5)
    open_hdf5
    io_dset_float_hdf5
    io_dset_str_hdf5
+    io_dset_sparse_hdf5
    io_safe_dset_float_hdf5
    io_dset_int_hdf5
    io_num_hdf5
@ -43,4 +45,3 @@ endforeach()
 add_executable(test_f test_f.f90)
 target_link_libraries(test_f PRIVATE trexio_f)
 add_test(NAME test_f COMMAND $<TARGET_FILE:test_f>)
-
--- a/tests/io_dset_sparse_hdf5.c
+++ b/tests/io_dset_sparse_hdf5.c
@ -0,0 +1,235 @@
+#include "trexio.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define TEST_BACKEND  TREXIO_HDF5
+#define TREXIO_FILE   "test_dset_sparse.h5"
+#define RM_COMMAND    "rm -f -- " TREXIO_FILE
+#define SIZE          100
+#define N_CHUNKS      5
+
+static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
+
+/* Write SIZE sparse items (4 indices and 1 value per item) into the TREXIO file
+   in N_CHUNKS consecutive chunks; the first chunk is written at the file offset `offset` */
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'write' mode
+  file = trexio_open(file_name, 'w', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // parameters to be written
+  int32_t* index;
+  double* value;
+
+  index = calloc(4L*SIZE, sizeof(int32_t));
+  value = calloc(SIZE, sizeof(double));
+  // fail the test cleanly instead of dereferencing NULL if an allocation failed
+  assert (index != NULL);
+  assert (value != NULL);
+
+  // item i carries the indices 4*i .. 4*i+3 and the value 3.14+i
+  for(int i=0; i<SIZE; i++){
+    index[4*i]   = 4*i;
+    index[4*i+1] = 4*i+1;
+    index[4*i+2] = 4*i+2;
+    index[4*i+3] = 4*i+3;
+    value[i]     = 3.14 + (double) i;
+  }
+
+  // write mo_num which will be used to determine the optimal size of int indices
+  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
+    rc = trexio_write_mo_num(file, 1000);
+    assert(rc == TREXIO_SUCCESS);
+  }
+
+  // offset_f is the position in the file, offset_d is the position in the in-memory arrays
+  uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
+  uint64_t offset_f = 0UL;
+  uint64_t offset_d = 0UL;
+  if (offset != 0L) offset_f += offset;
+
+  // write n_chunks times using write_sparse
+  for(int i=0; i<N_CHUNKS; ++i){
+    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
+    assert(rc == TREXIO_SUCCESS);
+    offset_d += chunk_size;
+    offset_f += chunk_size;
+  }
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+  // free the allocated memory
+  free(index);
+  free(value);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {
+
+/* Verify which datasets of sparse data are reported as present in the TREXIO file */
+
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open the file in 'read' mode
+  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // mo_2e_int_eri_lr is never written by this unit test, so it has to be absent;
+  // the query is repeated to make sure that the first call did not create a file/dset
+  for (int attempt = 0; attempt < 2; ++attempt) {
+    rc = trexio_has_mo_2e_int_eri_lr(file);
+    assert(rc==TREXIO_HAS_NOT);
+  }
+
+  // the previously written mo_2e_int_eri has to be present
+  rc = trexio_has_mo_2e_int_eri(file);
+  assert(rc==TREXIO_SUCCESS);
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
+
+/* Read chunks of the dataset of sparse data from the TREXIO file: one chunk that fits entirely
+   in the file and one chunk that hits the end of file. `offset` is the file offset at which
+   the data under test has been written. */
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file
+  file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // define arrays to read into
+  int32_t* index_read;
+  double* value_read;
+  uint64_t size_r = 40L;
+
+  index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
+  value_read = (double*) calloc(size_r,sizeof(double));
+  // fail the test cleanly instead of dereferencing NULL if an allocation failed
+  assert (index_read != NULL);
+  assert (value_read != NULL);
+
+  // specify the read parameters, here:
+  // 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5
+  int64_t chunk_read = 10L;
+  int64_t offset_file_read = 40L;
+  int offset_data_read = 5;
+  int64_t read_size_check;
+  read_size_check = chunk_read;
+
+  if (offset != 0L) offset_file_read += offset;
+
+  // read one chunk using the aforementioned parameters
+  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
+  assert(rc == TREXIO_SUCCESS);
+  assert(chunk_read == read_size_check);
+  // elements in front of offset_data_read have to stay zero-initialized
+  assert(index_read[0] == 0);
+  // the first index of the written item k is 4*k, where k is counted from the start of the written data
+  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
+
+  // now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
+  offset_file_read = 97;
+  offset_data_read = 1;
+  int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100
+
+  if (offset != 0L) offset_file_read += offset;
+
+  // read one chunk that will reach EOF and return TREXIO_END code
+  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
+  assert(rc == TREXIO_END);
+  // chunk_read is overwritten with the number of elements that have actually been read
+  assert(chunk_read == eof_read_size_check);
+  // the tail of the array has to stay zero-initialized
+  assert(index_read[4*size_r-1] == 0);
+  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+  // free the memory
+  free(index_read);
+  free(value_read);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {
+
+/* Query the size of the dataset of sparse data stored in the TREXIO file and compare it with size_check */
+
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open the file in 'read' mode
+  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // ask the library for the size of the stored dataset
+  int64_t size_written;
+  rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
+  assert(rc == TREXIO_SUCCESS);
+  assert(size_written == size_check);
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+int main(){
+
+/*============== Test launcher ================*/
+
+  // start from a clean state
+  int rc = system(RM_COMMAND);
+  assert (rc == 0);
+
+  // two write attempts, each one adds SIZE elements in N_CHUNKS chunks:
+  // the first one starts at the beginning of the dataset, the second one is placed right after it
+  for (int pass = 0; pass < 2; ++pass) {
+    const int64_t shift = (int64_t) pass * SIZE;
+
+    test_write_dset_sparse    (TREXIO_FILE, TEST_BACKEND, shift);
+    // the existence check is only performed after the first write
+    if (pass == 0) test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
+    test_read_dset_sparse     (TREXIO_FILE, TEST_BACKEND, shift);
+    test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE + shift);
+  }
+
+  // remove the file produced by the tests
+  rc = system(RM_COMMAND);
+  assert (rc == 0);
+
+  return 0;
+}
--- a/tests/io_dset_sparse_text.c
+++ b/tests/io_dset_sparse_text.c
@ -0,0 +1,230 @@
+#include "trexio.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define TEST_BACKEND  TREXIO_TEXT
+#define TREXIO_FILE   "test_dset_sparse.dir"
+#define RM_COMMAND    "rm -rf " TREXIO_FILE
+#define SIZE          100
+#define N_CHUNKS      5
+
+static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
+
+/* Try to write an array of sparse data into the TREXIO file.
+ * SIZE elements (4 indices + 1 value each) are written in N_CHUNKS chunks of
+ * SIZE/N_CHUNKS elements, the first chunk being placed at position `offset` in the file. */
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'write' mode
+  file = trexio_open(file_name, 'w', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // parameters to be written
+  int32_t* index;
+  double* value;
+
+  index = calloc(4L*SIZE, sizeof(int32_t));
+  value = calloc(SIZE, sizeof(double));
+  // stop here rather than dereference NULL below if an allocation failed
+  assert (index != NULL);
+  assert (value != NULL);
+
+  // element i carries the indices (4i, 4i+1, 4i+2, 4i+3) and the value 3.14 + i
+  for(int i=0; i<SIZE; i++){
+    index[4*i]   = 4*i;
+    index[4*i+1] = 4*i+1;
+    index[4*i+2] = 4*i+2;
+    index[4*i+3] = 4*i+3;
+    value[i]     = 3.14 + (double) i;
+  }
+
+  // write mo_num which will be used to determine the optimal size of int indices
+  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
+    rc = trexio_write_mo_num(file, 1000);
+    assert(rc == TREXIO_SUCCESS);
+  }
+
+  // write dataset chunks of sparse data in the file (including FAKE statements)
+  // offset_f: position in the file; offset_d: position in the local arrays
+  uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
+  uint64_t offset_f = 0UL;
+  uint64_t offset_d = 0UL;
+  if (offset != 0L) offset_f += offset;
+
+  // write n_chunks times using write_sparse
+  for(int i=0; i<N_CHUNKS; ++i){
+    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
+    assert(rc == TREXIO_SUCCESS);
+    offset_d += chunk_size;
+    offset_f += chunk_size;
+  }
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+  // free the allocated memory
+  free(index);
+  free(value);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {
+
+/* Check which sparse datasets are reported as present in the TREXIO file */
+
+  trexio_exit_code status;
+
+/*================= START OF TEST ==================*/
+
+  // open the file in 'read' mode
+  trexio_t* const trex = trexio_open(file_name, 'r', backend, &status);
+  assert (trex != NULL);
+  assert (status == TREXIO_SUCCESS);
+
+  // mo_2e_int_eri_lr must be absent (only the non-lr component is written in this unit test)
+  status = trexio_has_mo_2e_int_eri_lr(trex);
+  assert (status == TREXIO_HAS_NOT);
+
+  // ask a second time: the previous has_ call must not have created a file/dset
+  status = trexio_has_mo_2e_int_eri_lr(trex);
+  assert (status == TREXIO_HAS_NOT);
+
+  // the previously written mo_2e_int_eri must be present
+  status = trexio_has_mo_2e_int_eri(trex);
+  assert (status == TREXIO_SUCCESS);
+
+  // close current session
+  status = trexio_close(trex);
+  assert (status == TREXIO_SUCCESS);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
+
+/* Try to read chunks of the dataset of sparse data in the TREXIO file:
+ * first a chunk that lies fully inside the data, then one that runs past its end.
+ * `offset` is added to both file positions before reading. */
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file
+  file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  // define arrays to read into (zero-initialized so that untouched slots can be detected)
+  int32_t* index_read;
+  double* value_read;
+  uint64_t size_r = 40L;
+
+  index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
+  value_read = (double*) calloc(size_r,sizeof(double));
+  // stop here rather than dereference NULL below if an allocation failed
+  assert (index_read != NULL);
+  assert (value_read != NULL);
+
+  // specify the read parameters, here:
+  // 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5
+  int64_t chunk_read = 10L;
+  int64_t offset_file_read = 40L;
+  int offset_data_read = 5;
+  int64_t read_size_check;
+  read_size_check = chunk_read;
+
+  if (offset != 0L) offset_file_read += offset;
+
+  // read one chunk using the aforementioned parameters
+  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
+  assert(rc == TREXIO_SUCCESS);
+  assert(chunk_read == read_size_check);
+  // the slots before offset_data_read must not have been touched
+  assert(index_read[0] == 0);
+  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
+
+  // now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
+  offset_file_read = 97L;
+  offset_data_read = 1;
+  int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100
+
+  if (offset != 0L) offset_file_read += offset;
+
+  // read one chunk that will reach EOF and return TREXIO_END code
+  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
+  assert(rc == TREXIO_END);
+  // chunk_read is updated to the number of elements actually read
+  assert(chunk_read == eof_read_size_check);
+  // the last slot of the buffer must not have been touched
+  assert(index_read[4*size_r-1] == 0);
+  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
+
+  // close current session
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+
+  // free the memory
+  free(index_read);
+  free(value_read);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {
+
+/* Check that the size reported for the sparse dataset in the TREXIO file equals size_check */
+
+  trexio_exit_code status;
+
+/*================= START OF TEST ==================*/
+
+  // open the file in 'read' mode
+  trexio_t* const trex = trexio_open(file_name, 'r', backend, &status);
+  assert (trex != NULL);
+  assert (status == TREXIO_SUCCESS);
+
+  // query the size of the mo_2e_int_eri dataset and compare it with the expectation
+  int64_t stored_size;
+  status = trexio_read_mo_2e_int_eri_size(trex, &stored_size);
+  assert (status == TREXIO_SUCCESS);
+  assert (stored_size == size_check);
+
+  // close current session
+  status = trexio_close(trex);
+  assert (status == TREXIO_SUCCESS);
+
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+int main(){
+
+/*============== Test launcher ================*/
+
+  // remove any leftover test directory before the tests start
+  int status = system(RM_COMMAND);
+  assert (status == 0);
+
+  // first pass: write SIZE elements in N_CHUNKS chunks at file offset 0, then verify them
+  test_write_dset_sparse    (TREXIO_FILE, TEST_BACKEND, 0);
+  test_has_dset_sparse      (TREXIO_FILE, TEST_BACKEND);
+  test_read_dset_sparse     (TREXIO_FILE, TEST_BACKEND, 0);
+  test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE);
+
+  // second pass: write another SIZE elements at file offset SIZE; the reported size must double
+  test_write_dset_sparse    (TREXIO_FILE, TEST_BACKEND, SIZE);
+  test_read_dset_sparse     (TREXIO_FILE, TEST_BACKEND, SIZE);
+  test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE*2);
+
+  // remove the test directory again once all tests have passed
+  status = system(RM_COMMAND);
+  assert (status == 0);
+
+  return 0;
+}
--- a/tests/io_num_hdf5.c
+++ b/tests/io_num_hdf5.c
@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
  rc = trexio_write_nucleus_num(file, num);
  assert (rc == TREXIO_SUCCESS);

+  rc = trexio_write_nucleus_repulsion(file, 2.14171677);
+  assert (rc == TREXIO_SUCCESS);
+
  // attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_ARG_2
  rc = trexio_write_mo_num(file, 0);
  assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
  rc = trexio_has_nucleus_num(file);
  assert (rc == TREXIO_SUCCESS);

+  rc = trexio_has_nucleus_repulsion(file);
+  assert (rc == TREXIO_SUCCESS);
+
  // check that the num variable does not exist
  rc = trexio_has_mo_num(file);
  assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
  // parameters to be read
  int num;
  int cartesian;
+  float repulsion_32;
+  double repulsion_64, d;

 /*================= START OF TEST ==================*/

@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
  assert (rc == TREXIO_SUCCESS);
  assert (num == 12);

+  rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
+  assert (rc == TREXIO_SUCCESS);
+  d = repulsion_32 - 2.14171677;
+  assert( d*d < 1.e-8 );
+
+  rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
+  assert (rc == TREXIO_SUCCESS);
+  d = repulsion_64 - 2.14171677;
+  assert( d*d < 1.e-14 );
+
  // read non-existing numerical attribute from the file
  rc = trexio_read_mo_num(file, &num);
  assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {

  return 0;
 }
-
-
--- a/tests/io_num_text.c
+++ b/tests/io_num_text.c
@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
  rc = trexio_write_nucleus_num(file, num);
  assert (rc == TREXIO_SUCCESS);

+  rc = trexio_write_nucleus_repulsion(file, 2.14171677);
+  assert (rc == TREXIO_SUCCESS);
+
  // attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_ARG_2
  rc = trexio_write_mo_num(file, 0);
  assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
  rc = trexio_has_nucleus_num(file);
  assert (rc == TREXIO_SUCCESS);

+  rc = trexio_has_nucleus_repulsion(file);
+  assert (rc == TREXIO_SUCCESS);
+
  // check that the num variable does not exist
  rc = trexio_has_mo_num(file);
  assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
  // parameters to be read
  int num;
  int cartesian;
+  float repulsion_32;
+  double repulsion_64, d;

 /*================= START OF TEST ==================*/

@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
  assert (rc == TREXIO_SUCCESS);
  assert (num == 12);

+  rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
+  assert (rc == TREXIO_SUCCESS);
+  d = repulsion_32 - 2.14171677;
+  assert( d*d < 1.e-8 );
+
+  rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
+  assert (rc == TREXIO_SUCCESS);
+  d = repulsion_64 - 2.14171677;
+  assert( d*d < 1.e-14 );
+
  // read non-existing numerical attribute from the file
  rc = trexio_read_mo_num(file, &num);
  assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {

  return 0;
 }
-
-
--- a/tests/test_f.f90
+++ b/tests/test_f.f90
@ -2,25 +2,25 @@ program test_trexio
  use trexio
  use, intrinsic :: iso_c_binding
  implicit none
-  
+
  logical :: have_hdf5

-  print *      , "============================================" 
-  print'(a,a)' , "         TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION 
+  print *      , "============================================"
+  print'(a,a)' , "         TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION
  print'(a,i3)', "         TREXIO MAJOR VERSION  : ", TREXIO_VERSION_MAJOR
  print'(a,i3)', "         TREXIO MINOR VERSION  : ", TREXIO_VERSION_MINOR
-  print *      , "============================================" 
+  print *      , "============================================"

-  call system('rm -rf test_write_f.dir')
+  call system('rm -rf -- test_write_f.dir')
  print *, 'call test_write(''test_write_f.dir'', TREXIO_TEXT)'
  call test_write('test_write_f.dir', TREXIO_TEXT)
  print *, 'call test_read(''test_write_f.dir'', TREXIO_TEXT)'
  call test_read('test_write_f.dir', TREXIO_TEXT)
-  call system('rm -rf test_write_f.dir')
+  call system('rm -rf -- test_write_f.dir')

  call test_read_void('test_write_f.dir', TREXIO_TEXT)

-  ! No way to conditionally check whether compilation was done with HDF5 
+  ! No way to conditionally check whether compilation was done with HDF5
  ! So temporarily disable the test for HDF5 back end at the moment
  have_hdf5 = trexio_has_backend(TREXIO_HDF5)
  if (have_hdf5) then
@ -30,7 +30,7 @@ program test_trexio
    print *, 'call test_read(''test_write_f.h5'', TREXIO_HDF5)'
    call test_read('test_write_f.h5', TREXIO_HDF5)
    call system('rm -f -- test_write_f.h5')
-    
+
    call test_read_void('test_write_f.h5', TREXIO_HDF5)
  endif

@ -61,6 +61,22 @@ subroutine test_write(file_name, back_end)
  character(len=:), allocatable :: sym_str
  character(len=:), allocatable :: label(:)

+  ! sparse data
+  integer(4) :: index_sparse_mo_2e_int_eri(4,100)
+  double precision :: value_sparse_mo_2e_int_eri(100)
+
+  integer :: i, n_buffers = 5
+  integer(8) :: buf_size, offset
+  buf_size = 100/n_buffers
+
+  do i = 1, 100
+    index_sparse_mo_2e_int_eri(1,i) = 4*i   - 3
+    index_sparse_mo_2e_int_eri(2,i) = 4*i+1 - 3
+    index_sparse_mo_2e_int_eri(3,i) = 4*i+2 - 3
+    index_sparse_mo_2e_int_eri(4,i) = 4*i+3 - 3
+    value_sparse_mo_2e_int_eri(i) = 3.14 + float(i)
+  enddo
+
  ! parameters to be written
  num = 12
  charge = (/ 6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1.  /)
@ -96,6 +112,9 @@ subroutine test_write(file_name, back_end)
  rc = trexio_has_nucleus_charge(trex_file)
  call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 2')

+  rc = trexio_has_mo_2e_int_eri(trex_file)
+  call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 3')
+
  rc = trexio_write_nucleus_num(trex_file, num)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE NUM')

@ -106,8 +125,8 @@ subroutine test_write(file_name, back_end)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE COORD')

  rc = trexio_write_nucleus_label(trex_file, label, 5)
-  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
  deallocate(label)
+  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')

  rc = trexio_write_nucleus_point_group(trex_file, sym_str, 32)
  deallocate(sym_str)
@ -119,6 +138,20 @@ subroutine test_write(file_name, back_end)
  rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX')

+  ! write mo_num which will be used to determine the optimal size of int indices
+  if (trexio_has_mo_num(trex_file) == TREXIO_HAS_NOT) then
+    rc = trexio_write_mo_num(trex_file, 1000)
+    call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE MO NUM')
+  endif
+
+  offset = 0
+  do i = 1,n_buffers
+    rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, &
+	                            index_sparse_mo_2e_int_eri(1,offset+1), &
+				    value_sparse_mo_2e_int_eri(offset+1))
+    call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE SPARSE')
+    offset = offset + buf_size
+  enddo

  rc = trexio_has_nucleus_num(trex_file)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 1')
@ -126,6 +159,9 @@ subroutine test_write(file_name, back_end)
  rc = trexio_has_nucleus_coord(trex_file)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 2')

+  rc = trexio_has_mo_2e_int_eri(trex_file)
+  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 3')
+
  rc = trexio_close(trex_file)
  call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS CLOSE')

@ -160,11 +196,25 @@ subroutine test_read(file_name, back_end)

  character(len=32) :: sym_str

+  ! sparse data
+  integer(4) :: index_sparse_mo_2e_int_eri(4,20)
+  double precision :: value_sparse_mo_2e_int_eri(20)
+  integer(8) :: read_buf_size = 10
+  integer(8) :: read_buf_size_save = 10
+  integer(8) :: offset_read = 40
+  integer(8) :: offset_data_read = 5
+  integer(8) :: offset_eof  = 97
+  integer(8) :: offset_data_eof = 1
+  integer(8) :: size_toread = 0
+
  character*(128) :: str

  num = 12
  basis_shell_num = 24

+  index_sparse_mo_2e_int_eri = 0
+  value_sparse_mo_2e_int_eri = 0.0d0
+
 ! ================= START OF TEST ===================== !

  trex_file = trexio_open(file_name, 'r', back_end, rc)
@ -199,7 +249,7 @@ subroutine test_read(file_name, back_end)
    call exit(-1)
  endif

-  
+
  rc = trexio_read_nucleus_label(trex_file, label, 2)
  call trexio_assert(rc, TREXIO_SUCCESS)
  if (trim(label(2)) == 'Na') then
@ -230,6 +280,52 @@ subroutine test_read(file_name, back_end)
  endif


+  rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, &
+	                         index_sparse_mo_2e_int_eri(1, offset_data_read + 1), &
+			         value_sparse_mo_2e_int_eri(offset_data_read + 1))
+  !do  i = 1,20
+  !  write(*,*) index_sparse_mo_2e_int_eri(1,i)
+  !enddo
+  call trexio_assert(rc, TREXIO_SUCCESS)
+  if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
+      index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then
+    write(*,*) 'SUCCESS READ SPARSE DATA'
+  else
+    print *, 'FAILURE SPARSE DATA CHECK'
+    call exit(-1)
+  endif
+
+
+  ! attempt to read reaching EOF: should return TREXIO_END and
+  ! NOT increment the existing values in the buffer (only upd with what has been read)
+  rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, &
+	                         index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), &
+			         value_sparse_mo_2e_int_eri(offset_data_eof + 1))
+  !do  i = 1,20
+  !  write(*,*) index_sparse_mo_2e_int_eri(1,i)
+  !enddo
+  call trexio_assert(rc, TREXIO_END)
+  if (read_buf_size == 3 .and. &
+      index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
+      index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. &
+      index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then
+    write(*,*) 'SUCCESS READ SPARSE DATA EOF'
+    read_buf_size = read_buf_size_save
+  else
+    print *, 'FAILURE SPARSE DATA EOF CHECK'
+    call exit(-1)
+  endif
+
+  rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread)
+  call trexio_assert(rc, TREXIO_SUCCESS)
+  if (size_toread == 100) then
+    write(*,*) 'SUCCESS READ SPARSE SIZE'
+  else
+    print *, 'FAILURE SPARSE SIZE CHECK'
+    call exit(-1)
+  endif
+
+
  rc = trexio_close(trex_file)
  call trexio_assert(rc, TREXIO_SUCCESS)

@ -254,6 +350,9 @@ subroutine test_read_void(file_name, back_end)
 ! ================= START OF TEST ===================== !

  trex_file = trexio_open(file_name, 'r', back_end, rc)
+  if (rc /= TREXIO_OPEN_ERROR) then
+    rc = trexio_close(trex_file)
+  endif
  call trexio_assert(rc, TREXIO_OPEN_ERROR)

  call trexio_string_of_error(rc, str)
@ -262,4 +361,3 @@ subroutine test_read_void(file_name, back_end)
 ! ================= END OF TEST ===================== !

 end subroutine test_read_void
-
--- a/tools/generator.py
+++ b/tools/generator.py
@ -6,29 +6,31 @@ config_file = 'trex.json'
 trex_config = read_json(config_file)
 # --------------------------------------------------------------------------- #

-# -------------------------------- [WIP] ------------------------------------ #
-# for now remove rdm from config because it functions are hardcoded
-del trex_config['rdm']
-# --------------------------------------------------------------------------- #
-
 # -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- #
 group_dict = get_group_dict(trex_config)
 detailed_nums = get_detailed_num_dict(trex_config)
 detailed_strs = get_detailed_str_dict(trex_config)
 # helper dictionaries that contain names of groups, nums or dsets as keys
 dsets = get_dset_dict(trex_config)
-detailed_dsets_nostr, detailed_dsets_str = split_dset_dict_detailed(dsets)
+detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
 detailed_dsets = detailed_dsets_nostr.copy()
 detailed_dsets.update(detailed_dsets_str)
+# build a big dictionary with all pre-processed data
+detailed_all = {
+    'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse),
+    'groups'   : group_dict,
+    'numbers'  : detailed_nums,
+    'strings'  : detailed_strs
+}
 # consistency check for dimensioning variables
 check_dim_consistency(detailed_nums, dsets)
 # --------------------------------------------------------------------------- #

 # -------------------- GET TEMPLATED FILES TO BE POPULATED ------------------ #
 source = ['front', 'text', 'hdf5']
-# build helper dictionaries with paths per source directory 
+# build helper dictionaries with paths per source directory
 template_paths = get_template_paths(source)
-# build helper dictionaries with source files per source directory 
+# build helper dictionaries with source files per source directory
 source_files = get_source_files(template_paths)
 # build helper dictionaries with templated files
 files_todo = get_files_todo(source_files)
@ -38,7 +40,7 @@ files_todo = get_files_todo(source_files)

 # populate files with iterative scheme, i.e. for unique functions
 for fname in files_todo['auxiliary']:
-    iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
+    iterative_populate_file(fname, template_paths, detailed_all)

 # populate has/read/write_num functions with recursive scheme
 for fname in files_todo['attr_num']:
@ -48,14 +50,18 @@ for fname in files_todo['attr_num']:
 for fname in files_todo['attr_str']:
    recursive_populate_file(fname, template_paths, detailed_strs)

-# populate has/read/write_dset (numerical) functions with recursive scheme 
+# populate has/read/write_dset (numerical) functions with recursive scheme
 for fname in files_todo['dset_data']:
    recursive_populate_file(fname, template_paths, detailed_dsets_nostr)

-# populate has/read/write_dset (strings) functions with recursive scheme 
+# populate has/read/write_dset (strings) functions with recursive scheme
 for fname in files_todo['dset_str']:
    recursive_populate_file(fname, template_paths, detailed_dsets_str)

+# populate has/read/write_dset (sparse) functions with recursive scheme
+for fname in files_todo['dset_sparse']:
+    recursive_populate_file(fname, template_paths, detailed_dsets_sparse)
+
 # populate group-related functions with mixed (iterative+recursive) scheme [text backend]
 for fname in files_todo['group']:
    special_populate_text_group(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
--- a/tools/generator_tools.py
+++ b/tools/generator_tools.py
@ -4,7 +4,7 @@ from json import load as json_load


 def read_json(fname: str) -> dict:
-    """ 
+    """
    Read configuration from the input `fname` JSON file.

            Parameters:
@ -23,7 +23,7 @@ def read_json(fname: str) -> dict:


 def get_files_todo(source_files: dict) -> dict:
-    """ 
+    """
    Build dictionaries of templated files per objective.

            Parameters:
@ -36,21 +36,21 @@ def get_files_todo(source_files: dict) -> dict:
    for key in source_files.keys():
        all_files += source_files[key]

-    files_todo = {}       
+    files_todo = {}
    #files_todo['all'] = list(filter(lambda x: 'read' in x or 'write' in x or 'has' in x or 'hrw' in x or 'flush' in x or 'free' in x, all_files))
    files_todo['all'] = [f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f]
-    for key in ['dset_data', 'dset_str', 'attr_num', 'attr_str', 'group']:
+    for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']:
        files_todo[key] = list(filter(lambda x: key in x, files_todo['all']))

    files_todo['group'].append('struct_text_group_dset.h')
    # files that correspond to iterative population (e.g. the code is repeated within the function body but the function itself is unique)
-    files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h'] 
+    files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h']

    return files_todo


 def get_source_files(paths: dict) -> dict:
-    """ 
+    """
    Build dictionaries of all files per source directory.

            Parameters:
@ -67,7 +67,7 @@ def get_source_files(paths: dict) -> dict:


 def get_template_paths(source: list) -> dict:
-    """ 
+    """
    Build dictionary of the absolute paths to directory with templates per source.

            Parameters:
@ -86,7 +86,7 @@ def get_template_paths(source: list) -> dict:


 def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> None:
-    """ 
+    """
    Populate files containing basic read/write/has functions.

            Parameters:
@ -107,6 +107,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
                'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
                'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
                'group_num_h5_dtype', 'group_num_py_dtype',
+                'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
+                'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
+                'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
+                'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
                'group_dset', 'group_num', 'group_str', 'group']

    for item in detailed_source.keys():
@ -133,9 +137,9 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
                    elif 'uncommented by the generator for dimensioning' in line:
                        # only uncomment and write the line if `num` is in the name
                        if 'dim' in detailed_source[item]['trex_json_int_type']:
-                            templine = line.replace('//', '') 
+                            templine = line.replace('//', '')
                            f_out.write(templine)
-                    # general case of recursive replacement of inline triggers 
+                    # general case of recursive replacement of inline triggers
                    else:
                        populated_line = recursive_replace_line(line, triggers, detailed_source[item])
                        f_out.write(populated_line)
@ -144,8 +148,8 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N


 def recursive_replace_line (input_line: str, triggers: list, source: dict) -> str:
-    """ 
-    Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`. 
+    """
+    Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`.

            Parameters:
                    input_line (str)    : input line
@ -154,10 +158,10 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st

            Returns:
                    output_line (str)   : processed (replaced) line
-    """    
+    """
    is_triggered = False
    output_line = input_line
-    
+
    if '$' in input_line:
        for case in triggers:
            test_case = f'${case}$'
@ -175,21 +179,22 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
        else:
            print(output_line)
            raise ValueError('Recursion went wrong, not all cases considered')
-        
+
    return output_line


-def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
-    """ 
+def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
+    """
    Iteratively populate files with unique functions that contain templated variables.

            Parameters:
                    filename (str)          : template file to be populated
                    paths (dict)            : dictionary of paths per source directory
-                    groups (dict)           : dictionary of groups
-                    datasets (dict)         : dictionary of datasets with substitution details
-                    numbers (dict)          : dictionary of numbers with substitution details
-                    strings (dict)          : dictionary of strings with substitution details
+                    detailed_all(dict)      : dictionary with substitution details with the following keys:
+                        'groups'            : dictionary of groups with substitution details
+                        'datasets'          : dictionary of datasets with substitution details
+                        'numbers'           : dictionary of numbers with substitution details
+                        'strings'           : dictionary of strings with substitution details

            Returns:
                    None
@ -200,7 +205,7 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
    templ_path = get_template_path(filename, paths)
    filename_out = join('populated',f'pop_{filename}')
 # Note: it is important that special conditions like add_trigger above will be checked before standard triggers
-# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed 
+# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed
 # from the template and the special condition will never be met.
    with open(join(templ_path,filename), 'r') as f_in :
        with open(join(templ_path,filename_out), 'a') as f_out :
@ -209,29 +214,29 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
                if id == 0:
                    # special case for proper error handling when deallocting text groups
                    error_handler = '  if (rc != TREXIO_SUCCESS) return rc;\n'
-                    populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
+                    populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
                    f_out.write(populated_line)
                elif id == 1:
-                    populated_line = iterative_replace_line(line, triggers[id], datasets, None)
+                    populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
                    f_out.write(populated_line)
                elif id == 2:
-                    populated_line = iterative_replace_line(line, triggers[id], numbers, None)
+                    populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
                    f_out.write(populated_line)
                elif id == 3:
-                    populated_line = iterative_replace_line(line, triggers[id], strings, None)
+                    populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
                    f_out.write(populated_line)
                elif id == 4:
-                    populated_line = iterative_replace_line(line, triggers[id], groups, None)
+                    populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
                    f_out.write(populated_line)
                else:
                    f_out.write(line)
-                    
+
            f_out.write("\n")


 def iterative_replace_line (input_line: str, case: str, source: dict, add_line: str) -> str:
-    """ 
-    Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`. 
+    """
+    Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`.

            Parameters:
                    input_line (str)    : input line
@ -241,7 +246,7 @@ def iterative_replace_line (input_line: str, case: str, source: dict, add_line:

            Returns:
                    output_block (str)   : processed (replaced) block of text
-    """    
+    """
    output_block = ""
    for item in source.keys():
        templine1 = input_line.replace(case.upper(), item.upper())
@ -270,12 +275,12 @@ def check_triggers (input_line: str, triggers: list) -> int:
        if trig in input_line or trig.upper() in input_line:
            out_id = id
            return out_id
-        
+
    return out_id


 def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detailed_dset: dict, detailed_numbers: dict, detailed_strings: dict) -> None:
-    """ 
+    """
    Special population for group-related functions in the TEXT back end.

            Parameters:
@ -292,8 +297,8 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
    fname_new = join('populated',f'pop_{fname}')
    templ_path = get_template_path(fname, paths)

-    triggers = ['group_dset_dtype', 'group_dset_std_dtype_out', 'group_dset_std_dtype_in',
-                'group_num_dtype_double', 'group_num_std_dtype_out', 'group_num_std_dtype_in',
+    triggers = ['group_dset_dtype', 'group_dset_format_printf', 'group_dset_format_scanf',
+                'group_num_dtype_double', 'group_num_format_printf', 'group_num_format_scanf',
                'group_dset', 'group_num', 'group_str', 'group']

    for group in group_dict.keys():
@ -316,16 +321,16 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
                    elif 'START REPEAT GROUP_NUM' in line or 'START REPEAT GROUP_ATTR_STR' in line:
                        subloop_num = True
                        continue
-                    
+
                    if 'END REPEAT GROUP_DSET' in line:

                        for dset in detailed_dset.keys():
-                            if group != detailed_dset[dset]['group']: 
+                            if group != detailed_dset[dset]['group']:
                                continue

-                            if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['dtype'] != 'char*'):
+                            if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['group_dset_dtype'] != 'char*'):
                                continue
-                            if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['dtype'] == 'char*'):
+                            if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['group_dset_dtype'] == 'char*'):
                                continue

                            dset_allocated.append(dset)
@ -351,7 +356,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai

                    elif 'END REPEAT GROUP_NUM' in line:
                        for dim in detailed_numbers.keys():
-                            if group != detailed_numbers[dim]['group']: 
+                            if group != detailed_numbers[dim]['group']:
                                continue

                            save_body = loop_body
@ -364,7 +369,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai

                    elif 'END REPEAT GROUP_ATTR_STR' in line:
                        for str in detailed_strings.keys():
-                            if group != detailed_strings[str]['group']: 
+                            if group != detailed_strings[str]['group']:
                                continue

                            str_allocated.append(str)
@ -390,22 +395,22 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
                        continue

                    if not subloop_num and not subloop_dset:
-                        # NORMAL CASE WITHOUT SUBLOOPS 
+                        # NORMAL CASE WITHOUT SUBLOOPS
                        if '$group_dset' in line:
                            for dset in detailed_dset.keys():
-                                if group != detailed_dset[dset]['group']: 
+                                if group != detailed_dset[dset]['group']:
                                    continue
                                populated_line = recursive_replace_line(line, triggers, detailed_dset[dset])
                                f_out.write(populated_line)
                        elif '$group_str' in line:
                            for str in detailed_strings.keys():
-                                if group != detailed_strings[str]['group']: 
+                                if group != detailed_strings[str]['group']:
                                    continue
                                populated_line = recursive_replace_line(line, triggers, detailed_strings[str])
                                f_out.write(populated_line)
                        elif '$group_num$' in line:
                            for dim in detailed_numbers.keys():
-                                if group != detailed_numbers[dim]['group']: 
+                                if group != detailed_numbers[dim]['group']:
                                    continue
                                populated_line = recursive_replace_line(line, triggers, detailed_numbers[dim])
                                f_out.write(populated_line)
@ -421,7 +426,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai


 def get_template_path (filename: str, path_dict: dict) -> str:
-    """ 
+    """
    Returns the absolute path to the directory with indicated `filename` template.

            Parameters:
@ -435,12 +440,12 @@ def get_template_path (filename: str, path_dict: dict) -> str:
        if dir_type in filename:
            path = path_dict[dir_type]
            return path
-        
+
    raise ValueError('Filename should contain one of the keywords')


 def get_group_dict (configuration: dict) -> dict:
-    """ 
+    """
    Returns the dictionary of all groups.

            Parameters:
@ -456,10 +461,126 @@ def get_group_dict (configuration: dict) -> dict:
    return group_dict


+def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) -> dict:
+    """
+    Returns the dictionary of dtype-related templated variables set for a given `dtype`.
+    Keys are names of templated variables, values are strings to be used by the generator.
+
+            Parameters:
+                    dtype (str)         : dtype corresponding to the trex.json (i.e. int/dim/float/float sparse/str)
+                    target (str)        : `num` or `dset`
+                    rank (int)          : [optional] value of n in n-index (sparse) dset; needed to build the printf/scanf format string
+                    int_len_printf(dict): [optional]
+                                          keys: precision (e.g. 32 for int32_t)
+                                          values: lengths reserved for one index when printing n-index (sparse) dset (e.g. 10 for int32_t)
+
+            Returns:
+                    dtype_dict (dict) : dictionary dtype-related substitutions
+    """
+    if not target in ['num', 'dset']:
+        raise Exception('Only num or dset target can be set.')
+    if 'sparse' in dtype:
+        if rank is None or int_len_printf is None:
+            raise Exception("Both rank and int_len_printf arguments has to be provided to build the dtype_dict for sparse data.")
+    if rank is not None and rank <= 1:
+        raise Exception('Rank of sparse quantity cannot be lower than 2.')
+    if int_len_printf is not None and not isinstance(int_len_printf, dict):
+        raise Exception('int_len_printf has to be a dictionary of lengths for different precisions.')
+
+    dtype_dict = {}
+    # set up the key-value pairs depending on the dtype
+    if dtype == 'float':
+        dtype_dict.update({
+            'default_prec'                    : '64',
+            f'group_{target}_dtype'           : 'double',
+            f'group_{target}_h5_dtype'        : 'native_double',
+            f'group_{target}_f_dtype_default' : 'real(8)',
+            f'group_{target}_f_dtype_double'  : 'real(8)',
+            f'group_{target}_f_dtype_single'  : 'real(4)',
+            f'group_{target}_dtype_default'   : 'double',
+            f'group_{target}_dtype_double'    : 'double',
+            f'group_{target}_dtype_single'    : 'float',
+            f'group_{target}_format_printf'   : '24.16e',
+            f'group_{target}_format_scanf'    : 'lf',
+            f'group_{target}_py_dtype'        : 'float'
+        })
+    elif dtype in ['int', 'dim', 'index']:
+        dtype_dict.update({
+            'default_prec'                    : '32',
+            f'group_{target}_dtype'           : 'int64_t',
+            f'group_{target}_h5_dtype'        : 'native_int64',
+            f'group_{target}_f_dtype_default' : 'integer(4)',
+            f'group_{target}_f_dtype_double'  : 'integer(8)',
+            f'group_{target}_f_dtype_single'  : 'integer(4)',
+            f'group_{target}_dtype_default'   : 'int32_t',
+            f'group_{target}_dtype_double'    : 'int64_t',
+            f'group_{target}_dtype_single'    : 'int32_t',
+            f'group_{target}_format_printf'   : '" PRId64 "',
+            f'group_{target}_format_scanf'    : '" SCNd64 "',
+            f'group_{target}_py_dtype'        : 'int'
+        })
+    elif dtype == 'str':
+        dtype_dict.update({
+            'default_prec'                   : '',
+            f'group_{target}_dtype'          : 'char*',
+            f'group_{target}_h5_dtype'       : '',
+            f'group_{target}_f_dtype_default': '',
+            f'group_{target}_f_dtype_double' : '',
+            f'group_{target}_f_dtype_single' : '',
+            f'group_{target}_dtype_default'  : 'char*',
+            f'group_{target}_dtype_double'   : '',
+            f'group_{target}_dtype_single'   : '',
+            f'group_{target}_format_printf'  : 's',
+            f'group_{target}_format_scanf'   : 's',
+            f'group_{target}_py_dtype'       : 'str'
+        })
+    elif 'sparse' in dtype:
+        # build format string for n-index sparse quantity
+        item_printf_8  = f'%{int_len_printf[8]}" PRIu8 " '
+        item_printf_16 = f'%{int_len_printf[16]}" PRIu16 " '
+        item_printf_32 = f'%{int_len_printf[32]}" PRId32 " '
+        item_scanf  = '%" SCNd32 " '
+        group_dset_format_printf_8 = '"'
+        group_dset_format_printf_16 = '"'
+        group_dset_format_printf_32 = '"'
+        group_dset_format_scanf  = ''
+        for i in range(rank):
+            group_dset_format_printf_8  += item_printf_8
+            group_dset_format_printf_16 += item_printf_16
+            group_dset_format_printf_32 += item_printf_32
+            group_dset_format_scanf  += item_scanf
+        # append the format string for float values
+        group_dset_format_printf_8  += '%24.16e" '
+        group_dset_format_printf_16 += '%24.16e" '
+        group_dset_format_printf_32 += '%24.16e" '
+        group_dset_format_scanf  += '%lf'
+
+        # set up the dictionary for sparse
+        dtype_dict.update({
+            'default_prec'                   : '',
+            f'group_{target}_dtype'          : 'double',
+            f'group_{target}_h5_dtype'       : '',
+            f'group_{target}_f_dtype_default': '',
+            f'group_{target}_f_dtype_double' : '',
+            f'group_{target}_f_dtype_single' : '',
+            f'group_{target}_dtype_default'  : '',
+            f'group_{target}_dtype_double'   : '',
+            f'group_{target}_dtype_single'   : '',
+            f'sparse_format_printf_8'        : group_dset_format_printf_8,
+            f'sparse_format_printf_16'       : group_dset_format_printf_16,
+            f'sparse_format_printf_32'       : group_dset_format_printf_32,
+            f'group_{target}_format_scanf'   : group_dset_format_scanf,
+            f'group_{target}_py_dtype'       : ''
+        })
+
+    return dtype_dict
+
+
+
 def get_detailed_num_dict (configuration: dict) -> dict:
-    """ 
+    """
    Returns the dictionary of all `num`-suffixed variables.
-    Keys are names, values are subdictionaries containing corresponding group and group_num names. 
+    Keys are names, values are subdictionaries containing corresponding group and group_num names.

            Parameters:
                    configuration (dict) : configuration from `trex.json`
@ -472,40 +593,17 @@ def get_detailed_num_dict (configuration: dict) -> dict:
        for k2,v2 in v1.items():
            if len(v2[1]) == 0:
                tmp_num = f'{k1}_{k2}'
-                if 'str' not in v2[0]:
+                if not 'str' in v2[0]:
                    tmp_dict = {}
                    tmp_dict['group'] = k1
                    tmp_dict['group_num'] = tmp_num
                    num_dict[tmp_num] = tmp_dict

-                    # TODO the arguments below are almost the same as for group_dset (except for trex_json_int_type) and can be exported from somewhere
-                    if v2[0] == 'float':
-                        tmp_dict['datatype'] = 'double'
-                        tmp_dict['group_num_h5_dtype']       = 'native_double'
-                        tmp_dict['group_num_f_dtype_default']= 'real(8)'
-                        tmp_dict['group_num_f_dtype_double'] = 'real(8)'
-                        tmp_dict['group_num_f_dtype_single'] = 'real(4)'
-                        tmp_dict['group_num_dtype_default']= 'double'
-                        tmp_dict['group_num_dtype_double'] = 'double'
-                        tmp_dict['group_num_dtype_single'] = 'float'
-                        tmp_dict['default_prec']   = '64'
-                        tmp_dict['group_num_std_dtype_out'] = '24.16e'
-                        tmp_dict['group_num_std_dtype_in'] = 'lf'
-                        tmp_dict['group_num_py_dtype'] = 'float'
-                    elif v2[0] in ['int', 'dim']:
-                        tmp_dict['datatype'] = 'int64_t'
-                        tmp_dict['group_num_h5_dtype'] = 'native_int64'
-                        tmp_dict['group_num_f_dtype_default']= 'integer(4)'
-                        tmp_dict['group_num_f_dtype_double'] = 'integer(8)'
-                        tmp_dict['group_num_f_dtype_single'] = 'integer(4)'
-                        tmp_dict['group_num_dtype_default']= 'int32_t'
-                        tmp_dict['group_num_dtype_double'] = 'int64_t'
-                        tmp_dict['group_num_dtype_single'] = 'int32_t'
-                        tmp_dict['default_prec']   = '32'
-                        tmp_dict['group_num_std_dtype_out'] = '" PRId64 "'
-                        tmp_dict['group_num_std_dtype_in']  = '" SCNd64 "' 
-                        tmp_dict['group_num_py_dtype'] = 'int'
+                    tmp_dict.update(get_dtype_dict(v2[0], 'num'))
+                    if v2[0] in ['int', 'dim']:
                        tmp_dict['trex_json_int_type'] = v2[0]
+                    else:
+                        tmp_dict['trex_json_int_type'] = ''

    return num_dict

@ -536,8 +634,8 @@ def get_detailed_str_dict (configuration: dict) -> dict:


 def get_dset_dict (configuration: dict) -> dict:
-    """ 
-    Returns the dictionary of datasets. 
+    """
+    Returns the dictionary of datasets.
    Keys are names, values are lists containing datatype, list of dimensions and group name

            Parameters:
@ -559,8 +657,8 @@ def get_dset_dict (configuration: dict) -> dict:


 def split_dset_dict_detailed (datasets: dict) -> tuple:
-    """ 
-    Returns the detailed dictionary of datasets. 
+    """
+    Returns the detailed dictionary of datasets.
    Keys are names, values are subdictionaries containing substitutes for templated variables

            Parameters:
@ -571,106 +669,106 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
    """
    dset_numeric_dict = {}
    dset_string_dict = {}
+    dset_sparse_dict = {}
    for k,v in datasets.items():
        # create a temp dictionary
        tmp_dict = {}
-        # specify details required to replace templated variables later
-        if v[0] == 'float':
-            datatype = 'double'
-            group_dset_h5_dtype       = 'native_double'
-            group_dset_f_dtype_default= 'real(8)'
-            group_dset_f_dtype_double = 'real(8)'
-            group_dset_f_dtype_single = 'real(4)'
-            group_dset_dtype_default= 'double'
-            group_dset_dtype_double = 'double'
-            group_dset_dtype_single = 'float'
-            default_prec   = '64'
-            group_dset_std_dtype_out = '24.16e'
-            group_dset_std_dtype_in = 'lf'
-            group_dset_py_dtype = 'float'
-        elif v[0] in ['int', 'index']:
-            datatype = 'int64_t'
-            group_dset_h5_dtype = 'native_int64'
-            group_dset_f_dtype_default= 'integer(4)'
-            group_dset_f_dtype_double = 'integer(8)'
-            group_dset_f_dtype_single = 'integer(4)'
-            group_dset_dtype_default= 'int32_t'
-            group_dset_dtype_double = 'int64_t'
-            group_dset_dtype_single = 'int32_t'
-            default_prec   = '32'
-            group_dset_std_dtype_out = '" PRId64 "'
-            group_dset_std_dtype_in  = '" SCNd64 "' 
-            group_dset_py_dtype = 'int'
-        elif v[0] == 'str':
-            datatype = 'char*'
-            group_dset_h5_dtype = ''
-            group_dset_f_dtype_default = ''
-            group_dset_f_dtype_double = ''
-            group_dset_f_dtype_single = ''
-            group_dset_dtype_default = 'char*'
-            group_dset_dtype_double = ''
-            group_dset_dtype_single = ''
-            default_prec   = ''
-            group_dset_std_dtype_out = 's'
-            group_dset_std_dtype_in  = 's' 
-            group_dset_py_dtype = 'str'
-        
-        # add the dset name for templates
+        rank = len(v[1])
+        datatype = v[0]
+
+        # define whether the dset is sparse
+        is_sparse = False
+        int_len_printf = {}
+        if 'sparse' in datatype:
+            is_sparse = True
+            int_len_printf[32] = 10
+            int_len_printf[16] = 5
+            int_len_printf[8] = 3
+
+        # get the dtype-related substitutions required to replace templated variables later
+        if not is_sparse:
+            dtype_dict = get_dtype_dict(datatype, 'dset')
+        else:
+            dtype_dict = get_dtype_dict(datatype, 'dset', rank, int_len_printf)
+
+        tmp_dict.update(dtype_dict)
+
+        # set the group_dset key to the full name of the dset
        tmp_dict['group_dset'] = k
        # add flag to detect index types
-        if 'index' == v[0]:
+        if 'index' in datatype:
            tmp_dict['is_index'] = 'file->one_based'
        else:
            tmp_dict['is_index'] = 'false'
-        # add the datatypes for templates
-        tmp_dict['dtype'] = datatype
-        tmp_dict['group_dset_dtype'] = datatype
-        tmp_dict['group_dset_h5_dtype'] = group_dset_h5_dtype 
-        tmp_dict['group_dset_f_dtype_default'] = group_dset_f_dtype_default
-        tmp_dict['group_dset_f_dtype_double'] = group_dset_f_dtype_double
-        tmp_dict['group_dset_f_dtype_single'] = group_dset_f_dtype_single
-        tmp_dict['group_dset_dtype_default'] = group_dset_dtype_default
-        tmp_dict['group_dset_dtype_double'] = group_dset_dtype_double
-        tmp_dict['group_dset_dtype_single'] = group_dset_dtype_single
-        tmp_dict['default_prec'] = default_prec
-        tmp_dict['group_dset_std_dtype_in'] = group_dset_std_dtype_in
-        tmp_dict['group_dset_std_dtype_out'] = group_dset_std_dtype_out
-        tmp_dict['group_dset_py_dtype'] = group_dset_py_dtype
+
        # add the rank
-        tmp_dict['rank'] = len(v[1])
-        tmp_dict['group_dset_rank'] = str(tmp_dict['rank'])
+        tmp_dict['rank'] = rank
+        tmp_dict['group_dset_rank'] = str(rank)
        # add the list of dimensions
        tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
        # build a list of dimensions to be inserted in the dims array initialization, e.g. {ao_num, ao_num}
        dim_list = tmp_dict['dims'][0]
-        if tmp_dict['rank'] > 1:
-            for i in range(1, tmp_dict['rank']):
+        if rank > 1:
+            for i in range(1, rank):
                dim_toadd = tmp_dict['dims'][i]
                dim_list += f', {dim_toadd}'
-        
+
        tmp_dict['group_dset_dim_list'] = dim_list

-        if tmp_dict['rank'] == 0:
+        if rank == 0:
            dim_f_list = ""
        else:
            dim_f_list = "(*)"
        tmp_dict['group_dset_f_dims'] = dim_f_list

+        if is_sparse:
+            # store the max possible dim of the sparse dset (e.g. mo_num)
+            tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
+            # build printf/scanf sequence and compute line length for n-index sparse quantity
+            index_printf = f'*(index_sparse + {str(rank)}*i'
+            index_scanf  = f'index_sparse + {str(rank)}*i'
+            # one index item consumes up to index_length characters (int32_len_printf for int32 + 1 for space)
+            group_dset_sparse_indices_printf = index_printf + ')'
+            group_dset_sparse_indices_scanf  = index_scanf
+            sparse_line_length_32            = int_len_printf[32] + 1
+            sparse_line_length_16            = int_len_printf[16] + 1
+            sparse_line_length_8             = int_len_printf[8]  + 1
+            # loop from 1 because we already have stored one index
+            for index_count in range(1,rank):
+                group_dset_sparse_indices_printf += f', {index_printf} + {index_count})'
+                group_dset_sparse_indices_scanf  += f', {index_scanf} + {index_count}'
+                sparse_line_length_32            += int_len_printf[32] + 1
+                sparse_line_length_16            += int_len_printf[16] + 1
+                sparse_line_length_8             += int_len_printf[8]  + 1
+
+            # add 24 chars occupied by the floating point value of sparse dataset + 1 char for "\n"
+            sparse_line_length_32 += 24 + 1
+            sparse_line_length_16 += 24 + 1
+            sparse_line_length_8  += 24 + 1
+
+            tmp_dict['sparse_line_length_32']    = str(sparse_line_length_32)
+            tmp_dict['sparse_line_length_16']    = str(sparse_line_length_16)
+            tmp_dict['sparse_line_length_8']     = str(sparse_line_length_8)
+            tmp_dict['group_dset_sparse_indices_printf'] = group_dset_sparse_indices_printf
+            tmp_dict['group_dset_sparse_indices_scanf']  = group_dset_sparse_indices_scanf
+
        # add group name as a key-value pair to the dset dict
        tmp_dict['group'] = v[2]

        # split datasets in numeric- and string- based
-        if (datatype == 'char*'):
+        if 'str' in datatype:
            dset_string_dict[k] = tmp_dict
+        elif is_sparse:
+            dset_sparse_dict[k] = tmp_dict
        else:
            dset_numeric_dict[k] = tmp_dict

-    return (dset_numeric_dict, dset_string_dict)
+    return (dset_numeric_dict, dset_string_dict, dset_sparse_dict)


 def check_dim_consistency(num: dict, dset: dict) -> None:
-    """ 
-    Consistency check to make sure that each dimensioning variable exists as a num attribute of some group. 
+    """
+    Consistency check to make sure that each dimensioning variable exists as a num attribute of some group.

            Parameters:
                    num (dict)  : dictionary of numerical attributes
--- a/trex.org
+++ b/trex.org
@ -2,32 +2,40 @@
 #+STARTUP: latexpreview
 #+SETUPFILE: docs/theme.setup

-This page contains information about the general structure of the 
-TREXIO library. The source code of the library can be automatically 
-generated based on the contents of the ~trex.json~ configuration file, 
-which itself is compiled from different sections (groups) presented below.
+This page contains information about the general structure of the
+TREXIO library. The source code of the library can be automatically
+generated based on the contents of the ~trex.json~ configuration file,
+which itself is compiled from different sections (groups) presented
+below.

-For more information about the automatic generation on the source code 
-or regarding possible modifications, please contact the TREXIO developers.
+For more information about the automatic generation of the source code
+or regarding possible modifications, please contact the TREXIO
+developers.

-All quantities are saved in TREXIO file in atomic units.
-The dimensions of the arrays in the tables below are given in
-column-major order (as in Fortran), and the ordering of the dimensions
-is reversed in the produced ~trex.json~ configuration file as the library is
+All quantities are saved in the TREXIO file in atomic units. The
+dimensions of the arrays in the tables below are given in column-major
+order (as in Fortran), and the ordering of the dimensions is reversed
+in the produced ~trex.json~ configuration file as the library is
 written in C.

-TREXIO currently supports ~int~, ~float~ and ~str~ types for both single attributes and arrays.
-Note, that some attributes might have ~dim~ type (e.g. ~num~ of the ~nucleus~ group).
-This type is treated exactly the same as ~int~ with the only difference that ~dim~ variables
-cannot be negative or zero. This additional constraint is required because ~dim~ attributes 
-are used internally to allocate memory and to check array boundaries in the memory-safe API.
-Most of the times, the ~dim~ variables contain ~num~ suffix.
-
+TREXIO currently supports ~int~, ~float~ and ~str~ types for both
+single attributes and arrays.  Note that some attributes might have
+~dim~ type (e.g. ~num~ of the ~nucleus~ group).  This type is treated
+exactly the same as ~int~ with the only difference that ~dim~
+variables cannot be negative. This additional constraint is required
+because ~dim~ attributes are used internally to allocate memory and to
+check array boundaries in the memory-safe API. Most of the time, the
+~dim~ variables contain the ~num~ suffix.

 In Fortran, the arrays are 1-based and in most other languages the
 arrays are 0-based. Hence, we introduce the ~index~ type which is an
 1-based ~int~ in the Fortran interface and 0-based otherwise.

+For sparse data structures such as electron repulsion integrals,
+the data can be too large to fit in memory and the data needs to be
+fetched using multiple function calls to perform I/O on buffers.
+
+
  #+begin_src python :tangle trex.json :exports none
 {
  #+end_src
@ -78,14 +86,14 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an

  #+CALL: json(data=electron, title="electron")
  #+RESULTS:
-  :RESULTS:
+  :results:
  #+begin_src python :tangle trex.json
      "electron": {
-	  "up_num" : [ "int", []  ]
-	, "dn_num" : [ "int", []  ]
+          "up_num" : [ "int", []  ]
+        , "dn_num" : [ "int", []  ]
      } ,
  #+end_src
-  :END:
+  :end:

 * Nucleus (nucleus group)

@ -100,20 +108,22 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
  | ~coord~       | ~float~ | ~(3,nucleus.num)~ | Coordinates of the atoms |
  | ~label~       | ~str~   | ~(nucleus.num)~   | Atom labels              |
  | ~point_group~ | ~str~   |                   | Symmetry point group     |
+  | ~repulsion~   | ~float~ |                   | Nuclear repulsion energy |

  #+CALL: json(data=nucleus, title="nucleus")
  #+RESULTS:
-  :RESULTS:
+  :results:
  #+begin_src python :tangle trex.json
      "nucleus": {
-		  "num" : [ "dim"  , []                     ]
-	,      "charge" : [ "float", [ "nucleus.num" ]      ]
-	,       "coord" : [ "float", [ "nucleus.num", "3" ] ]
-	,       "label" : [ "str"  , [ "nucleus.num" ]      ]
-	, "point_group" : [ "str"  , []                     ]
+                  "num" : [ "dim"  , []                     ]
+        ,      "charge" : [ "float", [ "nucleus.num" ]      ]
+        ,       "coord" : [ "float", [ "nucleus.num", "3" ] ]
+        ,       "label" : [ "str"  , [ "nucleus.num" ]      ]
+        , "point_group" : [ "str"  , []                     ]
+        ,   "repulsion" : [ "float", []                     ]
      } ,
  #+end_src
-  :END:
+  :end:

 * Effective core potentials (ecp group)

@ -617,15 +627,18 @@ prim_factor =
   :end:

 * TODO Slater determinants
-* TODO Reduced density matrices (rdm group)
+* Reduced density matrices (rdm group)

  #+NAME: rdm
-  | Variable   | Type           | Dimensions                         | Description |
-  |------------+----------------+------------------------------------+-------------|
-  | ~one_e~    | ~float~        | ~(mo.num, mo.num)~                 |             |
-  | ~one_e_up~ | ~float~        | ~(mo.num, mo.num)~                 |             |
-  | ~one_e_dn~ | ~float~        | ~(mo.num, mo.num)~                 |             |
-  | ~two_e~    | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ |             |
+  | Variable  | Type           | Dimensions                         | Description                                                           |
+  |-----------+----------------+------------------------------------+-----------------------------------------------------------------------|
+  | ~1e~      | ~float~        | ~(mo.num, mo.num)~                 | One-body density matrix                                               |
+  | ~1e_up~   | ~float~        | ~(mo.num, mo.num)~                 | \uparrow-spin component of the one-body density matrix                |
+  | ~1e_dn~   | ~float~        | ~(mo.num, mo.num)~                 | \downarrow-spin component of the one-body density matrix              |
+  | ~2e~      | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | Two-body reduced density matrix (spin trace)                          |
+  | ~2e_upup~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\uparrow component of the two-body reduced density matrix     |
+  | ~2e_dndn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \downarrow\downarrow component of the two-body reduced density matrix |
+  | ~2e_updn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\downarrow component of the two-body reduced density matrix   |

  #+CALL: json(data=rdm, title="rdm", last=1)

@ -633,10 +646,13 @@ prim_factor =
  :results:
  #+begin_src python :tangle trex.json
      "rdm": {
-             "one_e" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
-        , "one_e_up" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
-        , "one_e_dn" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
-        ,    "two_e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
+               "1e" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
+        ,   "1e_up" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
+        ,   "1e_dn" : [ "float"       , [ "mo.num", "mo.num" ]                     ]
+        ,      "2e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
+        , "2e_upup" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
+        , "2e_dndn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
+        , "2e_updn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
      }
  #+end_src
  :end: