Merge pull request #71 from TREX-CoE/add-sparse-datasets
Add Python API for sparse datasets
Commit c687c80f92
@@ -67,7 +67,7 @@ charges = [6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1.]
#charges_np = np.array(charges, dtype=np.float32)
charges_np = np.array(charges, dtype=np.int32)

# function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
trexio.write_nucleus_charge(test_file, charges_np)

@@ -80,7 +80,7 @@ indices_np = np.array(indices, dtype=np.int64)
# first write basis_shell_num because it is needed to check dimensions of basis_nucleus_index in TREXIO >= 2.0.0
trexio.write_basis_shell_num(test_file, basis_shell_num)

# function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
trexio.write_basis_nucleus_index(test_file, indices_np)

@@ -103,11 +103,25 @@ coords = [
# write coordinates in the file
trexio.write_nucleus_coord(test_file, coords)

# write mo_num (needed later to write sparse mo_2e_int_eri integrals)
trexio.write_mo_num(test_file, 600)

# write sparse data in the file
num_integrals = 100
indices = [i for i in range(num_integrals*4)]
values = [(3.14 + float(i)) for i in range(num_integrals)]

trexio.write_mo_2e_int_eri(test_file, 0, num_integrals, indices, values)


# write nucleus_point_group in the file
point_group = 'B3U'

trexio.write_nucleus_point_group(test_file, point_group)


# write nucleus_label in the file
labels = [
    'C',
    'C',
@@ -122,14 +136,13 @@ labels = [
    'H',
    'H']

# write nucleus_label in the file
trexio.write_nucleus_label(test_file, labels)

# close TREXIO file
# this call is no longer needed as we introduced the TREXIO_File class, which has a destructor that closes the file
#trexio.close(test_file)
# without calling the destructor on test_file, the TREXIO file is not created and the data is not written when using the TEXT back end.
# Thus, the user still has to explicitly call the destructor on the test_file object instead of the trexio.close function.
# This is only an issue when the data is written and read in the same session (e.g. in a Jupyter notebook).
del test_file

@@ -147,6 +160,7 @@ assert trexio.has_nucleus_charge
assert trexio.has_nucleus_coord(test_file2)
assert trexio.has_nucleus_label(test_file2)
assert trexio.has_nucleus_point_group(test_file2)
assert trexio.has_mo_2e_int_eri(test_file2)

# read nucleus_num from file
rnum = trexio.read_nucleus_num(test_file2)
@@ -189,6 +203,33 @@ np.testing.assert_array_almost_equal(rcoords_np, np.array(coords).reshape(nucleu
# set doReshape to False to get a flat 1D array (e.g. when reading matrices like nuclear coordinates)
#rcoords_reshaped_2 = trexio.read_nucleus_coord(test_file2, doReshape=False)

# read the number of integrals already present in the file
assert trexio.has_mo_2e_int_eri(test_file2)
assert trexio.read_mo_2e_int_eri_size(test_file2)==num_integrals

# read sparse arrays of mo_2e_int_eri integrals
buf_size = 60
offset_file = 0
# read a full buf_size (i.e. a read that does not reach EOF)
indices_sparse_np, value_sparse_np, read_buf_size, eof = trexio.read_mo_2e_int_eri(test_file2, offset_file, buf_size)
print(f'First complete sparse read size: {read_buf_size}')
#print(indices_sparse_np)
assert not eof
assert read_buf_size==buf_size
assert indices_sparse_np[0][0]==0
assert indices_sparse_np[read_buf_size-1][3]==read_buf_size*4-1
offset_file += buf_size

# read an incomplete buf_size (i.e. a read that does reach EOF)
indices_sparse_np, value_sparse_np, read_buf_size, eof2 = trexio.read_mo_2e_int_eri(test_file2, offset_file, buf_size)
print(f'Second incomplete sparse read size: {read_buf_size}')
#print(indices_sparse_np)
assert eof2
assert read_buf_size==(num_integrals - buf_size)
assert indices_sparse_np[0][0]==offset_file*4
assert indices_sparse_np[read_buf_size-1][3]==(offset_file+read_buf_size)*4-1


# read array of nuclear labels
rlabels_2d = trexio.read_nucleus_label(test_file2, dim=nucleus_num)
print(rlabels_2d)
@@ -197,13 +238,13 @@ for i in range(nucleus_num):

# read a string corresponding to the nuclear point group
rpoint_group = trexio.read_nucleus_point_group(test_file2)
assert rpoint_group==point_group

# another way to read only if the variable exists
if trexio.has_ao_num(test_file2):
    rmo_num = trexio.read_ao_num(test_file2)
else:
    print("Pass on reading the non-existing variable ao_num: checked")

# close TREXIO file
#trexio.close(test_file2)

@@ -54,7 +54,7 @@ assert rc==0
charges = [6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1.]
charges_np = np.array(charges, dtype=np.float64)

# function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
rc = trexio_write_safe_nucleus_charge(test_file, charges_np)
assert rc==0
@@ -78,16 +78,29 @@ indices_np = np.array(indices, dtype=np.int32)
rc = trexio_write_basis_shell_num(test_file, basis_num)
assert rc==0

# function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
rc = trexio_write_safe_basis_nucleus_index(test_file, indices_np)
assert rc==0

# test writing of sparse data
rc = trexio_write_mo_num(test_file, 600)
assert rc==0

num_integrals = 100
indices = [i for i in range(num_integrals*4)]
values = [(3.14 + float(i)) for i in range(num_integrals)]

rc = trexio_write_safe_mo_2e_int_eri(test_file, 0, num_integrals, indices, values)
assert rc==0

# test writing of a single string
point_group = 'B3U'

rc = trexio_write_nucleus_point_group(test_file, point_group, 10)
assert rc==0

# test writing of an array of strings
labels = [
    'C',
    'C',
@@ -172,6 +185,36 @@ print(f'Read point group: {rpoint_group}')
assert rc==0
assert rpoint_group==point_group

# test reading sparse quantities
rc, mo_2e_int_size = trexio_read_mo_2e_int_eri_size(test_file2)
assert rc==0
assert mo_2e_int_size==num_integrals

buf_size = 60
offset_file = 0
# read a full buf_size (i.e. a read that does not reach EOF)
rc, read_buf_size, indices_sparse_np, value_sparse_np = trexio_read_safe_mo_2e_int_eri(test_file2, offset_file, buf_size, buf_size*4, buf_size)
print(f'First complete sparse read size: {read_buf_size}')
#print(indices_sparse_np)
assert rc==0
assert read_buf_size==buf_size
assert indices_sparse_np[0]==0
assert indices_sparse_np[read_buf_size*4-1]==read_buf_size*4-1
offset_file += buf_size

# read an incomplete buf_size (i.e. a read that does reach EOF)
rc, read_buf_size, indices_sparse_np, value_sparse_np = trexio_read_safe_mo_2e_int_eri(test_file2, offset_file, buf_size, buf_size*4, buf_size)
print(f'Second incomplete sparse read size: {read_buf_size}')
# An incomplete read still allocates a NumPy array of buf_size=60 elements, but only 40 elements are read when EOF is encountered.
# Thus the remaining 20 elements are filled with garbage rather than zeros. Handle this in the front end?
print(indices_sparse_np)
# trexio_exit_code = 6 corresponds to TREXIO_END
assert rc==6
assert read_buf_size==(num_integrals - buf_size)
assert indices_sparse_np[0]==offset_file*4
assert indices_sparse_np[read_buf_size*4-1]==(offset_file+read_buf_size)*4-1


rc = trexio_close(test_file2)
assert rc==0

@@ -184,4 +227,3 @@ except:
    print(f'No output file {output_filename} has been produced')

#==========================================================#

@@ -12,10 +12,10 @@
%include <stdint.i>

/* NOTE:
   carrays was useful before numpy.i was introduced.
   For the Python interface it is better to use numpy arrays instead of carrays, because the latter are less Pythonic.
   On the other hand, carrays might be more portable to other target languages.
// Include carrays to work with C pointers to arrays
%include "carrays.i"
// Include classes that correspond to integer and float arrays
%array_class(double, doubleArray);
@@ -24,20 +24,26 @@
%array_class(int64_t, int64Array);
*/

/* Include typemaps to play with input/output re-casting
   Useful when working with C pointers
*/
%include typemaps.i
/* Redefine the [int32_t*, int64_t*, float*, double*] num
   pattern to be appended to the output tuple.
   Useful for TREXIO read_num functions where the
   num variable is modified by address
*/
/* Return num variables as part of the output tuple */
%apply int *OUTPUT { int32_t* const num};
%apply int *OUTPUT { int64_t* const num};
%apply float *OUTPUT { float* const num};
%apply float *OUTPUT { double* const num};
/* Return the TREXIO exit code from trexio_open as part of the output tuple */
%apply int *OUTPUT { trexio_exit_code* const rc_open};
/* Return the number of sparse data points stored in the file as part of the output tuple */
%apply int *OUTPUT { int64_t* const size_max};
/* Return the number of sparse data points read from the file as part of the output tuple */
%apply int *INOUT { int64_t* const buffer_size_read};

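/* Illustrative note (a sketch, not part of the original interface file): with the OUTPUT/INOUT
   typemaps above, scalar output arguments are appended to the Python return tuple. For example,
   a C prototype like
       trexio_exit_code trexio_read_mo_2e_int_eri_size(trexio_t* const file, int64_t* const size_max);
   is called from Python as
       rc, size_max = trexio_read_mo_2e_int_eri_size(test_file2)
   which matches the usage in the low-level test above. */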
/* Does not work for arrays (SIGSEGV) */
@@ -47,13 +53,13 @@
%include <cstring.i>
/* This enables reading of long strings with TREXIO_DELIM delimiters that can be further converted into an array of strings */
%cstring_bounded_output(char* dset_out, 4096);
/* This enables reading of single string attributes with a pre-defined max_str_len
   for Python we pre-define max_str_len = PYTREXIO_MAX_STR_LENGTH everywhere for simplicity
*/
%cstring_output_maxsize(char* const str_out, const int32_t max_str_len);


/* This block is needed to make SWIG treat the (double * dset_out|_in, int64_t dim_out|_in) pattern
   as a special case in order to return the NumPy array to Python from the C pointer to the array
   provided by the trexio_read_safe_[dset_num] function.
   NOTE: numpy.i is currently not part of SWIG but included in the numpy distribution (under numpy/tools/swig/numpy.i)
@@ -81,8 +87,14 @@ import_array();
/* Enable write|read_safe functions to convert numpy arrays from/to int64 arrays */
%apply (int64_t* ARGOUT_ARRAY1, int64_t DIM1) {(int64_t* const dset_out, const int64_t dim_out)};
%apply (int64_t* IN_ARRAY1, int64_t DIM1) {(const int64_t* dset_in, const int64_t dim_in)};
/* Enable write|read_safe functions to convert numpy arrays from/to sparse arrays */
%apply (double* IN_ARRAY1, int64_t DIM1) {(const double* value_sparse_write, const int64_t size_value_write)};
%apply (int32_t* IN_ARRAY1, int64_t DIM1) {(const int32_t* index_sparse_write, const int64_t size_index_write)};
%apply (double* ARGOUT_ARRAY1, int64_t DIM1) {(double* const value_sparse_read, const int64_t size_value_read)};
%apply (int32_t* ARGOUT_ARRAY1, int64_t DIM1) {(int32_t* const index_sparse_read, const int64_t size_index_read)};

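/* Illustrative note (a sketch, not part of the original interface file): with the numpy.i
   typemaps above, the safe sparse functions exchange NumPy arrays directly. The IN_ARRAY1
   pairs derive the size from the array itself, while the ARGOUT_ARRAY1 pairs take the
   requested size as an argument and return a freshly allocated array, e.g.
       rc = trexio_write_safe_mo_2e_int_eri(f, offset, buf_size, indices_np, values_np)
       rc, n_read, idx_np, val_np = trexio_read_safe_mo_2e_int_eri(f, offset, buf_size, buf_size*4, buf_size)
   as used in the low-level test above. */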
/* This tells SWIG to treat the char ** dset_in pattern as a special case
   Enables access to the trexio_[...]_write_dset_str set of functions directly, i.e.
   by converting an input list of strings from Python into char ** of C
*/
@@ -116,4 +128,3 @@
/* Parse the header files to generate wrappers */
%include "trexio_s.h"
%include "trexio.h"

@@ -2378,35 +2378,58 @@ def has_$group_dset$(trexio_file) -> bool:
}
#+end_src

The electron repulsion integral (eri) $\langle ij | kl \rangle$ is
represented as a quartet of integers $(i,j,k,l)$ and a floating
point value.

To store $N$ integrals in the file, we store

- An array of quartets of integers
- An array of values (floats)

Both arrays have the same size, $N$, the number of non-zero integrals.
Knowing the maximum dimensions makes it possible to check that the integers are
in a valid range, and also lets the library choose the smallest
integer representation to compress the storage.

Fortran uses 1-based array indexing, while C uses 0-based indexing.
Internally, we use a 0-based representation but the Fortran binding
does the appropriate conversion when reading or writing.

As the number of integrals to store can be prohibitively large, we
provide the possibility to read/write the integrals in chunks. So the
functions take two extra parameters:

- ~offset_file~ : how many integrals in the file should be skipped when reading/writing.
  An offset of zero implies reading from the first integral.
- ~buffer_size~ : the number of integrals to read/write.
  If EOF is encountered upon reading, ~buffer_size~ is overwritten with the number
  of integrals that have been read before EOF, and the ~trexio_read_~ functions return
  the ~TREXIO_END~ exit code instead of ~TREXIO_SUCCESS~.

The storage of ~int~ indices is internally compressed based on the maximum possible value of an index,
which is derived from the corresponding dimension of the sparse array (e.g. ~ao_num~ is the upper bound
of the indices in the aforementioned ~ao_2e_int_eri~ dataset).
The upper bounds for the different ~int~ types (e.g. ~uint16_t~) can be found in the =stdint.h= C header.
The currently implemented compression schemes, based on the upper bound of the indices, are listed below:

| Max value of indices            | Internal representation (in the TREXIO file) |
|---------------------------------+----------------------------------------------|
| ~UINT8_MAX~ (i.e. $\le 255$)    | 8-bit unsigned int                           |
| ~UINT16_MAX~ (i.e. $\le 65535$) | 16-bit unsigned int                          |
| Otherwise (i.e. $> 65535$)      | 32-bit signed int                            |
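For illustration, a minimal sketch (not part of the TREXIO sources) of how such a
dtype selection could be expressed on the Python side, assuming NumPy is available:

#+begin_src python
import numpy as np

def smallest_index_dtype(max_index: int):
    """Pick the smallest integer representation able to hold max_index."""
    if max_index <= np.iinfo(np.uint8).max:      # <= 255
        return np.uint8
    elif max_index <= np.iinfo(np.uint16).max:   # <= 65535
        return np.uint16
    else:
        return np.int32
#+end_src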

This section concerns API calls related to sparse data structures.

| Function name                     | Description                                                   | Precision                        |
|-----------------------------------+---------------------------------------------------------------+----------------------------------|
| ~trexio_has_$group_dset$~         | Check if a sparse dset is present in a file                   | ---                              |
| ~trexio_read_$group_dset$~        | Read indices and values of a sparse dset                      | Single/Double for indices/values |
| ~trexio_read_$group_dset$_size~   | Read the number of sparse data elements stored in the file    | Double for size                  |
| ~trexio_write_$group_dset$~       | Write indices and values of a sparse dset                     | Single/Double for indices/values |
| ~trexio_read_safe_$group_dset$~   | Safe (bounded) read of indices and values (for Python API)    | Single/Double for indices/values |
| ~trexio_write_safe_$group_dset$~  | Safe (bounded) write of indices and values (for Python API)   | Single/Double for indices/values |

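As an illustration of the chunked access pattern described above, a minimal sketch
(not part of the generated templates) of reading all ~mo_2e_int_eri~ integrals through
the Python API, assuming a TREXIO file object ~f~ opened for reading and containing
this dataset:

#+begin_src python
BUFFER_SIZE = 1000

# total number of integrals stored in the file
n_total = trexio.read_mo_2e_int_eri_size(f)

offset = 0
chunks_idx, chunks_val = [], []
while offset < n_total:
    # each call returns at most BUFFER_SIZE integrals; eof is True once the end is reached
    indices, values, n_read, eof = trexio.read_mo_2e_int_eri(f, offset, BUFFER_SIZE)
    # keep only the n_read entries actually read (guards against a partially filled last buffer)
    chunks_idx.append(indices[:n_read])
    chunks_val.append(values[:n_read])
    offset += n_read
    if eof:
        break
#+end_src
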
*** C templates for front end
**** Function declarations
@@ -2416,11 +2439,25 @@ trexio_exit_code trexio_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_read_safe_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size_read, int32_t* const index_sparse_read, const int64_t size_index_read, double* const value_sparse_read, const int64_t size_value_read);
trexio_exit_code trexio_write_safe_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int32_t* index_sparse_write, const int64_t size_index_write, const double* value_sparse_write, const int64_t size_value_write);
#+end_src

**** Source code for default functions

#+begin_src c :tangle read_dset_sparse_front.c
trexio_exit_code trexio_read_safe_$group_dset$(trexio_t* const file,
                                               const int64_t offset_file,
                                               int64_t* const buffer_size_read,
                                               int32_t* const index_sparse_read,
                                               const int64_t size_index_read,
                                               double* const value_sparse_read,
                                               const int64_t size_value_read
                                               )
{
  return trexio_read_$group_dset$(file, offset_file, buffer_size_read, index_sparse_read, value_sparse_read);
}

trexio_exit_code
trexio_read_$group_dset$(trexio_t* const file,
                         const int64_t offset_file,
@@ -2525,13 +2562,25 @@ trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)

#+begin_src c :tangle write_dset_sparse_front.c
trexio_exit_code trexio_write_safe_$group_dset$(trexio_t* const file,
                                                const int64_t offset_file,
                                                const int64_t buffer_size,
                                                const int32_t* index_sparse_write,
                                                const int64_t size_index_write,
                                                const double* value_sparse_write,
                                                const int64_t size_value_write
                                                )
{
  return trexio_write_$group_dset$(file, offset_file, buffer_size, index_sparse_write, value_sparse_write);
}

trexio_exit_code
trexio_write_$group_dset$(trexio_t* const file,
                          const int64_t offset_file,
                          const int64_t buffer_size,
                          const int32_t* index_sparse,
                          const double* value_sparse
                          )
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
@@ -2648,6 +2697,22 @@ interface
       double precision, intent(in) :: value_sparse(*)
     end function trexio_write_$group_dset$
  end interface

  interface
     integer function trexio_write_safe_$group_dset$ (trex_file, &
          offset_file, buffer_size, &
          index_sparse, index_size, &
          value_sparse, value_size) bind(C)
       use, intrinsic :: iso_c_binding
       integer(8), intent(in), value :: trex_file
       integer(8), intent(in), value :: offset_file
       integer(8), intent(in), value :: buffer_size
       integer(4), intent(in) :: index_sparse(*)
       integer(8), intent(in), value :: index_size
       double precision, intent(in) :: value_sparse(*)
       integer(8), intent(in), value :: value_size
     end function trexio_write_safe_$group_dset$
  end interface
#+end_src

#+begin_src f90 :tangle read_dset_sparse_front_fortran.f90
@@ -2663,6 +2728,22 @@ interface
       double precision, intent(out) :: value_sparse(*)
     end function trexio_read_$group_dset$
  end interface

  interface
     integer function trexio_read_safe_$group_dset$ (trex_file, &
          offset_file, buffer_size, &
          index_sparse, index_size, &
          value_sparse, value_size) bind(C)
       use, intrinsic :: iso_c_binding
       integer(8), intent(in), value :: trex_file
       integer(8), intent(in), value :: offset_file
       integer(8), intent(inout) :: buffer_size
       integer(4), intent(out) :: index_sparse(*)
       integer(8), intent(in), value :: index_size
       double precision, intent(out) :: value_sparse(*)
       integer(8), intent(in), value :: value_size
     end function trexio_read_safe_$group_dset$
  end interface
#+end_src

#+begin_src f90 :tangle read_dset_sparse_size_front_fortran.f90
@@ -2685,6 +2766,221 @@ interface
  end interface
#+end_src

*** Python templates for front end

#+begin_src python :tangle write_dset_sparse_front.py
def write_$group_dset$(trexio_file: File, offset_file: int, buffer_size: int, indices: list, values: list) -> None:
    """Write the $group_dset$ indices and values in the TREXIO file.

    Parameters:

    trexio_file:
        TREXIO File object.

    offset_file: int
        The number of integrals to be skipped in the file when writing.

    buffer_size: int
        The number of integrals to write in the file from the provided sparse arrays.

    indices: list OR numpy.ndarray
        Array of $group_dset$ indices to be written. If the array data type does not correspond to int32, the conversion is performed.

    values: list OR numpy.ndarray
        Array of $group_dset$ values to be written. If the array data type does not correspond to float64, the conversion is performed.

    Raises:
        - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error.
        - Exception from some other error (e.g. RuntimeError).
    """

    try:
        import numpy as np
    except ImportError:
        raise Exception("NumPy cannot be imported.")

    if not isinstance(offset_file, int):
        raise TypeError("offset_file argument has to be an integer.")
    if not isinstance(buffer_size, int):
        raise TypeError("buffer_size argument has to be an integer.")
    if not isinstance(indices, (list, tuple, np.ndarray)):
        raise TypeError("indices argument has to be an array (list, tuple or NumPy ndarray).")
    if not isinstance(values, (list, tuple, np.ndarray)):
        raise TypeError("values argument has to be an array (list, tuple or NumPy ndarray).")

    convertIndices = False
    convertValues = False
    flattenIndices = False
    if isinstance(indices, np.ndarray):
        # convert to int32 if input indices are in a different precision
        if not indices.dtype==np.int32:
            convertIndices = True

        if len(indices.shape) > 1:
            flattenIndices = True
        if convertIndices:
            indices_32 = np.int32(indices).flatten()
        else:
            indices_32 = np.array(indices, dtype=np.int32).flatten()
    else:
        if convertIndices:
            indices_32 = np.int32(indices)
        else:
            # if the input array is a multidimensional list or tuple, we have to convert it
            try:
                doFlatten = True
                # if the list of indices is flat, the attempt to compute len(indices[0]) will raise a TypeError
                ncol = len(indices[0])
                indices_32 = np.array(indices, dtype=np.int32).flatten()
            except TypeError:
                doFlatten = False
                pass

    if isinstance(values, np.ndarray):
        # convert to float64 if input values are in a different precision
        if not values.dtype==np.float64:
            convertValues = True
        if convertValues:
            values_64 = np.float64(values)

    if (convertIndices or flattenIndices) and convertValues:
        rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, indices_32, values_64)
    elif (convertIndices or flattenIndices) and not convertValues:
        rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, indices_32, values)
    elif not (convertIndices or flattenIndices) and convertValues:
        rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, indices, values_64)
    else:
        rc = pytr.trexio_write_safe_$group_dset$(trexio_file.pytrexio_s, offset_file, buffer_size, indices, values)

    if rc != TREXIO_SUCCESS:
        raise Error(rc)
#+end_src

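A short usage sketch (illustrative only, assuming an open TREXIO file object ~f~ and
~mo_num~ already written) of the generated writer for the ~mo_2e_int_eri~ dataset:

#+begin_src python
import numpy as np

# four int32 indices per integral, flattened, plus one float64 value per integral
n = 100
idx = np.arange(n * 4, dtype=np.int32)
val = 3.14 + np.arange(n, dtype=np.float64)

trexio.write_mo_2e_int_eri(f, 0, n, idx, val)   # offset_file=0, buffer_size=n
#+end_src
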
#+begin_src python :tangle read_dset_sparse_front.py
def read_$group_dset$(trexio_file: File, offset_file: int, buffer_size: int) -> tuple:
    """Read the $group_dset$ indices and values from the TREXIO file.

    Parameters:

    trexio_file:
        TREXIO File object.

    offset_file: int
        The number of integrals to be skipped in the file when reading.

    buffer_size: int
        The number of integrals to read from the file.

    Returns:
        (indices, values, n_int_read, eof_flag) tuple where
        - indices and values are NumPy arrays [numpy.ndarray] with the default int32 and float64 precision, respectively;
        - n_int_read [int] is the number of integrals read from the trexio_file
          (either strictly equal to buffer_size or less than buffer_size if EOF has been reached);
        - eof_flag [bool] is True when EOF has been reached (i.e. when the call to the low-level pytrexio API returns TREXIO_END),
          False otherwise.

    Raises:
        - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS
          and prints the error message using trexio_string_of_error.
        - Exception from some other error (e.g. RuntimeError).
    """

    try:
        import numpy as np
    except ImportError:
        raise Exception("NumPy cannot be imported.")

    if not isinstance(offset_file, int):
        raise TypeError("offset_file argument has to be an integer.")
    if not isinstance(buffer_size, int):
        raise TypeError("buffer_size argument has to be an integer.")

    # read the number of integrals already in the file
    integral_num = read_$group_dset$_size(trexio_file)

    # additional modification needed to avoid allocating more memory than needed if EOF will be reached during read
    overflow = offset_file + buffer_size - integral_num
    eof_flag = False
    if overflow > 0:
        verified_size = buffer_size - overflow
        eof_flag = True
    else:
        verified_size = buffer_size

    # main call to the low-level (SWIG-wrapped) trexio_read function, which also requires the sizes of the output to be provided
    # as the last 2 arguments (for numpy arrays of indices and values, respectively)
    # n_int_read contains the number of elements actually read from the file, useful when EOF has been reached
    rc, n_int_read, indices, values = pytr.trexio_read_safe_$group_dset$(trexio_file.pytrexio_s,
                                                                         offset_file,
                                                                         verified_size,
                                                                         verified_size * $group_dset_rank$,
                                                                         verified_size)
    if rc != TREXIO_SUCCESS:
        raise Error(rc)
    if n_int_read == 0:
        raise ValueError("No integrals have been read from the file.")
    if indices is None or values is None:
        raise ValueError("Returned NULL array from the low-level pytrexio API.")

    # conversion to custom types can be performed on the user side; here we only reshape the returned flat array of indices according to group_dset_rank
    shape = tuple([verified_size, $group_dset_rank$])
    indices_reshaped = np.reshape(indices, shape, order='C')

    return (indices_reshaped, values, n_int_read, eof_flag)


def read_$group_dset$_size(trexio_file) -> int:
    """Read the number of $group_dset$ integrals stored in the TREXIO file.

    Parameter is a ~TREXIO File~ object that has been created by a call to the ~open~ function.

    Returns:
        ~num_integral~: int
        Integer value corresponding to the size of the $group_dset$ sparse array from ~trexio_file~.

    Raises:
        - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error.
        - Exception from some other error (e.g. RuntimeError).
    """

    try:
        rc, num_integral = pytr.trexio_read_$group_dset$_size(trexio_file.pytrexio_s)
        if rc != TREXIO_SUCCESS:
            raise Error(rc)
    except:
        raise

    return num_integral
#+end_src

#+begin_src python :tangle has_dset_sparse_front.py
def has_$group_dset$(trexio_file) -> bool:
    """Check that $group_dset$ variable exists in the TREXIO file.

    Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.

    Returns:
        True if the variable exists, False otherwise

    Raises:
        - Exception from trexio.Error class if TREXIO return code ~rc~ is TREXIO_FAILURE and prints the error message using string_of_error.
        - Exception from some other error (e.g. RuntimeError).
    """

    try:
        rc = pytr.trexio_has_$group_dset$(trexio_file.pytrexio_s)
        if rc == TREXIO_FAILURE:
            raise Error(rc)
    except:
        raise

    if rc == TREXIO_SUCCESS:
        return True
    else:
        return False
#+end_src

** Templates for front end has/read/write a dataset of strings
*** Introduction
This section concerns API calls related to datasets of strings.