Read the dimension variable of a sparse dataset to compress the storage of indices in HDF5

2025-04-25 18:04:44 +02:00 · 2021-12-14 18:03:51 +01:00 · 2021-12-14 18:03:51 +01:00 · 2e99a14b6e
commit 2e99a14b6e
parent a8b251d820
2 changed files with 13 additions and 3 deletions
--- a/src/templates_front/templator_front.org
+++ b/src/templates_front/templator_front.org
@ -2445,6 +2445,10 @@ trexio_read_$group_dset$(trexio_t* const file,
  rc = trexio_read_$group_dset$_size(file, &size_max);
  if (rc != TREXIO_SUCCESS) return rc;

+  int64_t num;
+  rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
+  if (rc != TREXIO_SUCCESS) return rc;
+
  // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
  uint64_t eof_read_size = 0UL;

@ -2456,7 +2460,7 @@ trexio_read_$group_dset$(trexio_t* const file,

  case TREXIO_HDF5:
 #ifdef HAVE_HDF5
-    rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, size_max, &eof_read_size, index_sparse, value_sparse);
+    rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, num, &eof_read_size, index_sparse, value_sparse);
    break;
 #else
    rc = TREXIO_BACK_END_MISSING;
@ -2543,6 +2547,10 @@ trexio_write_$group_dset$(trexio_t* const file,
  if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
  if (rc == TREXIO_DSET_MISSING) size_max = 0L;

+  int64_t num;
+  rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
+  if (rc != TREXIO_SUCCESS) return rc;
+
  int32_t* index_sparse_p = (int32_t*) index_sparse;
  // shift indices to be zero-based if Fortran API is used
  if (file->one_based) {
@ -2565,7 +2573,7 @@ trexio_write_$group_dset$(trexio_t* const file,

  case TREXIO_HDF5:
 #ifdef HAVE_HDF5
-    rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, size_max, index_sparse_p, value_sparse);
+    rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, index_sparse_p, value_sparse);
    break;
 #else
    rc = TREXIO_BACK_END_MISSING;
--- a/tools/generator_tools.py
+++ b/tools/generator_tools.py
@ -107,7 +107,7 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
                'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
                'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
                'group_num_h5_dtype', 'group_num_py_dtype',
-                'group_dset_format_scanf', 'group_dset_format_printf',
+                'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
                'group_dset_sparse_line_length', 'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
                'group_dset', 'group_num', 'group_str', 'group']

@ -707,6 +707,8 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
        tmp_dict['group_dset_f_dims'] = dim_f_list

        if is_sparse:
+            # store the max possible dim of the sparse dset (e.g. mo_num)
+            tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
            # build printf/scanf sequence and compute line length for n-index sparse quantity
            index_printf = f'*(index_sparse + {str(rank)}*i'
            index_scanf  = f'index_sparse + {str(rank)}*i'