From 2e99a14b6e2025b41f2c292d2f88865732e48570 Mon Sep 17 00:00:00 2001 From: q-posev Date: Tue, 14 Dec 2021 18:03:51 +0100 Subject: [PATCH] read dim variable of sparse dset to compress storage of indices in HDF5 --- src/templates_front/templator_front.org | 12 ++++++++++-- tools/generator_tools.py | 4 +++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index dab7796..e7a2d3e 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -2445,6 +2445,10 @@ trexio_read_$group_dset$(trexio_t* const file, rc = trexio_read_$group_dset$_size(file, &size_max); if (rc != TREXIO_SUCCESS) return rc; + int64_t num; + rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); + if (rc != TREXIO_SUCCESS) return rc; + // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered uint64_t eof_read_size = 0UL; @@ -2456,7 +2460,7 @@ trexio_read_$group_dset$(trexio_t* const file, case TREXIO_HDF5: #ifdef HAVE_HDF5 - rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, size_max, &eof_read_size, index_sparse, value_sparse); + rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, num, &eof_read_size, index_sparse, value_sparse); break; #else rc = TREXIO_BACK_END_MISSING; @@ -2543,6 +2547,10 @@ trexio_write_$group_dset$(trexio_t* const file, if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc; if (rc == TREXIO_DSET_MISSING) size_max = 0L; + int64_t num; + rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); + if (rc != TREXIO_SUCCESS) return rc; + int32_t* index_sparse_p = (int32_t*) index_sparse; // shift indices to be zero-based if Fortran API is used if (file->one_based) { @@ -2565,7 +2573,7 @@ trexio_write_$group_dset$(trexio_t* const file, case TREXIO_HDF5: #ifdef HAVE_HDF5 - rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, size_max, index_sparse_p, value_sparse); + rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, index_sparse_p, value_sparse); break; #else rc = TREXIO_BACK_END_MISSING; diff --git a/tools/generator_tools.py b/tools/generator_tools.py index ada99b3..c892621 100644 --- a/tools/generator_tools.py +++ b/tools/generator_tools.py @@ -107,7 +107,7 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N 'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single', 'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single', 'group_num_h5_dtype', 'group_num_py_dtype', - 'group_dset_format_scanf', 'group_dset_format_printf', + 'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim', 'group_dset_sparse_line_length', 'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf', 'group_dset', 'group_num', 'group_str', 'group'] @@ -707,6 +707,8 @@ def split_dset_dict_detailed (datasets: dict) -> tuple: tmp_dict['group_dset_f_dims'] = dim_f_list if is_sparse: + # store the max possible dim of the sparse dset (e.g. mo_num) + tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0] # build printf/scanf sequence and compute line length for n-index sparse quantity index_printf = f'*(index_sparse + {str(rank)}*i' index_scanf = f'index_sparse + {str(rank)}*i'