1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2025-04-28 19:34:44 +02:00

Refactor to allow arbitrary dimensions of sparse datasets

This commit is contained in:
q-posev 2022-09-28 12:17:54 +02:00
parent 99d0ef70bd
commit b36b63347f
2 changed files with 51 additions and 19 deletions

View File

@@ -3167,9 +3167,21 @@ trexio_read_$group_dset$(trexio_t* const file,
rc = trexio_read_$group_dset$_size(file, &size_max); rc = trexio_read_$group_dset$_size(file, &size_max);
if (rc != TREXIO_SUCCESS) return rc; if (rc != TREXIO_SUCCESS) return rc;
int64_t num; /* To be set by generator : number of unique dimensions
rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); (e.g. 1 for ERI in AO basis because only ao_num is present in the list of dimensions) */
if (rc != TREXIO_SUCCESS) return rc; const uint32_t unique_rank = $group_dset_unique_rank$;
int64_t unique_dims[$group_dset_unique_rank$];
// Below part is populated by the generator when unique_rank > 1
rc = trexio_read_$group_dset_unique_dim$_64(file, &unique_dims[$dim_id$]); if (rc != TREXIO_SUCCESS) return rc;
/* Find the maximal value along all dimensions to define the compression technique in the back end */
int64_t max_dim = unique_dims[0];
if (unique_rank != 1) {
for (int i = 1; i < unique_rank; i++) {
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
}
}
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered // introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
int64_t eof_read_size = 0L; int64_t eof_read_size = 0L;
@@ -3177,12 +3189,12 @@ trexio_read_$group_dset$(trexio_t* const file,
switch (file->back_end) { switch (file->back_end) {
case TREXIO_TEXT: case TREXIO_TEXT:
rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse); rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, max_dim, &eof_read_size, index_sparse, value_sparse);
break; break;
case TREXIO_HDF5: case TREXIO_HDF5:
#ifdef HAVE_HDF5 #ifdef HAVE_HDF5
rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse); rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, max_dim, &eof_read_size, index_sparse, value_sparse);
break; break;
#else #else
rc = TREXIO_BACK_END_MISSING; rc = TREXIO_BACK_END_MISSING;
@@ -3276,9 +3288,10 @@ trexio_write_$group_dset$(trexio_t* const file,
if (index_sparse == NULL) return TREXIO_INVALID_ARG_4; if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
if (value_sparse == NULL) return TREXIO_INVALID_ARG_5; if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices /* To be set by generator : number of indices */
const uint32_t rank = $group_dset_rank$;
int64_t size_max=0L; // Max number of integrals (already in the file) int64_t size_max = 0L; // Max number of integrals (already in the file)
trexio_exit_code rc; trexio_exit_code rc;
/* Read the max number of integrals stored in the file */ /* Read the max number of integrals stored in the file */
@@ -3286,9 +3299,21 @@ trexio_write_$group_dset$(trexio_t* const file,
if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc; if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
if (rc == TREXIO_DSET_MISSING) size_max = 0L; if (rc == TREXIO_DSET_MISSING) size_max = 0L;
int64_t num; /* To be set by generator : number of unique dimensions
rc = trexio_read_$group_dset_sparse_dim$_64(file, &num); (e.g. 1 for ERI in AO basis because only ao_num is present in the list of dimensions) */
if (rc != TREXIO_SUCCESS) return rc; const uint32_t unique_rank = $group_dset_unique_rank$;
int64_t unique_dims[$group_dset_unique_rank$];
// Below part is populated by the generator when unique_rank > 1
rc = trexio_read_$group_dset_unique_dim$_64(file, &unique_dims[$dim_id$]); if (rc != TREXIO_SUCCESS) return rc;
/* Find the maximal value along all dimensions to define the compression technique in the back end */
int64_t max_dim = unique_dims[0];
if (unique_rank != 1) {
for (int i = 1; i < unique_rank; i++) {
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
}
}
// shift indices to be zero-based if Fortran API is used // shift indices to be zero-based if Fortran API is used
if (file->one_based) { if (file->one_based) {
@@ -3305,13 +3330,13 @@ trexio_write_$group_dset$(trexio_t* const file,
switch (file->back_end) { switch (file->back_end) {
case TREXIO_TEXT: case TREXIO_TEXT:
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num, rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, max_dim,
size_max, index_sparse_p, value_sparse); size_max, index_sparse_p, value_sparse);
break; break;
case TREXIO_HDF5: case TREXIO_HDF5:
#ifdef HAVE_HDF5 #ifdef HAVE_HDF5
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, max_dim,
index_sparse_p, value_sparse); index_sparse_p, value_sparse);
break; break;
#else #else
@@ -3336,13 +3361,13 @@ trexio_write_$group_dset$(trexio_t* const file,
switch (file->back_end) { switch (file->back_end) {
case TREXIO_TEXT: case TREXIO_TEXT:
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num, rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, max_dim,
size_max, index_sparse, value_sparse); size_max, index_sparse, value_sparse);
break; break;
case TREXIO_HDF5: case TREXIO_HDF5:
#ifdef HAVE_HDF5 #ifdef HAVE_HDF5
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, max_dim,
index_sparse, value_sparse); index_sparse, value_sparse);
break; break;
#else #else

View File

@@ -108,11 +108,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
triggers = ['group_dset_dtype', 'group_dset_py_dtype', 'group_dset_h5_dtype', 'default_prec', 'is_index', triggers = ['group_dset_dtype', 'group_dset_py_dtype', 'group_dset_h5_dtype', 'default_prec', 'is_index',
'group_dset_f_dtype_default', 'group_dset_f_dtype_double', 'group_dset_f_dtype_single', 'group_dset_f_dtype_default', 'group_dset_f_dtype_double', 'group_dset_f_dtype_single',
'group_dset_dtype_default', 'group_dset_dtype_double', 'group_dset_dtype_single', 'group_dset_dtype_default', 'group_dset_dtype_double', 'group_dset_dtype_single',
'group_dset_rank', 'group_dset_dim_list', 'group_dset_f_dims', 'group_dset_rank', 'group_dset_unique_rank', 'group_dset_dim_list', 'group_dset_f_dims',
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single', 'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single', 'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
'group_num_h5_dtype', 'group_num_py_dtype', 'group_num_h5_dtype', 'group_num_py_dtype', 'group_dset_format_scanf', 'group_dset_format_printf',
'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf', 'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32', 'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32', 'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
@@ -151,6 +150,11 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
if 'dim' in detailed_source[item]['trex_json_int_type']: if 'dim' in detailed_source[item]['trex_json_int_type']:
templine = line.replace('//', '') templine = line.replace('//', '')
f_out.write(templine) f_out.write(templine)
# special case to get the max dimension of sparse datasets with different dimensions
elif 'trexio_read_$group_dset_unique_dim$_64' in line:
for i in range(int(detailed_source[item]['group_dset_unique_rank'])):
templine = line.replace('$group_dset_unique_dim$', detailed_source[item]['unique_dims'][i]).replace('$dim_id$', str(i))
f_out.write(templine)
# general case of recursive replacement of inline triggers # general case of recursive replacement of inline triggers
else: else:
populated_line = recursive_replace_line(line, triggers, detailed_source[item]) populated_line = recursive_replace_line(line, triggers, detailed_source[item])
@@ -717,6 +721,11 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
# add the list of dimensions # add the list of dimensions
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]] tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
# get a list of unique dimensions for sparse datasets
if is_sparse:
tmp_dict['unique_dims'] = list(set(tmp_dict['dims']))
tmp_dict['group_dset_unique_rank'] = str(len(tmp_dict['unique_dims']))
# add the rank # add the rank
tmp_dict['rank'] = rank tmp_dict['rank'] = rank
tmp_dict['group_dset_rank'] = str(rank) tmp_dict['group_dset_rank'] = str(rank)
@@ -737,8 +746,6 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
tmp_dict['group_dset_f_dims'] = dim_f_list tmp_dict['group_dset_f_dims'] = dim_f_list
if is_sparse: if is_sparse:
# store the max possible dim of the sparse dset (e.g. mo_num)
tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
# build printf/scanf sequence and compute line length for n-index sparse quantity # build printf/scanf sequence and compute line length for n-index sparse quantity
index_printf = f'*(index_sparse + {str(rank)}*i' index_printf = f'*(index_sparse + {str(rank)}*i'
index_scanf = f'index_sparse + {str(rank)}*i' index_scanf = f'index_sparse + {str(rank)}*i'