mirror of
https://github.com/TREX-CoE/trexio.git
synced 2025-04-28 19:34:44 +02:00
Refactor to allow arbitrary dimensions of sparse datasets
This commit is contained in:
parent
99d0ef70bd
commit
b36b63347f
@ -3167,9 +3167,21 @@ trexio_read_$group_dset$(trexio_t* const file,
|
|||||||
rc = trexio_read_$group_dset$_size(file, &size_max);
|
rc = trexio_read_$group_dset$_size(file, &size_max);
|
||||||
if (rc != TREXIO_SUCCESS) return rc;
|
if (rc != TREXIO_SUCCESS) return rc;
|
||||||
|
|
||||||
int64_t num;
|
/* To be set by generator : number of unique dimensions
|
||||||
rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
|
(e.g. 1 for ERI in AO basis because only ao_num is present in the list of dimensions) */
|
||||||
if (rc != TREXIO_SUCCESS) return rc;
|
const uint32_t unique_rank = $group_dset_unique_rank$;
|
||||||
|
int64_t unique_dims[$group_dset_unique_rank$];
|
||||||
|
|
||||||
|
// Below part is populated by the generator when unique_rank > 1
|
||||||
|
rc = trexio_read_$group_dset_unique_dim$_64(file, &unique_dims[$dim_id$]); if (rc != TREXIO_SUCCESS) return rc;
|
||||||
|
|
||||||
|
/* Find the maximal value along all dimensions to define the compression technique in the back end */
|
||||||
|
int64_t max_dim = unique_dims[0];
|
||||||
|
if (unique_rank != 1) {
|
||||||
|
for (int i = 1; i < unique_rank; i++) {
|
||||||
|
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
|
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
|
||||||
int64_t eof_read_size = 0L;
|
int64_t eof_read_size = 0L;
|
||||||
@ -3177,12 +3189,12 @@ trexio_read_$group_dset$(trexio_t* const file,
|
|||||||
switch (file->back_end) {
|
switch (file->back_end) {
|
||||||
|
|
||||||
case TREXIO_TEXT:
|
case TREXIO_TEXT:
|
||||||
rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
|
rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, max_dim, &eof_read_size, index_sparse, value_sparse);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TREXIO_HDF5:
|
case TREXIO_HDF5:
|
||||||
#ifdef HAVE_HDF5
|
#ifdef HAVE_HDF5
|
||||||
rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
|
rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, max_dim, &eof_read_size, index_sparse, value_sparse);
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
rc = TREXIO_BACK_END_MISSING;
|
rc = TREXIO_BACK_END_MISSING;
|
||||||
@ -3276,9 +3288,10 @@ trexio_write_$group_dset$(trexio_t* const file,
|
|||||||
if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
|
if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
|
||||||
if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
|
if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
|
||||||
|
|
||||||
const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices
|
/* To be set by generator : number of indices */
|
||||||
|
const uint32_t rank = $group_dset_rank$;
|
||||||
|
|
||||||
int64_t size_max=0L; // Max number of integrals (already in the file)
|
int64_t size_max = 0L; // Max number of integrals (already in the file)
|
||||||
trexio_exit_code rc;
|
trexio_exit_code rc;
|
||||||
|
|
||||||
/* Read the max number of integrals stored in the file */
|
/* Read the max number of integrals stored in the file */
|
||||||
@ -3286,9 +3299,21 @@ trexio_write_$group_dset$(trexio_t* const file,
|
|||||||
if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
|
if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
|
||||||
if (rc == TREXIO_DSET_MISSING) size_max = 0L;
|
if (rc == TREXIO_DSET_MISSING) size_max = 0L;
|
||||||
|
|
||||||
int64_t num;
|
/* To be set by generator : number of unique dimensions
|
||||||
rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
|
(e.g. 1 for ERI in AO basis because only ao_num is present in the list of dimensions) */
|
||||||
if (rc != TREXIO_SUCCESS) return rc;
|
const uint32_t unique_rank = $group_dset_unique_rank$;
|
||||||
|
int64_t unique_dims[$group_dset_unique_rank$];
|
||||||
|
|
||||||
|
// Below part is populated by the generator when unique_rank > 1
|
||||||
|
rc = trexio_read_$group_dset_unique_dim$_64(file, &unique_dims[$dim_id$]); if (rc != TREXIO_SUCCESS) return rc;
|
||||||
|
|
||||||
|
/* Find the maximal value along all dimensions to define the compression technique in the back end */
|
||||||
|
int64_t max_dim = unique_dims[0];
|
||||||
|
if (unique_rank != 1) {
|
||||||
|
for (int i = 1; i < unique_rank; i++) {
|
||||||
|
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// shift indices to be zero-based if Fortran API is used
|
// shift indices to be zero-based if Fortran API is used
|
||||||
if (file->one_based) {
|
if (file->one_based) {
|
||||||
@ -3305,13 +3330,13 @@ trexio_write_$group_dset$(trexio_t* const file,
|
|||||||
switch (file->back_end) {
|
switch (file->back_end) {
|
||||||
|
|
||||||
case TREXIO_TEXT:
|
case TREXIO_TEXT:
|
||||||
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num,
|
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, max_dim,
|
||||||
size_max, index_sparse_p, value_sparse);
|
size_max, index_sparse_p, value_sparse);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TREXIO_HDF5:
|
case TREXIO_HDF5:
|
||||||
#ifdef HAVE_HDF5
|
#ifdef HAVE_HDF5
|
||||||
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num,
|
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, max_dim,
|
||||||
index_sparse_p, value_sparse);
|
index_sparse_p, value_sparse);
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
@ -3336,13 +3361,13 @@ trexio_write_$group_dset$(trexio_t* const file,
|
|||||||
switch (file->back_end) {
|
switch (file->back_end) {
|
||||||
|
|
||||||
case TREXIO_TEXT:
|
case TREXIO_TEXT:
|
||||||
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num,
|
rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, max_dim,
|
||||||
size_max, index_sparse, value_sparse);
|
size_max, index_sparse, value_sparse);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TREXIO_HDF5:
|
case TREXIO_HDF5:
|
||||||
#ifdef HAVE_HDF5
|
#ifdef HAVE_HDF5
|
||||||
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num,
|
rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, max_dim,
|
||||||
index_sparse, value_sparse);
|
index_sparse, value_sparse);
|
||||||
break;
|
break;
|
||||||
#else
|
#else
|
||||||
|
@ -108,11 +108,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
|
|||||||
triggers = ['group_dset_dtype', 'group_dset_py_dtype', 'group_dset_h5_dtype', 'default_prec', 'is_index',
|
triggers = ['group_dset_dtype', 'group_dset_py_dtype', 'group_dset_h5_dtype', 'default_prec', 'is_index',
|
||||||
'group_dset_f_dtype_default', 'group_dset_f_dtype_double', 'group_dset_f_dtype_single',
|
'group_dset_f_dtype_default', 'group_dset_f_dtype_double', 'group_dset_f_dtype_single',
|
||||||
'group_dset_dtype_default', 'group_dset_dtype_double', 'group_dset_dtype_single',
|
'group_dset_dtype_default', 'group_dset_dtype_double', 'group_dset_dtype_single',
|
||||||
'group_dset_rank', 'group_dset_dim_list', 'group_dset_f_dims',
|
'group_dset_rank', 'group_dset_unique_rank', 'group_dset_dim_list', 'group_dset_f_dims',
|
||||||
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
|
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
|
||||||
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
|
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
|
||||||
'group_num_h5_dtype', 'group_num_py_dtype',
|
'group_num_h5_dtype', 'group_num_py_dtype', 'group_dset_format_scanf', 'group_dset_format_printf',
|
||||||
'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
|
|
||||||
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
|
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
|
||||||
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
|
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
|
||||||
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
|
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
|
||||||
@ -151,6 +150,11 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
|
|||||||
if 'dim' in detailed_source[item]['trex_json_int_type']:
|
if 'dim' in detailed_source[item]['trex_json_int_type']:
|
||||||
templine = line.replace('//', '')
|
templine = line.replace('//', '')
|
||||||
f_out.write(templine)
|
f_out.write(templine)
|
||||||
|
# special case to get the max dimension of sparse datasets with different dimensions
|
||||||
|
elif 'trexio_read_$group_dset_unique_dim$_64' in line:
|
||||||
|
for i in range(int(detailed_source[item]['group_dset_unique_rank'])):
|
||||||
|
templine = line.replace('$group_dset_unique_dim$', detailed_source[item]['unique_dims'][i]).replace('$dim_id$', str(i))
|
||||||
|
f_out.write(templine)
|
||||||
# general case of recursive replacement of inline triggers
|
# general case of recursive replacement of inline triggers
|
||||||
else:
|
else:
|
||||||
populated_line = recursive_replace_line(line, triggers, detailed_source[item])
|
populated_line = recursive_replace_line(line, triggers, detailed_source[item])
|
||||||
@ -717,6 +721,11 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
|
|||||||
# add the list of dimensions
|
# add the list of dimensions
|
||||||
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
|
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
|
||||||
|
|
||||||
|
# get a list of unique dimensions for sparse datasets
|
||||||
|
if is_sparse:
|
||||||
|
tmp_dict['unique_dims'] = list(set(tmp_dict['dims']))
|
||||||
|
tmp_dict['group_dset_unique_rank'] = str(len(tmp_dict['unique_dims']))
|
||||||
|
|
||||||
# add the rank
|
# add the rank
|
||||||
tmp_dict['rank'] = rank
|
tmp_dict['rank'] = rank
|
||||||
tmp_dict['group_dset_rank'] = str(rank)
|
tmp_dict['group_dset_rank'] = str(rank)
|
||||||
@ -737,8 +746,6 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
|
|||||||
tmp_dict['group_dset_f_dims'] = dim_f_list
|
tmp_dict['group_dset_f_dims'] = dim_f_list
|
||||||
|
|
||||||
if is_sparse:
|
if is_sparse:
|
||||||
# store the max possible dim of the sparse dset (e.g. mo_num)
|
|
||||||
tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
|
|
||||||
# build printf/scanf sequence and compute line length for n-index sparse quantity
|
# build printf/scanf sequence and compute line length for n-index sparse quantity
|
||||||
index_printf = f'*(index_sparse + {str(rank)}*i'
|
index_printf = f'*(index_sparse + {str(rank)}*i'
|
||||||
index_scanf = f'index_sparse + {str(rank)}*i'
|
index_scanf = f'index_sparse + {str(rank)}*i'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user