1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-12-22 12:23:54 +01:00

[WIP] working write_ for chunked extensible (sparse) datasets

This commit is contained in:
q-posev 2021-12-08 17:26:55 +01:00
parent ddcfff0f83
commit d001844c2f
3 changed files with 229 additions and 28 deletions

View File

@ -372,6 +372,202 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src
** Template for HDF5 has/read/write the dataset of sparse data
Sparse data is stored in extensible (chunked) HDF5 datasets. Extensibility is required
because the sparse data is written in chunks of user-defined size.
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
/* Check whether the sparse dataset is present in the HDF5 file. */
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
/* Read counterpart of the chunked write below; takes the same offset/size arguments and caller-provided output buffers. */
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
/* Write one chunk of sparse data at the given offset into the extensible dataset, creating it on first use. */
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
/* Intended to report the stored size of the sparse dataset through size_max. */
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src
#+begin_src c :tangle write_dset_sparse_hdf5.c
/*
 * Write one chunk of sparse indices into the extensible 1-D HDF5 dataset
 * $GROUP_DSET$_NAME of the $group$ group.
 *
 * If the dataset does not exist yet it is created as a chunked, unlimited
 * dataset holding exactly this chunk. Otherwise the dataset is grown by one
 * chunk and the new data is written at position offset_file*4.
 *
 * Parameters:
 *   file         - TREXIO file handle (HDF5 back end)
 *   offset_file  - offset, in sparse elements, at which the chunk is written
 *                  when the dataset already exists
 *   size         - number of sparse elements in this chunk (must be > 0)
 *   size_max     - not used yet (WIP)
 *   index_sparse - flattened indices; size*4 int32 values are written
 *                  (presumably 4 indices per sparse element - TODO: generator)
 *   value_sparse - not used yet (WIP): values are not written by this routine
 *
 * Returns:
 *   TREXIO_SUCCESS       on success
 *   TREXIO_INVALID_ARG_1 if file is NULL
 *   TREXIO_INVALID_ARG_3 if size is not positive
 *   TREXIO_INVALID_ID    if the existing dataset cannot be opened
 *   TREXIO_FAILURE       if any other HDF5 call fails
 */
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  /* HDF5 chunk dimensions must be strictly positive */
  if (size <= 0) return TREXIO_INVALID_ARG_3;

  /* WIP: these arguments are part of the interface but not consumed yet */
  (void) size_max;
  (void) value_sparse;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  const int rank = 1;
  // TODO: generator (the factor 4 is hard-coded)
  const hsize_t chunk_dims[1] = { (hsize_t) size * 4 };
  const hsize_t maxdims[1]    = { H5S_UNLIMITED };

  /* Every handle starts invalid so that the cleanup section can tell what was opened */
  hid_t dset_id = -1;
  hid_t fspace  = -1;
  hid_t dspace  = -1;
  hid_t prop    = -1;

  trexio_exit_code rc = TREXIO_FAILURE;

  if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {

    /* First write: create a chunked dataset that can grow without bound */
    dspace = H5Screate_simple(rank, chunk_dims, maxdims);
    if (dspace < 0) goto cleanup;

    prop = H5Pcreate(H5P_DATASET_CREATE);
    if (prop < 0) goto cleanup;

    if (H5Pset_chunk(prop, rank, chunk_dims) < 0) goto cleanup;

    dset_id = H5Dcreate(f->$group$_group,
                        $GROUP_DSET$_NAME,
                        H5T_NATIVE_INT32,
                        dspace,
                        H5P_DEFAULT,
                        prop,
                        H5P_DEFAULT);
    if (dset_id < 0) goto cleanup;

    if (H5Dwrite(dset_id, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL, H5P_DEFAULT, index_sparse) < 0) goto cleanup;

  } else {

    dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (dset_id <= 0) {
      dset_id = -1;
      rc = TREXIO_INVALID_ID;
      goto cleanup;
    }

    fspace = H5Dget_space(dset_id);
    if (fspace < 0) goto cleanup;

    /* Check the rank before fetching the extent: ddims has room for one entry only */
    if (H5Sget_simple_extent_ndims(fspace) != rank) goto cleanup;

    hsize_t ddims[1] = {0};
    if (H5Sget_simple_extent_dims(fspace, ddims, NULL) != rank) goto cleanup;

    /* extend the dset size by one chunk */
    ddims[0] += chunk_dims[0];
    if (H5Dset_extent(dset_id, ddims) < 0) goto cleanup;

    /* The dataspace obtained before the resize is stale: fetch a fresh one */
    H5Sclose(fspace);
    fspace = H5Dget_space(dset_id);
    if (fspace < 0) goto cleanup;

    const hsize_t offset[1] = { (hsize_t) offset_file * 4 };
    if (H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, chunk_dims, NULL) < 0) goto cleanup;

    /* memory dataspace describing the user buffer */
    dspace = H5Screate_simple(rank, chunk_dims, NULL);
    if (dspace < 0) goto cleanup;

    if (H5Dwrite(dset_id,
                 H5T_NATIVE_INT32,
                 dspace, fspace, H5P_DEFAULT,
                 index_sparse) < 0) goto cleanup;

  }

  rc = TREXIO_SUCCESS;

cleanup:
  if (prop    >= 0) H5Pclose(prop);
  if (dspace  >= 0) H5Sclose(dspace);
  if (fspace  >= 0) H5Sclose(fspace);
  if (dset_id >= 0) H5Dclose(dset_id);

  return rc;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
/*
 * Read counterpart of trexio_hdf5_write_$group_dset$ (work in progress).
 *
 * At this stage the routine only opens the dataset $GROUP_DSET$_NAME and
 * verifies that it has the 1-D layout produced by the write routine.
 * TODO: no data is transferred into index_sparse / value_sparse yet.
 *
 * Parameters:
 *   file         - TREXIO file handle (HDF5 back end)
 *   offset_file  - not used yet
 *   size         - not used yet
 *   size_max     - not used yet
 *   index_sparse - output buffer for indices (not written yet)
 *   value_sparse - output buffer for values (not written yet)
 *
 * Returns:
 *   TREXIO_SUCCESS       if the dataset exists and has rank 1
 *   TREXIO_INVALID_ARG_1 if file is NULL
 *   TREXIO_INVALID_ID    if the dataset cannot be opened
 *   TREXIO_INVALID_ARG_3 if the rank of the stored dataset is not 1
 *   TREXIO_FAILURE       if the dataspace cannot be queried
 */
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int32_t* const index_sparse,
                               double* const value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* WIP: these arguments are part of the interface but not consumed yet */
  (void) offset_file;
  (void) size;
  (void) size_max;
  (void) index_sparse;
  (void) value_sparse;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  /* open the dataset to inspect its layout */
  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t dspace_id = H5Dget_space(dset_id);
  if (dspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  /* The write routine stores the data as a rank-1 extensible dataset */
  const int rank = 1;

  /* Only the rank is needed, so query it directly instead of filling a
     dimension buffer whose required length is unknown beforehand */
  const int rrank = H5Sget_simple_extent_ndims(dspace_id);

  H5Sclose(dspace_id);
  H5Dclose(dset_id);

  if (rrank < 0) return TREXIO_FAILURE;

  /* check that dimensions are consistent */
  if (rrank != rank) return TREXIO_INVALID_ARG_3;

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
/*
 * Report how many sparse elements are currently stored in $GROUP_DSET$_NAME.
 *
 * The size is derived from the extent of the 1-D dataset, which the write
 * routine fills with 4 int32 entries per sparse element.
 * NOTE(review): the divisor 4 mirrors the hard-coded factor of the write
 * routine - confirm once the generator provides the real number of indices.
 *
 * Parameters:
 *   file     - TREXIO file handle (HDF5 back end)
 *   size_max - output: number of stored sparse elements
 *
 * Returns:
 *   TREXIO_SUCCESS       on success (size_max is set)
 *   TREXIO_INVALID_ARG_1 if file is NULL
 *   TREXIO_INVALID_ARG_2 if size_max is NULL
 *   TREXIO_INVALID_ID    if the dataset cannot be opened
 *   TREXIO_FAILURE       if the extent cannot be queried or is not 1-D
 */
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  trexio_exit_code rc = TREXIO_FAILURE;

  hid_t fspace = H5Dget_space(dset_id);
  if (fspace >= 0) {

    /* Check the rank first: ddims below has room for a single dimension */
    if (H5Sget_simple_extent_ndims(fspace) == 1) {
      hsize_t ddims[1] = {0};
      if (H5Sget_simple_extent_dims(fspace, ddims, NULL) == 1) {
        *size_max = (int64_t) (ddims[0] / 4);
        rc = TREXIO_SUCCESS;
      }
    }

    H5Sclose(fspace);
  }

  H5Dclose(dset_id);

  return rc;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_hdf5.c
/*
 * Tell whether the dataset $GROUP_DSET$_NAME is present in the $group$ group.
 *
 * Returns:
 *   TREXIO_SUCCESS       if the dataset exists
 *   TREXIO_HAS_NOT       if it does not exist
 *   TREXIO_INVALID_ARG_1 if file is NULL
 *   TREXIO_FAILURE       if the lookup returns anything other than 0 or 1
 */
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* const backend = (trexio_hdf5_t*) file;

  /* H5LTfind_dataset yields 1 when the dataset is found and 0 when it is not */
  const herr_t found = H5LTfind_dataset(backend->$group$_group, $GROUP_DSET$_NAME);

  switch (found) {
  case 1:
    return TREXIO_SUCCESS;
  case 0:
    return TREXIO_HAS_NOT;
  default:
    return TREXIO_FAILURE;
  }
}
#+end_src
** Template for HDF5 has/read/write the dataset of strings
#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none
@ -709,5 +905,3 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
#endif
#+end_src

View File

@ -15,6 +15,12 @@ dsets = get_dset_dict(trex_config)
detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
detailed_dsets = detailed_dsets_nostr.copy()
detailed_dsets.update(detailed_dsets_str)
# build a big dictionary with all pre-processed data
detailed_all = {}
detailed_all['datasets'] = dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse)
detailed_all['groups'] = group_dict
detailed_all['numbers'] = detailed_nums
detailed_all['strings'] = detailed_strs
# consistency check for dimensioning variables
check_dim_consistency(detailed_nums, dsets)
# --------------------------------------------------------------------------- #
@ -33,7 +39,7 @@ files_todo = get_files_todo(source_files)
# populate files with iterative scheme, i.e. for unique functions
for fname in files_todo['auxiliary']:
iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
iterative_populate_file(fname, template_paths, detailed_all)
# populate has/read/write_num functions with recursive scheme
for fname in files_todo['attr_num']:

View File

@ -181,17 +181,18 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
return output_line
def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
"""
Iteratively populate files with unique functions that contain templated variables.
Parameters:
filename (str) : template file to be populated
paths (dict) : dictionary of paths per source directory
groups (dict) : dictionary of groups
datasets (dict) : dictionary of datasets with substitution details
numbers (dict) : dictionary of numbers with substitution details
strings (dict) : dictionary of strings with substitution details
detailed_all(dict) : dictionary with substitution details with the following keys:
'groups' : dictionary of groups with substitution details
'datasets' : dictionary of datasets with substitution details
'numbers' : dictionary of numbers with substitution details
'strings' : dictionary of strings with substitution details
Returns:
None
@ -211,19 +212,19 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
if id == 0:
# special case for proper error handling when deallocating text groups
error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
f_out.write(populated_line)
elif id == 1:
populated_line = iterative_replace_line(line, triggers[id], datasets, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
f_out.write(populated_line)
elif id == 2:
populated_line = iterative_replace_line(line, triggers[id], numbers, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
f_out.write(populated_line)
elif id == 3:
populated_line = iterative_replace_line(line, triggers[id], strings, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
f_out.write(populated_line)
elif id == 4:
populated_line = iterative_replace_line(line, triggers[id], groups, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
f_out.write(populated_line)
else:
f_out.write(line)