mirror of https://github.com/TREX-CoE/trexio.git (synced 2024-12-22 12:23:54 +01:00)

[WIP] working write_ for chunked extensible (sparse) datasets

parent ddcfff0f83
commit d001844c2f
@@ -372,6 +372,202 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src

** Template for HDF5 has/read/write the dataset of sparse data

Sparse data is stored using extensible HDF5 datasets. Extensibility is required
because the sparse data are written in chunks of user-defined size, so the
dataset has to grow each time a new chunk is appended.
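
For orientation, here is a minimal, self-contained sketch of this append
pattern in plain HDF5, outside the template machinery (the function and all
names in it are illustrative, not part of the generated code):

#+begin_src c
#include <hdf5.h>
#include <hdf5_hl.h>

/* Append `count` int32 values to a 1D dataset with unlimited maximum size,
   creating the dataset on first use. All names here are illustrative. */
static int
append_chunk (hid_t group, const char* name, const int32_t* data, hsize_t count)
{
  hid_t dset_id;

  if (H5LTfind_dataset(group, name) != 1) {
    /* First chunk: create a chunked dataset with an unlimited max dimension. */
    hsize_t dims[1]    = {count};
    hsize_t maxdims[1] = {H5S_UNLIMITED};
    hid_t fspace = H5Screate_simple(1, dims, maxdims);
    hid_t prop   = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_chunk(prop, 1, dims);
    dset_id = H5Dcreate(group, name, H5T_NATIVE_INT32, fspace,
                        H5P_DEFAULT, prop, H5P_DEFAULT);
    H5Dwrite(dset_id, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
    H5Pclose(prop);
    H5Sclose(fspace);
  } else {
    /* Later chunks: extend the dataset, then write into the new tail. */
    dset_id = H5Dopen(group, name, H5P_DEFAULT);
    hid_t fspace = H5Dget_space(dset_id);
    hsize_t offset[1];
    H5Sget_simple_extent_dims(fspace, offset, NULL);  /* current size */
    H5Sclose(fspace);

    hsize_t newdims[1] = {offset[0] + count};
    H5Dset_extent(dset_id, newdims);

    fspace = H5Dget_space(dset_id);                   /* refreshed extent */
    hsize_t cnt[1] = {count};
    H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, cnt, NULL);
    hid_t mspace = H5Screate_simple(1, cnt, NULL);
    H5Dwrite(dset_id, H5T_NATIVE_INT32, mspace, fspace, H5P_DEFAULT, data);
    H5Sclose(mspace);
    H5Sclose(fspace);
  }

  H5Dclose(dset_id);
  return 0;
}
#+end_src
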
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src

#+begin_src c :tangle write_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  // The indices are stored as a flat rank-1 dataset: each of the `size`
  // sparse elements contributes 4 indices, hence the factor 4.
  // TODO: the number of indices per element should come from the generator.
  // TODO: value_sparse is not written yet (WIP).
  const uint32_t rank = 1;
  const hsize_t chunk_dims[1] = {size*4};
  hsize_t maxdims[1] = {H5S_UNLIMITED};

  if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {

    // the dataset does not exist yet: create it chunked and extensible
    hid_t dspace = H5Screate_simple(rank, chunk_dims, maxdims);
    hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
    herr_t status = H5Pset_chunk(prop, rank, chunk_dims);
    if (status < 0) return TREXIO_FAILURE;

    hid_t dset_id = H5Dcreate(f->$group$_group,
                              $GROUP_DSET$_NAME,
                              H5T_NATIVE_INT32,
                              dspace,
                              H5P_DEFAULT,
                              prop,
                              H5P_DEFAULT);
    if (dset_id <= 0) return TREXIO_INVALID_ID;

    status = H5Dwrite(dset_id, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL, H5P_DEFAULT, index_sparse);

    H5Pclose(prop);
    H5Dclose(dset_id);
    H5Sclose(dspace);

    if (status < 0) return TREXIO_FAILURE;

  } else {

    // the dataset exists: extend it by one chunk and write into the new slab
    hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (dset_id <= 0) return TREXIO_INVALID_ID;

    hid_t fspace = H5Dget_space(dset_id);
    hsize_t offset[1] = {offset_file*4};

    // get the current dimension of the dataset
    hsize_t ddims[1] = {0};
    H5Sget_simple_extent_dims(fspace, ddims, NULL);
    // extend the dataset by one chunk
    ddims[0] += chunk_dims[0];
    herr_t status = H5Dset_extent(dset_id, ddims);

    // the file dataspace has to be refreshed after the extension
    H5Sclose(fspace);
    fspace = H5Dget_space(dset_id);

    status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, chunk_dims, NULL);
    hid_t dspace = H5Screate_simple(rank, chunk_dims, NULL);

    status = H5Dwrite(dset_id,
                      H5T_NATIVE_INT32,
                      dspace, fspace, H5P_DEFAULT,
                      index_sparse);

    H5Dclose(dset_id);
    H5Sclose(dspace);
    H5Sclose(fspace);

    if (status < 0) return TREXIO_FAILURE;

  }

  return TREXIO_SUCCESS;
}
#+end_src
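
For illustration, appending two consecutive chunks of 1000 sparse elements
could look as follows once the template is instantiated. The function name
~trexio_hdf5_write_ao_2e_int_eri~ and the buffer sizes are assumptions for
this sketch, not part of the template:

#+begin_src c
/* Hypothetical caller: the actual name is generated from $group_dset$. */
int32_t idx[2000*4];   /* 4 indices per sparse element */
double  val[2000];
/* ... fill idx and val ... */
trexio_exit_code rc;
rc = trexio_hdf5_write_ao_2e_int_eri(file,    0, 1000, 2000, idx,        val);
rc = trexio_hdf5_write_ao_2e_int_eri(file, 1000, 1000, 2000, idx + 4000, val + 1000);
#+end_src
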
#+begin_src c :tangle read_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int32_t* const index_sparse,
                               double* const value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  // open the dataset to get its dimensions
  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // NOTE (WIP): the write template currently creates a rank-1 dataset,
  // so this rank-4 consistency check will need to be revisited.
  const uint32_t rank = 4;

  // allocate space for the dimensions to be read
  hsize_t* ddims = CALLOC( (int) rank, hsize_t);
  if (ddims == NULL) {
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  // get the dataspace of the dataset
  hid_t dspace_id = H5Dget_space(dset_id);
  // get the rank and dimensions of the dataset
  int rrank = H5Sget_simple_extent_dims(dspace_id, ddims, NULL);

  // check that the dimensions are consistent
  if (rrank != (int) rank) {
    FREE(ddims);
    H5Sclose(dspace_id);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ARG_3;
  }

  FREE(ddims);
  H5Sclose(dspace_id);
  H5Dclose(dset_id);

  /* High-level H5LT API. No need to deal with dataspaces and datatypes.
     Kept for reference; it reads the whole dataset instead of a chunk. */
  /*herr_t status = H5LTread_dataset(f->$group$_group,
                                     $GROUP_DSET$_NAME,
                                     H5T_$GROUP_DSET_H5_DTYPE$,
                                     $group_dset$);
  if (status < 0) return TREXIO_FAILURE;*/

  return TREXIO_SUCCESS;
}
#+end_src
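
The chunked read that this stub still lacks would mirror the write path:
select a hyperslab of the file dataspace and read it into ~index_sparse~.
A sketch under the same 4-indices-per-element assumption; it reuses
~dset_id~, so it would replace the closing calls above rather than follow
them:

#+begin_src c
/* Sketch only: chunked read symmetric to the write template. */
hsize_t offset[1] = {(hsize_t) offset_file * 4};
hsize_t count[1]  = {(hsize_t) size * 4};

hid_t fspace = H5Dget_space(dset_id);
H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);

hid_t mspace = H5Screate_simple(1, count, NULL);
herr_t status = H5Dread(dset_id, H5T_NATIVE_INT32, mspace, fspace,
                        H5P_DEFAULT, index_sparse);

H5Sclose(mspace);
H5Sclose(fspace);
H5Dclose(dset_id);
if (status < 0) return TREXIO_FAILURE;
#+end_src
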
#+begin_src c :tangle read_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  // TODO: query the current dataset extent and store it in *size_max

  return TREXIO_SUCCESS;
}
#+end_src
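
One plausible shape for that TODO, assuming as elsewhere in this template
that 4 indices are stored per sparse element (a sketch, not the final code):

#+begin_src c
/* Sketch: query the current extent of the index dataset. */
trexio_hdf5_t* f = (trexio_hdf5_t*) file;

hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;

hid_t fspace = H5Dget_space(dset_id);
hsize_t ddims[1] = {0};
H5Sget_simple_extent_dims(fspace, ddims, NULL);
H5Sclose(fspace);
H5Dclose(dset_id);

*size_max = (int64_t) (ddims[0] / 4);  /* number of elements, not indices */
#+end_src
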
#+begin_src c :tangle has_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}
#+end_src

** Template for HDF5 has/read/write the dataset of strings

#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none

@@ -709,5 +905,3 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)

#endif
#+end_src

@@ -15,6 +15,12 @@ dsets = get_dset_dict(trex_config)
detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
detailed_dsets = detailed_dsets_nostr.copy()
detailed_dsets.update(detailed_dsets_str)
# build a big dictionary with all pre-processed data
detailed_all = {}
detailed_all['datasets'] = dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse)
detailed_all['groups'] = group_dict
detailed_all['numbers'] = detailed_nums
detailed_all['strings'] = detailed_strs
# consistency check for dimensioning variables
check_dim_consistency(detailed_nums, dsets)
# --------------------------------------------------------------------------- #

@@ -33,7 +39,7 @@ files_todo = get_files_todo(source_files)

# populate files with iterative scheme, i.e. for unique functions
for fname in files_todo['auxiliary']:
-    iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
+    iterative_populate_file(fname, template_paths, detailed_all)

# populate has/read/write_num functions with recursive scheme
for fname in files_todo['attr_num']:

@@ -181,17 +181,18 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> str
    return output_line


-def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
+def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
    """
    Iteratively populate files with unique functions that contain templated variables.

    Parameters:
    filename (str) : template file to be populated
    paths (dict) : dictionary of paths per source directory
-   groups (dict) : dictionary of groups
-   datasets (dict) : dictionary of datasets with substitution details
-   numbers (dict) : dictionary of numbers with substitution details
-   strings (dict) : dictionary of strings with substitution details
+   detailed_all (dict) : dictionary with substitution details, with the following keys:
+       'groups' : dictionary of groups with substitution details
+       'datasets' : dictionary of datasets with substitution details
+       'numbers' : dictionary of numbers with substitution details
+       'strings' : dictionary of strings with substitution details

    Returns:
    None

@@ -211,19 +212,19 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
        if id == 0:
            # special case for proper error handling when deallocating text groups
            error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
-           populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
+           populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
            f_out.write(populated_line)
        elif id == 1:
-           populated_line = iterative_replace_line(line, triggers[id], datasets, None)
+           populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
            f_out.write(populated_line)
        elif id == 2:
-           populated_line = iterative_replace_line(line, triggers[id], numbers, None)
+           populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
            f_out.write(populated_line)
        elif id == 3:
-           populated_line = iterative_replace_line(line, triggers[id], strings, None)
+           populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
            f_out.write(populated_line)
        elif id == 4:
-           populated_line = iterative_replace_line(line, triggers[id], groups, None)
+           populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
            f_out.write(populated_line)
        else:
            f_out.write(line)