[WIP] working write_ for chunked extensible (sparse) datasets
This commit is contained in:
parent ddcfff0f83
commit d001844c2f
@@ -372,6 +372,202 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src

** Template for HDF5 has/read/write the dataset of sparse data

Sparse data is stored using extensible HDF5 datasets. Extensibility is required
because the sparse data is written in chunks of user-defined size.

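As a reference for the pattern used in the templates below, here is a minimal stand-alone sketch of the HDF5 create-then-extend cycle for an unlimited one-dimensional dataset. The helper name ~append_chunk~ and the flat ~int32~ layout are illustrative assumptions, not part of the generated code.

#+begin_src c
#include <hdf5.h>

/* Illustrative helper (not part of the template): append `count` int32
   values to an unlimited 1-D dataset, creating it on the first call. */
static herr_t append_chunk(hid_t group, const char* name,
                           const int32_t* buf, const hsize_t count)
{
  hsize_t dims[1]    = {count};
  hsize_t maxdims[1] = {H5S_UNLIMITED};

  if (H5Lexists(group, name, H5P_DEFAULT) <= 0) {
    /* First call: create a chunked, extensible dataset and fill it. */
    hid_t space = H5Screate_simple(1, dims, maxdims);
    hid_t prop  = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_chunk(prop, 1, dims);
    hid_t dset  = H5Dcreate(group, name, H5T_NATIVE_INT32, space,
                            H5P_DEFAULT, prop, H5P_DEFAULT);
    herr_t st = H5Dwrite(dset, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL,
                         H5P_DEFAULT, buf);
    H5Pclose(prop); H5Dclose(dset); H5Sclose(space);
    return st;
  }

  /* Subsequent calls: extend the dataset, then write into the new slab. */
  hid_t   dset   = H5Dopen(group, name, H5P_DEFAULT);
  hid_t   fspace = H5Dget_space(dset);
  hsize_t old_dims[1];
  H5Sget_simple_extent_dims(fspace, old_dims, NULL);
  H5Sclose(fspace);

  hsize_t new_dims[1] = {old_dims[0] + count};
  H5Dset_extent(dset, new_dims);

  fspace = H5Dget_space(dset);
  H5Sselect_hyperslab(fspace, H5S_SELECT_SET, old_dims, NULL, dims, NULL);
  hid_t mspace = H5Screate_simple(1, dims, NULL);
  herr_t st = H5Dwrite(dset, H5T_NATIVE_INT32, mspace, fspace,
                       H5P_DEFAULT, buf);
  H5Sclose(mspace); H5Sclose(fspace); H5Dclose(dset);
  return st;
}
#+end_src
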
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src

#+begin_src c :tangle write_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /* The indices are flattened into a rank-1 dataset: each sparse element
     carries 4 indices, hence the factor of 4 below.
     TODO: the rank and the factor should be produced by the generator. */
  const uint32_t rank = 1;
  const hsize_t chunk_dims[1] = {size*4};
  hsize_t maxdims[1] = {H5S_UNLIMITED};

  if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {

    /* First chunk: create a chunked, extensible dataset and write into it.
       The high-level H5LT API is not used here because it creates datasets
       of fixed size. */
    hid_t dspace = H5Screate_simple(rank, chunk_dims, maxdims);
    hid_t prop   = H5Pcreate(H5P_DATASET_CREATE);
    herr_t status = H5Pset_chunk(prop, rank, chunk_dims);

    hid_t dset_id = H5Dcreate(f->$group$_group,
                              $GROUP_DSET$_NAME,
                              H5T_NATIVE_INT32,
                              dspace,
                              H5P_DEFAULT,
                              prop,
                              H5P_DEFAULT);
    assert(dset_id >= 0);

    status = H5Dwrite(dset_id, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL, H5P_DEFAULT, index_sparse);
    assert(status >= 0);

    H5Pclose(prop);
    H5Dclose(dset_id);
    H5Sclose(dspace);

  } else {

    hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (dset_id <= 0) return TREXIO_INVALID_ID;

    hid_t fspace = H5Dget_space(dset_id);
    hsize_t offset[1] = {offset_file*4};

    // allocate space for the dimensions to be read
    hsize_t ddims[1] = {0};

    // get the current dimensions of the dataset and grow them by one chunk
    H5Sget_simple_extent_dims(fspace, ddims, NULL);
    ddims[0] += chunk_dims[0];

    // extend the dataset
    herr_t status = H5Dset_extent(dset_id, ddims);

    H5Sclose(fspace);

    // select the newly added slab in the file and write the chunk into it
    fspace = H5Dget_space(dset_id);
    status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, chunk_dims, NULL);
    hid_t dspace = H5Screate_simple(rank, chunk_dims, NULL);

    status = H5Dwrite(dset_id,
                      H5T_NATIVE_INT32,
                      dspace, fspace, H5P_DEFAULT,
                      index_sparse);
    assert(status >= 0);

    H5Dclose(dset_id);
    H5Sclose(dspace);
    H5Sclose(fspace);

  }

  return TREXIO_SUCCESS;
}
#+end_src
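
The intended call pattern is to append the sparse buffer chunk by chunk, advancing ~offset_file~ in units of sparse elements (the factor of 4 is applied internally). Note that in this WIP version only ~index_sparse~ is actually stored; ~value_sparse~ is accepted but not yet written. A hypothetical caller-side sketch, where ~write_all_chunks~, ~n_total~, ~chunk_size~, ~index_buf~ and ~value_buf~ are illustrative names and not part of TREXIO:

#+begin_src c
/* Hypothetical illustration of the chunked write pattern; not generated code. */
static trexio_exit_code
write_all_chunks (trexio_t* const file,
                  const int64_t n_total,      /* total number of sparse elements */
                  const int64_t chunk_size,   /* user-defined buffer size        */
                  const int32_t* index_buf,   /* 4*n_total indices               */
                  const double* value_buf)    /* n_total values                  */
{
  for (int64_t offset = 0; offset < n_total; offset += chunk_size) {

    /* the last chunk may be smaller than chunk_size */
    const int64_t n = (n_total - offset < chunk_size) ? (n_total - offset) : chunk_size;

    trexio_exit_code rc =
      trexio_hdf5_write_$group_dset$(file, offset, n, n_total,
                                     index_buf + 4*offset,
                                     value_buf + offset);
    if (rc != TREXIO_SUCCESS) return rc;
  }
  return TREXIO_SUCCESS;
}
#+end_src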

#+begin_src c :tangle read_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int32_t* const index_sparse,
                               double* const value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  // open the dataset to get its dimensions
  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // TODO: adapt to the flattened rank-1 extensible layout used by the write function
  const uint32_t rank = 4;

  // allocate space for the dimensions to be read
  hsize_t* ddims = CALLOC( (int) rank, hsize_t);
  if (ddims == NULL) return TREXIO_FAILURE;

  // get the dataspace of the dataset
  hid_t dspace_id = H5Dget_space(dset_id);
  // get the rank and dimensions of the dataset
  int rrank = H5Sget_simple_extent_dims(dspace_id, ddims, NULL);

  // check that dimensions are consistent
  if (rrank != (int) rank) {
    FREE(ddims);
    H5Sclose(dspace_id);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ARG_3;
  }

  FREE(ddims);
  H5Sclose(dspace_id);
  H5Dclose(dset_id);

  /* High-level H5LT API. No need to deal with dataspaces and datatypes */
  /*herr_t status = H5LTread_dataset(f->$group$_group,
                                     $GROUP_DSET$_NAME,
                                     H5T_$GROUP_DSET_H5_DTYPE$,
                                     $group_dset$);
  if (status < 0) return TREXIO_FAILURE;*/

  return TREXIO_SUCCESS;
}
#+end_src
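
The actual data transfer is not yet implemented above. A possible sketch of the chunked read, mirroring the hyperslab selection of the write template; it assumes the same flattened rank-1 layout (4 indices per element) and reuses the local variables ~f~, ~offset_file~, ~size~ and ~index_sparse~ of the function above:

#+begin_src c
/* Sketch only: read `size` sparse elements starting at `offset_file`
   from a flattened rank-1 int32 index dataset. */
hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;

hsize_t offset[1] = {(hsize_t) offset_file*4};
hsize_t count[1]  = {(hsize_t) size*4};

hid_t fspace = H5Dget_space(dset_id);
herr_t status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, NULL, count, NULL);

hid_t mspace = H5Screate_simple(1, count, NULL);
status = H5Dread(dset_id, H5T_NATIVE_INT32, mspace, fspace, H5P_DEFAULT, index_sparse);

H5Sclose(mspace);
H5Sclose(fspace);
H5Dclose(dset_id);
if (status < 0) return TREXIO_FAILURE;
#+end_src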

#+begin_src c :tangle read_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  // TODO

  return TREXIO_SUCCESS;
}
#+end_src
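
One possible way to fill in the TODO above: query the current extent of the index dataset and convert it back to a number of sparse elements, again assuming the flattened rank-1 layout with 4 indices per element. This is a sketch, not the final implementation:

#+begin_src c
/* Sketch only: possible body for trexio_hdf5_read_$group_dset$_size. */
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;

hid_t fspace = H5Dget_space(dset_id);
hssize_t npoints = H5Sget_simple_extent_npoints(fspace);

H5Sclose(fspace);
H5Dclose(dset_id);

if (npoints < 0) return TREXIO_FAILURE;
*size_max = (int64_t) npoints / 4;   /* 4 indices per sparse element */
#+end_src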

#+begin_src c :tangle has_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}
#+end_src

** Template for HDF5 has/read/write the dataset of strings

#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none

@@ -709,5 +905,3 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)

#endif
#+end_src

@@ -15,6 +15,12 @@ dsets = get_dset_dict(trex_config)
 detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
 detailed_dsets = detailed_dsets_nostr.copy()
 detailed_dsets.update(detailed_dsets_str)
+# build a big dictionary with all pre-processed data
+detailed_all = {}
+detailed_all['datasets'] = dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse)
+detailed_all['groups'] = group_dict
+detailed_all['numbers'] = detailed_nums
+detailed_all['strings'] = detailed_strs
 # consistency check for dimensioning variables
 check_dim_consistency(detailed_nums, dsets)
 # --------------------------------------------------------------------------- #

@@ -33,7 +39,7 @@ files_todo = get_files_todo(source_files)

 # populate files with iterative scheme, i.e. for unique functions
 for fname in files_todo['auxiliary']:
-    iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
+    iterative_populate_file(fname, template_paths, detailed_all)

 # populate has/read/write_num functions with recursive scheme
 for fname in files_todo['attr_num']:

@@ -181,17 +181,18 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
     return output_line


-def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
+def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
     """
     Iteratively populate files with unique functions that contain templated variables.

     Parameters:
         filename (str) : template file to be populated
         paths (dict) : dictionary of paths per source directory
-        groups (dict) : dictionary of groups
-        datasets (dict) : dictionary of datasets with substitution details
-        numbers (dict) : dictionary of numbers with substitution details
-        strings (dict) : dictionary of strings with substitution details
+        detailed_all (dict) : dictionary with substitution details with the following keys:
+            'groups' : dictionary of groups with substitution details
+            'datasets' : dictionary of datasets with substitution details
+            'numbers' : dictionary of numbers with substitution details
+            'strings' : dictionary of strings with substitution details

     Returns:
         None

@@ -211,19 +212,19 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
             if id == 0:
                 # special case for proper error handling when deallocating text groups
                 error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
-                populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
+                populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
                 f_out.write(populated_line)
             elif id == 1:
-                populated_line = iterative_replace_line(line, triggers[id], datasets, None)
+                populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
                 f_out.write(populated_line)
             elif id == 2:
-                populated_line = iterative_replace_line(line, triggers[id], numbers, None)
+                populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
                 f_out.write(populated_line)
             elif id == 3:
-                populated_line = iterative_replace_line(line, triggers[id], strings, None)
+                populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
                 f_out.write(populated_line)
             elif id == 4:
-                populated_line = iterative_replace_line(line, triggers[id], groups, None)
+                populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
                 f_out.write(populated_line)
             else:
                 f_out.write(line)