diff --git a/src/templates_hdf5/build.sh b/src/templates_hdf5/build.sh index 137c7a5..5b54c0f 100644 --- a/src/templates_hdf5/build.sh +++ b/src/templates_hdf5/build.sh @@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c cat populated/pop_write_*.c >> trexio_hdf5.c cat populated/pop_hrw_*.h >> trexio_hdf5.h +cat helpers_hdf5.c >> trexio_hdf5.c cat suffix_hdf5.h >> trexio_hdf5.h - diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index 11527f2..1dbc220 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -399,263 +399,71 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, trexio_hdf5_t* f = (trexio_hdf5_t*) file; - const uint32_t h5_rank = 1; - const hsize_t chunk_i_dims[1] = {size * $group_dset_rank$}; - const hsize_t chunk_v_dims[1] = {size}; - const hsize_t maxdims[1] = {H5S_UNLIMITED}; - - char dset_name[256] = "\0"; - strncpy(dset_name, $GROUP_DSET$_NAME "_indices", 256); - - hid_t dtype; + hid_t index_dtype; void* index_p; - int64_t size_ranked = size * $group_dset_rank$; - + uint64_t size_ranked = (uint64_t) size * $group_dset_rank$; + /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */ if (size_max < UINT8_MAX) { - dtype = H5T_NATIVE_UINT8; uint8_t* index = CALLOC(size_ranked, uint8_t); if (index == NULL) return TREXIO_ALLOCATION_FAILED; for (int64_t i=0; i$group$_group, dset_name) != 1 ) { + /* Store float values in double precision */ + hid_t value_dtype = H5T_NATIVE_DOUBLE; + /* Arrays of chunk dims that will be used for chunking the dataset */ + const hsize_t chunk_i_dims[1] = {size_ranked}; + const hsize_t chunk_v_dims[1] = {size}; - hid_t dspace = H5Screate_simple(h5_rank, chunk_i_dims, maxdims); - if (dspace < 0) return TREXIO_INVALID_ID; + /* Indices and values are stored as 2 independent datasets in the HDF5 file */ + char dset_index_name[256] = "\0"; + char dset_value_name[256] = "\0"; + /* Build 
the names of the datasets */ + strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256); + strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256); - hid_t prop = H5Pcreate(H5P_DATASET_CREATE); - if (prop < 0) { - H5Sclose(dspace); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } + trexio_exit_code rc_write = TREXIO_FAILURE; + /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */ + if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) { + /* If the file does not exist -> create it and write */ - herr_t status = H5Pset_chunk(prop, h5_rank, chunk_i_dims); - if (status < 0) { - H5Sclose(dspace); - H5Pclose(prop); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - hid_t dset_id = H5Dcreate(f->$group$_group, - dset_name, - dtype, - dspace, - H5P_DEFAULT, - prop, - H5P_DEFAULT); - if (dset_id < 0) { - H5Sclose(dspace); - H5Pclose(prop); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - status = H5Dwrite(dset_id, - dtype, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - index_p); - H5Sclose(dspace); - H5Pclose(prop); - H5Dclose(dset_id); + /* Create chunked dataset with index_dtype datatype and write indices into it */ + rc_write = trexio_hdf5_create_write_dset_sparse(f, f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p); if (index_p != index_sparse) FREE(index_p); + if (rc_write != TREXIO_SUCCESS) return rc_write; - if (status < 0) return TREXIO_FAILURE; - - // repeat the same for values; TODO : make a separate function ? 
- dspace = H5Screate_simple(h5_rank, chunk_v_dims, maxdims); - if (dspace < 0) return TREXIO_INVALID_ID; - - prop = H5Pcreate(H5P_DATASET_CREATE); - if (prop < 0) { - H5Sclose(dspace); - return TREXIO_INVALID_ID; - } - - status = H5Pset_chunk(prop, h5_rank, chunk_v_dims); - if (status < 0) { - H5Sclose(dspace); - H5Pclose(prop); - return TREXIO_INVALID_ID; - } - - // same for values - strncpy(dset_name, $GROUP_DSET$_NAME "_values", 256); - - dset_id = H5Dcreate(f->$group$_group, - dset_name, - H5T_NATIVE_DOUBLE, - dspace, - H5P_DEFAULT, - prop, - H5P_DEFAULT); - if (dset_id < 0) { - H5Sclose(dspace); - H5Pclose(prop); - return TREXIO_INVALID_ID; - } - - status = H5Dwrite(dset_id, - H5T_NATIVE_DOUBLE, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - value_sparse); - H5Sclose(dspace); - H5Pclose(prop); - H5Dclose(dset_id); - - if (status < 0) return TREXIO_FAILURE; + /* Create chunked dataset with value_dtype datatype and write values into it */ + rc_write = trexio_hdf5_create_write_dset_sparse(f, f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse); + if (rc_write != TREXIO_SUCCESS) return rc_write; } else { - - hid_t dset_id = H5Dopen(f->$group$_group, dset_name, H5P_DEFAULT); - if (dset_id <= 0) { - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - hid_t fspace = H5Dget_space(dset_id); - if (fspace < 0) { - H5Dclose(dset_id); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - + /* If the file exists -> open it and write */ hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$}; - - // allocate space for the dimensions to be read - hsize_t ddims[1] = {0}; - - // get the rank and dimensions of the dataset - int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL); - ddims[0] += chunk_i_dims[0]; - - // extend the dset size - herr_t status = H5Dset_extent(dset_id, ddims); - if (status < 0) { - H5Sclose(fspace); - H5Dclose(dset_id); - if (index_p != index_sparse) FREE(index_p); - return 
TREXIO_INVALID_ID; - } - - // close and reopen the file dataspace to take into account the extension - H5Sclose(fspace); - fspace = H5Dget_space(dset_id); - if (fspace < 0) { - H5Dclose(dset_id); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - // select hyperslab to be written using chunk_dims and offset values - status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_i, NULL, chunk_i_dims, NULL); - if (status < 0) { - H5Sclose(fspace); - H5Dclose(dset_id); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - // create memory dataspace to write from - hid_t dspace = H5Screate_simple(h5_rank, chunk_i_dims, NULL); - if (dspace < 0) { - H5Sclose(fspace); - H5Sclose(dspace); - H5Dclose(dset_id); - if (index_p != index_sparse) FREE(index_p); - return TREXIO_INVALID_ID; - } - - status = H5Dwrite(dset_id, - dtype, - dspace, fspace, H5P_DEFAULT, - index_p); - H5Dclose(dset_id); - H5Sclose(dspace); - H5Sclose(fspace); - if (index_p != index_sparse) FREE(index_p); - - if (status < 0) return TREXIO_FAILURE; - - // same for values - - strncpy(dset_name, $GROUP_DSET$_NAME "_values", 256); - - dset_id = H5Dopen(f->$group$_group, dset_name, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - fspace = H5Dget_space(dset_id); - if (fspace < 0) { - H5Dclose(dset_id); - return TREXIO_INVALID_ID; - } - hsize_t offset_v[1] = {(hsize_t) offset_file}; - // allocate space for the dimensions to be read - ddims[0] = (hsize_t) 0; + /* Create chunked dataset with index_dtype datatype and write indices into it */ + rc_write = trexio_hdf5_open_write_dset_sparse(f, f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p); + if (index_p != index_sparse) FREE(index_p); + if (rc_write != TREXIO_SUCCESS) return rc_write; - // get the rank and dimensions of the dataset - rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL); - ddims[0] += chunk_v_dims[0]; - - // extend the dset size - status = 
H5Dset_extent(dset_id, ddims);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-
-    // close and reopen the file dataspace to take into account the extension
-    H5Sclose(fspace);
-    fspace = H5Dget_space(dset_id);
-    if (fspace < 0) {
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-
-    // select hyperslab to be written using chunk_dims and offset values
-    status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_v, NULL, chunk_v_dims, NULL);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-
-    // create memory dataspace to write from
-    dspace = H5Screate_simple(h5_rank, chunk_v_dims, NULL);
-    if (dspace < 0) {
-      H5Sclose(fspace);
-      H5Sclose(dspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-
-    status = H5Dwrite(dset_id,
-                      H5T_NATIVE_DOUBLE,
-                      dspace, fspace, H5P_DEFAULT,
-                      value_sparse);
-    H5Dclose(dset_id);
-    H5Sclose(dspace);
-    H5Sclose(fspace);
-
-    if (status < 0) return TREXIO_FAILURE;
+    /* Create chunked dataset with value_dtype datatype and write values into it */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f, f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
   }
@@ -1230,9 +1038,190 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
 }
   #+end_src
 
+** Helper functions
+
+  #+begin_src c :tangle helpers_hdf5.c
+/*
+ * Create the chunked, extendable 1D dataset `dset_name` in `group_id` and
+ * write the first chunk of data into it.
+ *
+ *   file        - TREXIO HDF5 file handle (not used by this helper)
+ *   group_id    - identifier of the HDF5 group that will hold the dataset
+ *   dset_name   - name of the dataset to create
+ *   dtype_id    - HDF5 datatype used both for storage and for the buffer
+ *   chunk_dims  - 1-element array: number of elements to write, also used
+ *                 as the chunk size of the new dataset
+ *   data_sparse - buffer holding chunk_dims[0] elements of type dtype_id
+ *
+ * Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if an HDF5 object could not be
+ * created, or TREXIO_FAILURE if writing the data failed.
+ */
+trexio_exit_code
+trexio_hdf5_create_write_dset_sparse (trexio_hdf5_t* const file,
+                                      const hid_t group_id,
+                                      const char* dset_name,
+                                      const hid_t dtype_id,
+                                      const hsize_t* chunk_dims,
+                                      const void* data_sparse)
+{
+  (void) file; /* not needed here; avoids an unused-parameter warning */
+
+  const int h5_rank = 1;
+  const hsize_t maxdims[1] = {H5S_UNLIMITED};
+
+  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
+  if (dspace < 0) return TREXIO_INVALID_ID;
+
+  hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
+  if (prop < 0) {
+    H5Sclose(dspace);
+    return TREXIO_INVALID_ID;
+  }
+
+  herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims);
+  if (status < 0) {
+    H5Sclose(dspace);
+    H5Pclose(prop);
+    return TREXIO_INVALID_ID;
+  }
+
+  hid_t dset_id = H5Dcreate(group_id,
+                            dset_name,
+                            dtype_id,
+                            dspace,
+                            H5P_DEFAULT,
+                            prop,
+                            H5P_DEFAULT);
+  if (dset_id < 0) {
+    H5Sclose(dspace);
+    H5Pclose(prop);
+    return TREXIO_INVALID_ID;
+  }
+
+  /* The new dataset holds exactly chunk_dims[0] elements, so the whole
+     buffer is written to the whole dataset (H5S_ALL for both dataspaces) */
+  status = H5Dwrite(dset_id,
+                    dtype_id,
+                    H5S_ALL, H5S_ALL, H5P_DEFAULT,
+                    data_sparse);
+  H5Sclose(dspace);
+  H5Pclose(prop);
+  H5Dclose(dset_id);
+  if (status < 0) return TREXIO_FAILURE;
+
+  return TREXIO_SUCCESS;
+}
+
+
+/*
+ * Append one chunk of data to the existing 1D dataset `dset_name` of
+ * `group_id`: the dataset is extended by chunk_dims[0] elements and the data
+ * is written to the chunk_dims[0] elements starting at offset_file[0].
+ *
+ *   file        - TREXIO HDF5 file handle (not used by this helper)
+ *   group_id    - identifier of the HDF5 group that holds the dataset
+ *   dset_name   - name of the dataset to extend
+ *   dtype_id    - HDF5 datatype of the elements in the buffer
+ *   chunk_dims  - 1-element array: number of elements to append
+ *   offset_file - 1-element array: position in the dataset of the first
+ *                 element to write
+ *   data_sparse - buffer holding chunk_dims[0] elements of type dtype_id
+ *
+ * Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if an HDF5 object could not be
+ * opened, extended or selected, or TREXIO_FAILURE if writing the data failed.
+ */
+trexio_exit_code
+trexio_hdf5_open_write_dset_sparse (trexio_hdf5_t* const file,
+                                    const hid_t group_id,
+                                    const char* dset_name,
+                                    const hid_t dtype_id,
+                                    const hsize_t* chunk_dims,
+                                    const hsize_t* offset_file,
+                                    const void* data_sparse)
+{
+  (void) file; /* not needed here; avoids an unused-parameter warning */
+
+  const int h5_rank = 1;
+
+  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  hid_t fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // ddims below has room for one dimension only: reject any other rank
+  if (H5Sget_simple_extent_ndims(fspace) != h5_rank) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // get the current dimensions of the dataset
+  hsize_t ddims[1] = {0};
+  if (H5Sget_simple_extent_dims(fspace, ddims, NULL) < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+  ddims[0] += chunk_dims[0];
+
+  // extend the dset size
+  herr_t status = H5Dset_extent(dset_id, ddims);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // close and reopen the file dataspace to take into account the extension
+  H5Sclose(fspace);
+  fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // select hyperslab to be written using chunk_dims and offset values
+  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // create memory dataspace to write from
+  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
+  if (dspace < 0) {
+    // dspace is not a valid identifier here: only fspace and dset_id are released
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  status = H5Dwrite(dset_id,
+                    dtype_id,
+                    dspace, fspace, H5P_DEFAULT,
+                    data_sparse);
+  H5Dclose(dset_id);
+  H5Sclose(dspace);
+  H5Sclose(fspace);
+  if (status < 0) return TREXIO_FAILURE;
+
+  return TREXIO_SUCCESS;
+}
+
+  #+end_src
+
 * Constant file suffixes (not used by the generator) :noexport:
 
   #+begin_src c :tangle suffix_hdf5.h
+trexio_exit_code trexio_hdf5_create_write_dset_sparse (trexio_hdf5_t* const file, const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
+trexio_exit_code trexio_hdf5_open_write_dset_sparse (trexio_hdf5_t* const file, const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
+
 #endif
+
   #+end_src
 