Mirror of https://github.com/TREX-CoE/trexio.git

[WIP] refactoring; write_dset_sparse done

q-posev 2021-12-16 16:52:35 +01:00
parent 968cfaa3bb
commit 1c49c81da8
2 changed files with 170 additions and 228 deletions


@@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c
 cat populated/pop_write_*.c >> trexio_hdf5.c
 cat populated/pop_hrw_*.h >> trexio_hdf5.h
+cat helpers_hdf5.c >> trexio_hdf5.c
 cat suffix_hdf5.h >> trexio_hdf5.h


@@ -399,263 +399,71 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file,
   trexio_hdf5_t* f = (trexio_hdf5_t*) file;
-  const uint32_t h5_rank = 1;
-  const hsize_t chunk_i_dims[1] = {size * $group_dset_rank$};
-  const hsize_t chunk_v_dims[1] = {size};
-  const hsize_t maxdims[1] = {H5S_UNLIMITED};
-  char dset_name[256] = "\0";
-  strncpy(dset_name, $GROUP_DSET$_NAME "_indices", 256);
-  hid_t dtype;
+  hid_t index_dtype;
   void* index_p;
-  int64_t size_ranked = size * $group_dset_rank$;
+  uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;
+  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
   if (size_max < UINT8_MAX) {
-    dtype = H5T_NATIVE_UINT8;
     uint8_t* index = CALLOC(size_ranked, uint8_t);
     if (index == NULL) return TREXIO_ALLOCATION_FAILED;
     for (int64_t i=0; i<size_ranked; ++i){
       index[i] = (uint8_t) index_sparse[i];
     }
     index_p = index;
+    index_dtype = H5T_NATIVE_UINT8;
   } else if (size_max < UINT16_MAX) {
-    dtype = H5T_NATIVE_UINT16;
     uint16_t* index = CALLOC(size_ranked, uint16_t);
     if (index == NULL) return TREXIO_ALLOCATION_FAILED;
     for (int64_t i=0; i<size_ranked; ++i){
       index[i] = (uint16_t) index_sparse[i];
     }
     index_p = index;
+    index_dtype = H5T_NATIVE_UINT16;
   } else {
-    dtype = H5T_NATIVE_INT32;
     index_p = (int32_t*) index_sparse;
+    index_dtype = H5T_NATIVE_INT32;
   }
-  if ( H5LTfind_dataset(f->$group$_group, dset_name) != 1 ) {
-    hid_t dspace = H5Screate_simple(h5_rank, chunk_i_dims, maxdims);
-    if (dspace < 0) return TREXIO_INVALID_ID;
-    hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
-    if (prop < 0) {
-      H5Sclose(dspace);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    herr_t status = H5Pset_chunk(prop, h5_rank, chunk_i_dims);
-    if (status < 0) {
-      H5Sclose(dspace);
-      H5Pclose(prop);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    hid_t dset_id = H5Dcreate(f->$group$_group,
-                              dset_name,
-                              dtype,
-                              dspace,
-                              H5P_DEFAULT,
-                              prop,
-                              H5P_DEFAULT);
-    if (dset_id < 0) {
-      H5Sclose(dspace);
-      H5Pclose(prop);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    status = H5Dwrite(dset_id,
-                      dtype,
-                      H5S_ALL, H5S_ALL, H5P_DEFAULT,
-                      index_p);
-    H5Sclose(dspace);
-    H5Pclose(prop);
-    H5Dclose(dset_id);
+  /* Store float values in double precision */
+  hid_t value_dtype = H5T_NATIVE_DOUBLE;
+  /* Arrays of chunk dims that will be used for chunking the dataset */
+  const hsize_t chunk_i_dims[1] = {size_ranked};
+  const hsize_t chunk_v_dims[1] = {size};
+  /* Indices and values are stored as 2 independent datasets in the HDF5 file */
+  char dset_index_name[256] = "\0";
+  char dset_value_name[256] = "\0";
+  /* Build the names of the datasets */
+  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
+  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);
+  trexio_exit_code rc_write = TREXIO_FAILURE;
+  /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
+  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
+    /* If the file does not exist -> create it and write */
+    /* Create chunked dataset with index_dtype datatype and write indices into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f, f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
     if (index_p != index_sparse) FREE(index_p);
-    if (status < 0) return TREXIO_FAILURE;
-    // repeat the same for values; TODO : make a separate function ?
-    dspace = H5Screate_simple(h5_rank, chunk_v_dims, maxdims);
-    if (dspace < 0) return TREXIO_INVALID_ID;
-    prop = H5Pcreate(H5P_DATASET_CREATE);
-    if (prop < 0) {
-      H5Sclose(dspace);
-      return TREXIO_INVALID_ID;
-    }
-    status = H5Pset_chunk(prop, h5_rank, chunk_v_dims);
-    if (status < 0) {
-      H5Sclose(dspace);
-      H5Pclose(prop);
-      return TREXIO_INVALID_ID;
-    }
-    // same for values
-    strncpy(dset_name, $GROUP_DSET$_NAME "_values", 256);
-    dset_id = H5Dcreate(f->$group$_group,
-                        dset_name,
-                        H5T_NATIVE_DOUBLE,
-                        dspace,
-                        H5P_DEFAULT,
-                        prop,
-                        H5P_DEFAULT);
-    if (dset_id < 0) {
-      H5Sclose(dspace);
-      H5Pclose(prop);
-      return TREXIO_INVALID_ID;
-    }
-    status = H5Dwrite(dset_id,
-                      H5T_NATIVE_DOUBLE,
-                      H5S_ALL, H5S_ALL, H5P_DEFAULT,
-                      value_sparse);
-    H5Sclose(dspace);
-    H5Pclose(prop);
-    H5Dclose(dset_id);
-    if (status < 0) return TREXIO_FAILURE;
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+    /* Create chunked dataset with value_dtype datatype and write values into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f, f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
   } else {
-    hid_t dset_id = H5Dopen(f->$group$_group, dset_name, H5P_DEFAULT);
-    if (dset_id <= 0) {
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    hid_t fspace = H5Dget_space(dset_id);
-    if (fspace < 0) {
-      H5Dclose(dset_id);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
+    /* If the file exists -> open it and write */
     hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
-    // allocate space for the dimensions to be read
-    hsize_t ddims[1] = {0};
-    // get the rank and dimensions of the dataset
-    int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
-    ddims[0] += chunk_i_dims[0];
-    // extend the dset size
-    herr_t status = H5Dset_extent(dset_id, ddims);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    // close and reopen the file dataspace to take into account the extension
-    H5Sclose(fspace);
-    fspace = H5Dget_space(dset_id);
-    if (fspace < 0) {
-      H5Dclose(dset_id);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    // select hyperslab to be written using chunk_dims and offset values
-    status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_i, NULL, chunk_i_dims, NULL);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    // create memory dataspace to write from
-    hid_t dspace = H5Screate_simple(h5_rank, chunk_i_dims, NULL);
-    if (dspace < 0) {
-      H5Sclose(fspace);
-      H5Sclose(dspace);
-      H5Dclose(dset_id);
-      if (index_p != index_sparse) FREE(index_p);
-      return TREXIO_INVALID_ID;
-    }
-    status = H5Dwrite(dset_id,
-                      dtype,
-                      dspace, fspace, H5P_DEFAULT,
-                      index_p);
-    H5Dclose(dset_id);
-    H5Sclose(dspace);
-    H5Sclose(fspace);
-    if (index_p != index_sparse) FREE(index_p);
-    if (status < 0) return TREXIO_FAILURE;
-    // same for values
-    strncpy(dset_name, $GROUP_DSET$_NAME "_values", 256);
-    dset_id = H5Dopen(f->$group$_group, dset_name, H5P_DEFAULT);
-    if (dset_id <= 0) return TREXIO_INVALID_ID;
-    fspace = H5Dget_space(dset_id);
-    if (fspace < 0) {
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
     hsize_t offset_v[1] = {(hsize_t) offset_file};
-    // allocate space for the dimensions to be read
-    ddims[0] = (hsize_t) 0;
-    // get the rank and dimensions of the dataset
-    rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
-    ddims[0] += chunk_v_dims[0];
-    // extend the dset size
-    status = H5Dset_extent(dset_id, ddims);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-    // close and reopen the file dataspace to take into account the extension
-    H5Sclose(fspace);
-    fspace = H5Dget_space(dset_id);
-    if (fspace < 0) {
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-    // select hyperslab to be written using chunk_dims and offset values
-    status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_v, NULL, chunk_v_dims, NULL);
-    if (status < 0) {
-      H5Sclose(fspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-    // create memory dataspace to write from
-    dspace = H5Screate_simple(h5_rank, chunk_v_dims, NULL);
-    if (dspace < 0) {
-      H5Sclose(fspace);
-      H5Sclose(dspace);
-      H5Dclose(dset_id);
-      return TREXIO_INVALID_ID;
-    }
-    status = H5Dwrite(dset_id,
-                      H5T_NATIVE_DOUBLE,
-                      dspace, fspace, H5P_DEFAULT,
-                      value_sparse);
-    H5Dclose(dset_id);
-    H5Sclose(dspace);
-    H5Sclose(fspace);
-    if (status < 0) return TREXIO_FAILURE;
+    /* Create chunked dataset with index_dtype datatype and write indices into it */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f, f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
+    if (index_p != index_sparse) FREE(index_p);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+    /* Create chunked dataset with value_dtype datatype and write values into it */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f, f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
   }
@@ -1230,9 +1038,143 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
}
#+end_src
** Helper functions
#+begin_src c :tangle helpers_hdf5.c
trexio_exit_code
trexio_hdf5_create_write_dset_sparse (trexio_hdf5_t* const file,
const hid_t group_id,
const char* dset_name,
const hid_t dtype_id,
const hsize_t* chunk_dims,
const void* data_sparse)
{
const int h5_rank = 1;
const hsize_t maxdims[1] = {H5S_UNLIMITED};
hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
if (dspace < 0) return TREXIO_INVALID_ID;
hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
if (prop < 0) {
H5Sclose(dspace);
return TREXIO_INVALID_ID;
}
herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims);
if (status < 0) {
H5Sclose(dspace);
H5Pclose(prop);
return TREXIO_INVALID_ID;
}
hid_t dset_id = H5Dcreate(group_id,
dset_name,
dtype_id,
dspace,
H5P_DEFAULT,
prop,
H5P_DEFAULT);
if (dset_id < 0) {
H5Sclose(dspace);
H5Pclose(prop);
return TREXIO_INVALID_ID;
}
status = H5Dwrite(dset_id,
dtype_id,
H5S_ALL, H5S_ALL, H5P_DEFAULT,
data_sparse);
H5Sclose(dspace);
H5Pclose(prop);
H5Dclose(dset_id);
if (status < 0) return TREXIO_FAILURE;
return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_open_write_dset_sparse (trexio_hdf5_t* const file,
const hid_t group_id,
const char* dset_name,
const hid_t dtype_id,
const hsize_t* chunk_dims,
const hsize_t* offset_file,
const void* data_sparse)
{
const int h5_rank = 1;
hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;
hid_t fspace = H5Dget_space(dset_id);
if (fspace < 0) {
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
// allocate space for the dimensions to be read
hsize_t ddims[1] = {0};
// get the rank and dimensions of the dataset
int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
ddims[0] += chunk_dims[0];
// extend the dset size
herr_t status = H5Dset_extent(dset_id, ddims);
if (status < 0) {
H5Sclose(fspace);
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
// close and reopen the file dataspace to take into account the extension
H5Sclose(fspace);
fspace = H5Dget_space(dset_id);
if (fspace < 0) {
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
// select hyperslab to be written using chunk_dims and offset values
status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
if (status < 0) {
H5Sclose(fspace);
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
// create memory dataspace to write from
hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
if (dspace < 0) {
H5Sclose(fspace);
H5Sclose(dspace);
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
status = H5Dwrite(dset_id,
dtype_id,
dspace, fspace, H5P_DEFAULT,
data_sparse);
H5Dclose(dset_id);
H5Sclose(dspace);
H5Sclose(fspace);
if (status < 0) return TREXIO_FAILURE;
return TREXIO_SUCCESS;
}
#+end_src
* Constant file suffixes (not used by the generator) :noexport:
#+begin_src c :tangle suffix_hdf5.h
trexio_exit_code trexio_hdf5_create_write_dset_sparse (trexio_hdf5_t* const file, const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
trexio_exit_code trexio_hdf5_open_write_dset_sparse (trexio_hdf5_t* const file, const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
#endif
#+end_src
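
For context, the sketch below shows how a caller is expected to combine the two new helpers: on the first write the chunked, extendible dataset is created with trexio_hdf5_create_write_dset_sparse, and every later write extends it and appends at the given offset with trexio_hdf5_open_write_dset_sparse, mirroring the H5LTfind_dataset branch of trexio_hdf5_write_$group_dset$ in the diff above. This is an illustration only, not part of the commit: the function name append_values_chunk and its arguments are hypothetical, and it assumes the tangled trexio_hdf5.h (which pulls in hdf5.h and hdf5_hl.h) is available.

#+begin_src c
/* Hypothetical usage sketch -- not tangled into the library. */
#include "trexio_hdf5.h"  /* assumed to declare trexio_hdf5_t and the two helpers */

static trexio_exit_code
append_values_chunk (trexio_hdf5_t* const f,
                     const hid_t group_id,      /* an open group id, e.g. f->$group$_group      */
                     const char* dset_name,     /* e.g. built from $GROUP_DSET$_NAME "_values"  */
                     const double* values,      /* chunk of values to append                    */
                     const hsize_t chunk_size,  /* number of values in this chunk               */
                     const hsize_t offset)      /* number of values already written             */
{
  const hsize_t chunk_dims[1]  = {chunk_size};
  const hsize_t offset_file[1] = {offset};

  if (H5LTfind_dataset(group_id, dset_name) != 1) {
    /* First call: create the chunked dataset (unlimited max dims) and write the first slice. */
    return trexio_hdf5_create_write_dset_sparse(f, group_id, dset_name,
                                                H5T_NATIVE_DOUBLE, chunk_dims, values);
  } else {
    /* Later calls: extend the dataset and write the slice at the given offset. */
    return trexio_hdf5_open_write_dset_sparse(f, group_id, dset_name,
                                              H5T_NATIVE_DOUBLE, chunk_dims,
                                              offset_file, values);
  }
}
#+end_src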