HDF5 back end
Table of Contents
- 1. Template for HDF5 definitions
- 2. Template for HDF5 structures
- 3. Template for HDF5 init/deinit
- 4. Template for HDF5 has/read/write a numerical attribute
- 5. Template for HDF5 has/read/write a dataset of numerical data
- 6. Template for HDF5 has/read/write a dataset of sparse data
- 7. Template for HDF5 has/read/write a dataset of strings
- 8. Template for HDF5 has/read/write a string attribute
- 9. Template for HDF5 delete a group (UNSAFE mode)
- 10. Source code for the determinant part
- 11. Helper functions
1 Template for HDF5 definitions
#define $GROUP$_GROUP_NAME    "$group$"
#define $GROUP_NUM$_NAME      "$group_num$"
#define $GROUP_DSET$_NAME     "$group_dset$"
#define $GROUP_STR$_NAME      "$group_str$"
2 Template for HDF5 structures
typedef struct trexio_hdf5_s {
  trexio_t parent;
  hid_t    file_id;
  hid_t    $group$_group;
} trexio_hdf5_t;
3 Template for HDF5 init/deinit
trexio_exit_code
trexio_hdf5_inquire(const char* file_name)
{
  /* H5Fis_hdf5 determines whether file is in HDF5 format */
  htri_t rc = H5Fis_hdf5(file_name);

  if (rc > 0) {
    return TREXIO_SUCCESS;     // exists and HDF5
  } else if (rc == 0) {
    return TREXIO_FILE_ERROR;  // exists but not HDF5
  } else {
    return TREXIO_FAILURE;     // does not exist or function fails
  }
}
trexio_exit_code trexio_hdf5_init (trexio_t* const file) { trexio_hdf5_t* const f = (trexio_hdf5_t*) file; /* If file doesn't exist, create it */ int f_exists = 0; struct stat st; if (stat(file->file_name, &st) == 0) f_exists = 1; if (f_exists == 1) { switch (file->mode) { case 'r': // reading the existing file -> open as RDONLY f->file_id = H5Fopen(file->file_name, H5F_ACC_RDONLY, H5P_DEFAULT); break; case 'u': case 'w': // writing the existing file -> open as RDWRITE f->file_id = H5Fopen(file->file_name, H5F_ACC_RDWR, H5P_DEFAULT); break; } } else { switch (file->mode) { case 'r': // reading non-existing file -> error return TREXIO_FAILURE; case 'u': case 'w': // writing non-existing file -> create it f->file_id = H5Fcreate(file->file_name, H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT); break; } } /* Create or open groups in the hdf5 file assuming that they exist if file exists */ switch (file->mode) { case 'r': if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = (hid_t) 0; break; case 'u': case 'w': if (f_exists == 1) { if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) > 0) f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); if (H5Lexists(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT) == 0) f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } else { f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); } break; } if (f->$group$_group < (hid_t) 0) return TREXIO_INVALID_ID; return TREXIO_SUCCESS; }
trexio_exit_code
trexio_hdf5_deinit (trexio_t* const file)
{
  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  if (f->$group$_group != (hid_t) 0) H5Gclose(f->$group$_group);
  f->$group$_group = 0;

  H5Fclose(f->file_id);
  f->file_id = 0;

  return TREXIO_SUCCESS;
}
4 Template for HDF5 has/read/write a numerical attribute
trexio_exit_code
trexio_hdf5_read_$group_num$ (trexio_t* const file, $group_num_dtype_double$* const num)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (num  == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  /* Quit if the dimensioning attribute is missing in the file */
  if (H5Aexists(f->$group$_group, $GROUP_NUM$_NAME) == 0) return TREXIO_FAILURE;

  /* Read the $group_num$ attribute of $group$ group */
  const hid_t num_id = H5Aopen(f->$group$_group, $GROUP_NUM$_NAME, H5P_DEFAULT);
  if (num_id <= 0) return TREXIO_INVALID_ID;

  const herr_t status = H5Aread(num_id, H5T_$GROUP_NUM_H5_DTYPE$, num);

  H5Aclose(num_id);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_double$ num) { if (file == NULL) return TREXIO_INVALID_ARG_1; trexio_hdf5_t* const f = (trexio_hdf5_t*) file; /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ if (trexio_hdf5_has_$group_num$(file) == TREXIO_SUCCESS && file->mode == 'u') { herr_t status_del = H5Adelete(f->$group$_group, $GROUP_NUM$_NAME); if (status_del < 0) return TREXIO_FAILURE; } /* Setup the dataspace */ const hid_t dtype_id = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); if (dtype_id <= 0) return TREXIO_INVALID_ID; const hid_t dspace_id = H5Screate(H5S_SCALAR); if (dspace_id <= 0) { H5Tclose(dtype_id); return TREXIO_INVALID_ID; } const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); if (num_id <= 0) { H5Sclose(dspace_id); H5Tclose(dtype_id); return TREXIO_INVALID_ID; } const herr_t status = H5Awrite(num_id, dtype_id, &num); H5Sclose(dspace_id); H5Aclose(num_id); H5Tclose(dtype_id); if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; }
trexio_exit_code
trexio_hdf5_has_$group_num$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT;

  htri_t status = H5Aexists(f->$group$_group, $GROUP_NUM$_NAME);
  /* H5Aexists returns positive value if attribute exists, 0 if it does not,
     negative value if error */
  if (status > 0) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }
}
5 Template for HDF5 has/read/write a dataset of numerical data
trexio_exit_code trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $group_dset$, const uint32_t rank, const uint64_t* dims) { if (file == NULL) return TREXIO_INVALID_ARG_1; if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; // open the dataset to get its dimensions hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); if (dset_id <= 0) return TREXIO_INVALID_ID; // allocate space for the dimensions to be read hsize_t* ddims = CALLOC( (int) rank, hsize_t); if (ddims == NULL) return TREXIO_FAILURE; // get the dataspace of the dataset hid_t dspace_id = H5Dget_space(dset_id); // get the rank and dimensions of the dataset int rrank = H5Sget_simple_extent_dims(dspace_id, ddims, NULL); // check that dimensions are consistent if (rrank != (int) rank) { FREE(ddims); H5Sclose(dspace_id); H5Dclose(dset_id); return TREXIO_INVALID_ARG_3; } for (uint32_t i=0; i<rank; ++i){ if (ddims[i] != dims[i]) { FREE(ddims); H5Sclose(dspace_id); H5Dclose(dset_id); return TREXIO_INVALID_ARG_4; } } FREE(ddims); H5Sclose(dspace_id); H5Dclose(dset_id); /* High-level H5LT API. No need to deal with dataspaces and datatypes */ herr_t status = H5LTread_dataset(f->$group$_group, $GROUP_DSET$_NAME, H5T_$GROUP_DSET_H5_DTYPE$, $group_dset$); if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; }
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const $group_dset_dtype$* $group_dset$,
                                const uint32_t rank,
                                const uint64_t* dims)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /* Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
     NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
     thus reducing the size of the HDF5 file. In practice, this is not always the case.
     Consider using the HDF5-provided h5repack utility after deleting/overwriting big datasets. */
  if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
    herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
  if (dspace_id <= 0) return TREXIO_INVALID_ID;

  hid_t dset_id = H5Dcreate (f->$group$_group,
                             $GROUP_DSET$_NAME,
                             H5T_$GROUP_DSET_H5_DTYPE$,
                             dspace_id,
                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (dset_id <= 0) {
    H5Sclose(dspace_id);
    return TREXIO_INVALID_ID;
  }

  herr_t status = H5Dwrite(dset_id,
                           H5T_$GROUP_DSET_H5_DTYPE$,
                           H5S_ALL, dspace_id, H5P_DEFAULT,
                           $group_dset$);

  H5Dclose(dset_id);
  H5Sclose(dspace_id);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }
}
6 Template for HDF5 has/read/write a dataset of sparse data
Sparse data is stored using extensible HDF5 datasets. Extensibility is required because the sparse data is written in chunks of user-defined size.
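For orientation, here is a minimal sketch (not one of the generated templates; the helper name and its arguments are illustrative) of the underlying HDF5 pattern used by the templates in this section and by the helper functions of section 11: a 1-D chunked dataset created with an unlimited maximum extent, so that later chunks can be appended after a call to H5Dset_extent.

/* Illustrative sketch only: create a 1-D extensible dataset and write a
   first chunk into it. Subsequent chunks are appended by extending the
   dataset and selecting the corresponding hyperslab (see section 11). */
#include <hdf5.h>

static herr_t create_extensible_dset(hid_t group_id, const char* name,
                                     hid_t dtype, hsize_t chunk_len,
                                     const void* first_chunk)
{
  const hsize_t dims[1]    = {chunk_len};      /* current extent          */
  const hsize_t maxdims[1] = {H5S_UNLIMITED};  /* allows future extension */

  hid_t space = H5Screate_simple(1, dims, maxdims);
  if (space < 0) return -1;

  /* Chunking is mandatory for extensible datasets */
  hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
  if (prop < 0) { H5Sclose(space); return -1; }

  herr_t status = H5Pset_chunk(prop, 1, dims);
  if (status < 0) { H5Pclose(prop); H5Sclose(space); return -1; }

  hid_t dset = H5Dcreate(group_id, name, dtype, space,
                         H5P_DEFAULT, prop, H5P_DEFAULT);
  if (dset < 0) { H5Pclose(prop); H5Sclose(space); return -1; }

  /* Write the first chunk */
  status = H5Dwrite(dset, dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, first_chunk);

  H5Dclose(dset);
  H5Pclose(prop);
  H5Sclose(space);
  return (status < 0) ? -1 : 0;
}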
trexio_exit_code trexio_hdf5_write_$group_dset$ (trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse) { if (file == NULL) return TREXIO_INVALID_ARG_1; trexio_hdf5_t* f = (trexio_hdf5_t*) file; hid_t index_dtype; void* index_p = NULL; uint64_t size_ranked = (uint64_t) size * $group_dset_rank$; /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */ if (size_max < UINT8_MAX) { uint8_t* index = CALLOC(size_ranked, uint8_t); if (index == NULL) return TREXIO_ALLOCATION_FAILED; for (uint64_t i=0; i<size_ranked; ++i){ index[i] = (uint8_t) index_sparse[i]; } index_p = index; index_dtype = H5T_NATIVE_UINT8; } else if (size_max < UINT16_MAX) { uint16_t* index = CALLOC(size_ranked, uint16_t); if (index == NULL) return TREXIO_ALLOCATION_FAILED; for (uint64_t i=0; i<size_ranked; ++i){ index[i] = (uint16_t) index_sparse[i]; } index_p = index; index_dtype = H5T_NATIVE_UINT16; } else { index_p = (int32_t*) index_sparse; index_dtype = H5T_NATIVE_INT32; } /* Store float values in double precision */ hid_t value_dtype = H5T_NATIVE_DOUBLE; /* Arrays of chunk dims that will be used for chunking the dataset */ const hsize_t chunk_i_dims[1] = {size_ranked}; const hsize_t chunk_v_dims[1] = {size}; /* Indices and values are stored as 2 independent datasets in the HDF5 file */ char dset_index_name[256]; char dset_value_name[256]; /* Build the names of the datasets */ strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256); strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256); trexio_exit_code rc_write = TREXIO_FAILURE; /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */ if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) { /* If the file does not exist -> create it and write */ /* Create chunked dataset with index_dtype datatype and write indices into it */ rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p); if (index_p != index_sparse) FREE(index_p); if (rc_write != TREXIO_SUCCESS) return rc_write; /* Create chunked dataset with value_dtype datatype and write values into it */ rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse); if (rc_write != TREXIO_SUCCESS) return rc_write; } else { /* If the file exists -> open it and write */ hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$}; hsize_t offset_v[1] = {(hsize_t) offset_file}; /* Create chunked dataset with index_dtype datatype and write indices into it */ rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p); if (index_p != index_sparse) FREE(index_p); if (rc_write != TREXIO_SUCCESS) return rc_write; /* Create chunked dataset with value_dtype datatype and write values into it */ rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse); if (rc_write != TREXIO_SUCCESS) return rc_write; } return TREXIO_SUCCESS; }
trexio_exit_code trexio_hdf5_read_$group_dset$ (trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_read, double* const value_read) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5; const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; /* Indices and values are stored as 2 independent datasets in the HDF5 file */ char dset_index_name[256]; char dset_value_name[256]; /* Build the names of the datasets */ strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256); strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256); hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$}; hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$}; hsize_t offset_v[1] = {(hsize_t) offset_file}; hsize_t count_v[1] = {(hsize_t) size}; int is_index = 1, is_value = 0; trexio_exit_code rc_read; // attempt to read indices rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, $group_dset_rank$, offset_i, count_i, NULL, is_index, index_read); if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read; // attempt to read values // when EOF is encountered - the count_v[0] is modified and contains the number of elements being read rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, 1, offset_v, count_v, eof_read_size, is_value, value_read); if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read; return rc_read; }
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // allocate space for the dimensions to be read
  hsize_t ddims[1] = {0};

  // get the rank and dimensions of the dataset
  H5Sget_simple_extent_dims(fspace_id, ddims, NULL);

  H5Dclose(dset_id);
  H5Sclose(fspace_id);

  *size_max = (int64_t) ddims[0];

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values");
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }
}
7 Template for HDF5 has/read/write a dataset of strings
trexio_exit_code trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, const uint32_t rank, const uint64_t* dims, const uint32_t max_str_len) { if (file == NULL) return TREXIO_INVALID_ARG_1; if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; herr_t status; // open the dataset to get its dimensions hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); if (dset_id <= 0) return TREXIO_INVALID_ID; // allocate space for the dimensions to be read hsize_t* ddims = CALLOC( (int) rank, hsize_t); if (ddims == NULL) { H5Dclose(dset_id); return TREXIO_ALLOCATION_FAILED; } hid_t dspace = H5Dget_space(dset_id); if (dset_id <= 0) { FREE(ddims); H5Dclose(dset_id); return TREXIO_INVALID_ID; } // get the rank of the dataset in a file int rrank = H5Sget_simple_extent_dims(dspace, ddims, NULL); if (rrank != (int) rank) { FREE(ddims); H5Dclose(dset_id); H5Sclose(dspace); return TREXIO_INVALID_ARG_3; } for (int i=0; i<rrank; i++) { if (ddims[i] != dims[i]) { H5Dclose(dset_id); H5Sclose(dspace); FREE(ddims); return TREXIO_INVALID_ARG_4; } } FREE(ddims); hid_t memtype = H5Tcopy (H5T_C_S1); status = H5Tset_size(memtype, H5T_VARIABLE); if (status < 0 || memtype <= 0) { H5Dclose(dset_id); H5Sclose(dspace); return TREXIO_FAILURE; } char** rdata = CALLOC(dims[0], char*); if (rdata == NULL) { H5Dclose(dset_id); H5Sclose(dspace); H5Tclose(memtype); return TREXIO_ALLOCATION_FAILED; } status = H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata); if (status < 0) { FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); H5Tclose(memtype); return TREXIO_FAILURE; } // copy contents of temporary rdata buffer into the group_dset otherwise they are lost // after calling H5Treclaim or H5Dvlen_reclaim functions strcpy($group_dset$, ""); for (uint64_t i=0; i<dims[0]; i++) { strncat($group_dset$, rdata[i], max_str_len); strcat($group_dset$, TREXIO_DELIM); } // H5Dvlen_reclaim is deprecated and replaced by H5Treclaim in HDF5 v.1.12.0 #if (H5_VERS_MAJOR <= 1 && H5_VERS_MINOR < 12) status = H5Dvlen_reclaim(memtype, dspace, H5P_DEFAULT, rdata); #else status = H5Treclaim(memtype, dspace, H5P_DEFAULT, rdata); #endif if (status < 0) { FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); H5Tclose(memtype); return TREXIO_FAILURE; } FREE(rdata); H5Dclose(dset_id); H5Sclose(dspace); H5Tclose(memtype); return TREXIO_SUCCESS; }
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const char** $group_dset$,
                                const uint32_t rank,
                                const uint64_t* dims)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /* Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
     NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
     thus reducing the size of the HDF5 file. In practice, this is not always the case.
     Consider using the HDF5-provided h5repack utility after deleting/overwriting big datasets. */
  if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
    herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  herr_t status;
  hid_t dset_id;

  /* we are going to write variable-length strings */
  hid_t memtype = H5Tcopy (H5T_C_S1);
  if (memtype <= 0) return TREXIO_INVALID_ID;

  status = H5Tset_size (memtype, H5T_VARIABLE);
  if (status < 0) return TREXIO_FAILURE;

  hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
  if (dspace <= 0) return TREXIO_INVALID_ID;

  /* code to create dataset */
  hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
  if (filetype <= 0) return TREXIO_INVALID_ID;

  status = H5Tset_size (filetype, H5T_VARIABLE);
  if (status < 0) return TREXIO_FAILURE;

  dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace,
                       H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  status = H5Dwrite (dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, $group_dset$);

  H5Dclose (dset_id);
  H5Sclose (dspace);
  H5Tclose (filetype);
  H5Tclose (memtype);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }
}
8 Template for HDF5 has/read/write a string attribute
trexio_exit_code trexio_hdf5_read_$group_str$ (trexio_t* const file, char* const str, const uint32_t max_str_len) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (str == NULL) return TREXIO_INVALID_ARG_2; const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; /* Quit if the string attribute is missing in the file */ if (H5Aexists(f->$group$_group, $GROUP_STR$_NAME) == 0) return TREXIO_HAS_NOT; /* Read the $group_str$ attribute of $group$ group */ const hid_t str_id = H5Aopen(f->$group$_group, $GROUP_STR$_NAME, H5P_DEFAULT); if (str_id <= 0) return TREXIO_INVALID_ID; const hid_t ftype_id = H5Aget_type(str_id); if (ftype_id <= 0) return TREXIO_INVALID_ID; uint64_t sdim = H5Tget_size(ftype_id); sdim++; /* Make room for null terminator */ const hid_t mem_id = H5Tcopy(H5T_C_S1); if (mem_id <= 0) return TREXIO_INVALID_ID; herr_t status; status = (max_str_len+1) > sdim ? H5Tset_size(mem_id, sdim) : H5Tset_size(mem_id, max_str_len+1) ; if (status < 0) return TREXIO_FAILURE; status = H5Aread(str_id, mem_id, str); if (status < 0) return TREXIO_FAILURE; H5Aclose(str_id); H5Tclose(mem_id); H5Tclose(ftype_id); return TREXIO_SUCCESS; }
trexio_exit_code trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (str == NULL) return TREXIO_INVALID_ARG_2; trexio_hdf5_t* const f = (trexio_hdf5_t*) file; /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ if (trexio_hdf5_has_$group_str$(file) == TREXIO_SUCCESS && file->mode == 'u') { herr_t status_del = H5Adelete(f->$group$_group, $GROUP_STR$_NAME); if (status_del < 0) return TREXIO_FAILURE; } /* Setup the datatype for variable length string */ const hid_t dtype_id = H5Tcopy(H5T_C_S1); if (dtype_id <= 0) return TREXIO_INVALID_ID; size_t str_attr_len = strlen(str) + 1; herr_t status; status = H5Tset_size(dtype_id, str_attr_len); if (status < 0) return TREXIO_FAILURE; status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); if (status < 0) return TREXIO_FAILURE; /* Setup the dataspace */ const hid_t dspace_id = H5Screate(H5S_SCALAR); if (dspace_id <= 0) return TREXIO_INVALID_ID; /* Create the $group_str$ attribute of $group$ group */ const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); if (str_id <= 0) { H5Sclose(dspace_id); H5Tclose(dtype_id); return TREXIO_INVALID_ID; } status = H5Awrite(str_id, dtype_id, str); H5Aclose(str_id); H5Sclose(dspace_id); H5Tclose(dtype_id); if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; }
trexio_exit_code
trexio_hdf5_has_$group_str$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  if (f->$group$_group == (hsize_t) 0) return TREXIO_HAS_NOT;

  htri_t status = H5Aexists(f->$group$_group, $GROUP_STR$_NAME);
  /* H5Aexists returns positive value if attribute exists, 0 if it does not,
     negative value if error */
  if (status > 0) {
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }
}
9 Template for HDF5 delete a group (UNSAFE mode)
Note: in early versions of the HDF5 library (v < 1.10), unlinking an object did not work as expected and the associated space was not necessarily freed (see, for example, related StackOverflow discussions). Even in recent versions, some space may remain occupied after deleting an object. To make the best use of the freed file space, we recommend writing to the deleted group again within the same session (i.e. before closing the TREXIO file).
In principle, one can use the HDF5-provided h5repack binary, which copies all existing objects from one file into another; any corrupted or lost file space is then left behind in the first file. The use of h5repack is highly encouraged.
trexio_exit_code
trexio_hdf5_delete_$group$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  // delete the link to the existing group: this should free the associated space
  H5Gclose(f->$group$_group);
  f->$group$_group = 0;

  herr_t status = H5Ldelete(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT);
  if (status < 0) return TREXIO_FAILURE;

  // re-create the group (with the new link ?)
  f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME,
                               H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (f->$group$_group <= 0L) return TREXIO_INVALID_ID;

  return TREXIO_SUCCESS;
}
10 Source code for the determinant part
Each array is stored in a separate HDF5 dataset because determinant I/O has to be decoupled. Chunks are used to read/write the data in order to prevent memory overflow; each chunk holds dims[0]*dims[1] elements of type int64_t. The size specifies the number of data items (e.g. determinants) to process.
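As a usage illustration, the sketch below (not part of the generated code; the chunk size and the n_det/det_words variables are assumptions, with det_words denoting the number of int64_t words per determinant) reads the whole determinant list in chunks by repeatedly calling the back-end reader defined right after it, until TREXIO_END signals that the end of the dataset was reached.

/* Illustrative sketch only: read all determinants in chunks of `chunk'
   determinants. Assumes the back-end declarations (e.g. trexio_hdf5.h)
   are in scope and that `list' holds at least n_det*det_words elements. */
#include <stdint.h>

static trexio_exit_code
read_all_determinants(trexio_t* const file, const int64_t n_det,
                      const uint64_t det_words, int64_t* const list)
{
  const int64_t chunk = 10000;     /* user-defined chunk size (in determinants) */
  int64_t eof_read_size = 0;

  for (int64_t offset = 0; offset < n_det; offset += chunk) {
    const uint64_t dims[2] = {(uint64_t) chunk, det_words};
    trexio_exit_code rc =
      trexio_hdf5_read_determinant_list(file, offset, 2, dims, &eof_read_size,
                                        list + offset * (int64_t) det_words);
    if (rc == TREXIO_END) break;   /* last (possibly partial) chunk:
                                      eof_read_size holds the number of
                                      determinants actually read */
    if (rc != TREXIO_SUCCESS) return rc;
  }
  return TREXIO_SUCCESS;
}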
trexio_exit_code
trexio_hdf5_read_determinant_list(trexio_t* const file,
                                  const int64_t offset_file,
                                  const uint32_t rank,
                                  const uint64_t* dims,
                                  int64_t* const eof_read_size,
                                  int64_t* const list)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  if (list == NULL) return TREXIO_INVALID_ARG_6;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  char dset_det_name[256] = "determinant_list";

  hsize_t offset[1] = {(hsize_t) offset_file * dims[1]};
  hsize_t count[1]  = {(hsize_t) dims[0] * dims[1]};

  /* Attempt to read the determinants (on EOF, eof_read_size is set to the number of
     elements that were actually read and the return code is TREXIO_END).
     The 0 argument below is required to skip the internal treatment specific
     to sparse indices (i.e. their de-compression). */
  return trexio_hdf5_open_read_dset_sparse(f->determinant_group,
                                           dset_det_name,
                                           (uint32_t) dims[1],
                                           offset,
                                           count,
                                           eof_read_size,
                                           0,
                                           list);
}

trexio_exit_code
trexio_hdf5_read_determinant_coefficient(trexio_t* const file,
                                         const int64_t offset_file,
                                         const uint32_t rank,
                                         const uint64_t* dims,
                                         int64_t* const eof_read_size,
                                         double* const coeff)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  if (coeff == NULL) return TREXIO_INVALID_ARG_6;

  char dset_coeff_name[128];
  memset(dset_coeff_name, 0, sizeof(dset_coeff_name));

  const int32_t trexio_state = file->state;

  if (trexio_state != 0) {
    sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state);
  } else {
    strncpy(dset_coeff_name, "determinant_coefficient", 24);
  }

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hsize_t offset[1] = {(hsize_t) offset_file};
  hsize_t count[1]  = {(hsize_t) dims[0]};

  /* Attempt to read the coefficients (on EOF, eof_read_size is set to the number of
     elements that were actually read and the return code is TREXIO_END).
     The 0 argument below is required to skip the internal treatment specific
     to sparse indices (i.e. their de-compression). */
  return trexio_hdf5_open_read_dset_sparse(f->determinant_group,
                                           dset_coeff_name,
                                           1,
                                           offset,
                                           count,
                                           eof_read_size,
                                           0,
                                           coeff);
}
trexio_exit_code trexio_hdf5_write_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const int64_t* list) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (list == NULL) return TREXIO_INVALID_ARG_5; trexio_hdf5_t* f = (trexio_hdf5_t*) file; hid_t det_dtype = H5T_NATIVE_INT64; uint64_t size_ranked = dims[1]*dims[0]; /* Arrays of chunk dims that will be used for chunking the dataset */ const hsize_t chunk_dims[1] = {size_ranked}; /* Indices and values are stored as 2 independent datasets in the HDF5 file */ char dset_det_name[256] = "determinant_list"; trexio_exit_code rc_write = TREXIO_FAILURE; /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */ if ( H5LTfind_dataset(f->determinant_group, dset_det_name) != 1 ) { /* If the file does not exist -> create it and write */ /* Create chunked dataset with det_dtype datatype and write indices into it */ rc_write = trexio_hdf5_create_write_dset_sparse(f->determinant_group, dset_det_name, det_dtype, chunk_dims, list); if (rc_write != TREXIO_SUCCESS) return rc_write; } else { /* If the file exists -> open it and write */ hsize_t offset_data[1] = {(hsize_t) offset_file * dims[1]}; /* Create chunked dataset with det_dtype datatype and write indices into it */ rc_write = trexio_hdf5_open_write_dset_sparse(f->determinant_group, dset_det_name, det_dtype, chunk_dims, offset_data, list); if (rc_write != TREXIO_SUCCESS) return rc_write; } return TREXIO_SUCCESS; } trexio_exit_code trexio_hdf5_write_determinant_coefficient(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const double* coeff) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (coeff == NULL) return TREXIO_INVALID_ARG_5; char dset_coeff_name[128]; memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); const int32_t trexio_state = file->state; if (trexio_state != 0) { sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); } else { strncpy(dset_coeff_name, "determinant_coefficient", 24); } trexio_hdf5_t* f = (trexio_hdf5_t*) file; hid_t det_dtype = H5T_NATIVE_DOUBLE; /* Arrays of chunk dims that will be used for chunking the dataset */ const hsize_t chunk_dims[1] = {(hsize_t) dims[0]}; trexio_exit_code rc_write = TREXIO_FAILURE; /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! 
*/ if ( H5LTfind_dataset(f->determinant_group, dset_coeff_name) != 1 ) { /* If the file does not exist -> create it and write */ /* Create chunked dataset with det_dtype datatype and write indices into it */ rc_write = trexio_hdf5_create_write_dset_sparse(f->determinant_group, dset_coeff_name, det_dtype, chunk_dims, coeff); if (rc_write != TREXIO_SUCCESS) return rc_write; } else { /* If the file exists -> open it and write */ hsize_t offset_data[1] = {(hsize_t) offset_file}; /* Create chunked dataset with det_dtype datatype and write indices into it */ rc_write = trexio_hdf5_open_write_dset_sparse(f->determinant_group, dset_coeff_name, det_dtype, chunk_dims, offset_data, coeff); if (rc_write != TREXIO_SUCCESS) return rc_write; } return TREXIO_SUCCESS; } trexio_exit_code trexio_hdf5_read_determinant_coefficient_size (trexio_t* const file, int64_t* const size_max) { if (file == NULL) return TREXIO_INVALID_ARG_1; if (size_max == NULL) return TREXIO_INVALID_ARG_2; char dset_coeff_name[128]; memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); const int32_t trexio_state = file->state; if (trexio_state != 0) { sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); } else { strncpy(dset_coeff_name, "determinant_coefficient", 24); } const trexio_hdf5_t* f = (const trexio_hdf5_t*) file; hid_t dset_id = H5Dopen(f->determinant_group, dset_coeff_name, H5P_DEFAULT); if (dset_id <= 0) return TREXIO_INVALID_ID; hid_t fspace_id = H5Dget_space(dset_id); if (fspace_id < 0) { H5Dclose(dset_id); return TREXIO_INVALID_ID; } // allocate space for the dimensions to be read hsize_t ddims[1] = {0}; // get the rank and dimensions of the dataset H5Sget_simple_extent_dims(fspace_id, ddims, NULL); H5Dclose(dset_id); H5Sclose(fspace_id); *size_max = (int64_t) ddims[0]; return TREXIO_SUCCESS; }
trexio_exit_code trexio_hdf5_has_determinant_list(trexio_t* const file) { if (file == NULL) return TREXIO_INVALID_ARG_1; trexio_hdf5_t* f = (trexio_hdf5_t*) file; if (f->determinant_group == (hsize_t) 0) return TREXIO_HAS_NOT; herr_t status = H5LTfind_dataset(f->determinant_group, "determinant_list"); /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */ if (status == 1){ return TREXIO_SUCCESS; } else if (status == 0) { return TREXIO_HAS_NOT; } else { return TREXIO_FAILURE; } } trexio_exit_code trexio_hdf5_has_determinant_coefficient(trexio_t* const file) { if (file == NULL) return TREXIO_INVALID_ARG_1; trexio_hdf5_t* f = (trexio_hdf5_t*) file; if (f->determinant_group == (hsize_t) 0) return TREXIO_HAS_NOT; char dset_coeff_name[128]; memset(dset_coeff_name, 0, sizeof(dset_coeff_name)); const int32_t trexio_state = file->state; if (trexio_state != 0) { sprintf(dset_coeff_name, "determinant_coefficient_state_%" PRId32, trexio_state); } else { strncpy(dset_coeff_name, "determinant_coefficient", 24); } herr_t status = H5LTfind_dataset(f->determinant_group, dset_coeff_name); /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */ if (status == 1){ return TREXIO_SUCCESS; } else if (status == 0) { return TREXIO_HAS_NOT; } else { return TREXIO_FAILURE; } }
11 Helper functions
trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse) { const int h5_rank = 1; const hsize_t maxdims[1] = {H5S_UNLIMITED}; hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims); if (dspace < 0) return TREXIO_INVALID_ID; hid_t prop = H5Pcreate(H5P_DATASET_CREATE); if (prop < 0) { H5Sclose(dspace); return TREXIO_INVALID_ID; } herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims); if (status < 0) { H5Sclose(dspace); H5Pclose(prop); return TREXIO_INVALID_ID; } hid_t dset_id = H5Dcreate(group_id, dset_name, dtype_id, dspace, H5P_DEFAULT, prop, H5P_DEFAULT); if (dset_id < 0) { H5Sclose(dspace); H5Pclose(prop); return TREXIO_INVALID_ID; } status = H5Dwrite(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, data_sparse); H5Sclose(dspace); H5Pclose(prop); H5Dclose(dset_id); if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; } trexio_exit_code trexio_hdf5_open_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse) { const int h5_rank = 1; hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT); if (dset_id <= 0) return TREXIO_INVALID_ID; hid_t fspace = H5Dget_space(dset_id); if (fspace < 0) { H5Dclose(dset_id); return TREXIO_INVALID_ID; } // allocate space for the dimensions to be read hsize_t ddims[1] = {0}; // get the rank and dimensions of the dataset int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL); if (rrank != h5_rank) { H5Sclose(fspace); H5Dclose(dset_id); return TREXIO_FAILURE; } ddims[0] += chunk_dims[0]; // extend the dset size herr_t status = H5Dset_extent(dset_id, ddims); if (status < 0) { H5Sclose(fspace); H5Dclose(dset_id); return TREXIO_INVALID_ID; } // close and reopen the file dataspace to take into account the extension H5Sclose(fspace); fspace = H5Dget_space(dset_id); if (fspace < 0) { H5Dclose(dset_id); return TREXIO_INVALID_ID; } // select hyperslab to be written using chunk_dims and offset values status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL); if (status < 0) { H5Sclose(fspace); H5Dclose(dset_id); return TREXIO_INVALID_ID; } // create memory dataspace to write from hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL); if (dspace < 0) { H5Sclose(fspace); H5Sclose(dspace); H5Dclose(dset_id); return TREXIO_INVALID_ID; } status = H5Dwrite(dset_id, dtype_id, dspace, fspace, H5P_DEFAULT, data_sparse); H5Dclose(dset_id); H5Sclose(dspace); H5Sclose(fspace); if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; } trexio_exit_code trexio_hdf5_open_read_dset_sparse (const hid_t group_id, const char* dset_name, const uint32_t dset_rank, const hsize_t* offset_file, hsize_t* const size_read, int64_t* const eof_read_size, const int is_index, void* const data_sparse ) { const int h5_rank = 1; if (dset_rank == 0) return TREXIO_INVALID_ARG_3; // get the dataset handle hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT); if (dset_id <= 0) return TREXIO_INVALID_ID; // get the dataspace of the dataset hid_t fspace_id = H5Dget_space(dset_id); if (fspace_id < 0) { H5Dclose(dset_id); return TREXIO_INVALID_ID; } /* get dims of the dset stored in the file to check whether reading with user-provided chunk size will reach end of the dataset (i.e. 
EOF in TEXT back end) */ hsize_t ddims[1] = {0}; int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL); if (rrank != h5_rank) { H5Sclose(fspace_id); H5Dclose(dset_id); return TREXIO_FAILURE; } hsize_t max_offset = offset_file[0] + size_read[0]; int is_EOF = 0; // if max_offset exceed current dim of the dset => EOF if (max_offset > ddims[0]) { is_EOF = 1; // lower the value of count to reduce the number of elements which will be read size_read[0] -= (max_offset - ddims[0]); // modified the value of eof_read_size passed by address if (eof_read_size != NULL) *eof_read_size = size_read[0]/dset_rank; } // special case when reading int indices uint64_t size_ranked = (uint64_t) size_read[0]; void* index_p = NULL; // read the datatype from the dataset and compare with the pre-defined values hid_t dtype = H5Dget_type(dset_id); if (is_index == 1) { if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) { uint8_t* index = CALLOC(size_ranked, uint8_t); if (index == NULL) return TREXIO_ALLOCATION_FAILED; index_p = index; } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) { uint16_t* index = CALLOC(size_ranked, uint16_t); if (index == NULL) return TREXIO_ALLOCATION_FAILED; index_p = index; } else { index_p = data_sparse; } } herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL); if (status < 0) { H5Sclose(fspace_id); H5Dclose(dset_id); if (index_p != data_sparse) FREE(index_p); return TREXIO_INVALID_ID; } hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL); if (memspace_id < 0) { H5Sclose(fspace_id); H5Dclose(dset_id); if (index_p != data_sparse) FREE(index_p); return TREXIO_INVALID_ID; } if (is_index == 1) { status = H5Dread(dset_id, dtype, memspace_id, fspace_id, H5P_DEFAULT, index_p); } else { status = H5Dread(dset_id, dtype, memspace_id, fspace_id, H5P_DEFAULT, data_sparse); } H5Sclose(fspace_id); H5Sclose(memspace_id); H5Dclose(dset_id); if (status < 0) { if (index_p != data_sparse) FREE(index_p); return TREXIO_FAILURE; } if (is_index == 1) { if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) { uint8_t* index = (uint8_t*) index_p; for (uint64_t i=0; i<size_ranked; ++i){ ((int32_t*)data_sparse)[i] = (int32_t) index[i]; } FREE(index_p); } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) { uint16_t* index = (uint16_t*) index_p; for (uint64_t i=0; i<size_ranked; ++i){ ((int32_t*)data_sparse)[i] = (int32_t) index[i]; } FREE(index_p); } } if (is_EOF == 1) return TREXIO_END; return TREXIO_SUCCESS; }