UP | HOME

HDF5 back end

Table of Contents

1 Template for HDF5 definitions

#define $GROUP$_GROUP_NAME          "$group$"
#define $GROUP_NUM$_NAME            "$group_num$"
#define $GROUP_DSET$_NAME           "$group_dset$"
#define $GROUP_STR$_NAME            "$group_str$"

2 Template for HDF5 structures

typedef struct trexio_hdf5_s {
  trexio_t     parent ;
  hid_t        file_id;
  hid_t        $group$_group;
} trexio_hdf5_t;

3 Template for HDF5 init/deinit

trexio_exit_code
trexio_hdf5_inquire(const char* file_name)
{
  /* H5Fis_hdf5 determines whether file is in HDF5 format */
  htri_t rc = H5Fis_hdf5(file_name);
  if (rc > 0 ) {
    return TREXIO_SUCCESS;    //exists and HDF5
  } else if (rc == 0) {
    return TREXIO_FILE_ERROR; //exists but not HDF5
  } else {
    return TREXIO_FAILURE;    //does not exist or function fails
  }
}
trexio_exit_code
trexio_hdf5_init (trexio_t* const file)
{

  trexio_hdf5_t* const f = (trexio_hdf5_t*) file;

  /* If file doesn't exist, create it */
  int f_exists = 0;
  struct stat st;

  if (stat(file->file_name, &st) == 0) f_exists = 1;

  if (f_exists == 1) {

    switch (file->mode) {
    case 'r':
      // reading the existing file -> open as RDONLY
      f->file_id = H5Fopen(file->file_name, H5F_ACC_RDONLY, H5P_DEFAULT);
      break;
    case 'u':
    case 'w':
      // writing the existing file -> open as RDWRITE
      f->file_id = H5Fopen(file->file_name, H5F_ACC_RDWR, H5P_DEFAULT);
      break;
    }

  } else {

    switch (file->mode) {
    case 'r':
      // reading non-existing file -> error
      return TREXIO_FAILURE;
    case 'u':
    case 'w':
      // writing non-existing file -> create it
      f->file_id = H5Fcreate(file->file_name, H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT);
      break;
    }

  }

  /* Create or open groups in the hdf5 file assuming that they exist if file exists */
  switch (file->mode) {
    case 'r':
      f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT);
      break;
    case 'u':
    case 'w':
      if (f_exists == 1) {
        f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT);
      } else {
        f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
      }
      break;
  }
  if (f->$group$_group <= 0L) return TREXIO_INVALID_ID;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_deinit (trexio_t* const file)
{

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  H5Gclose(f->$group$_group);
  f->$group$_group = 0;

  H5Fclose(f->file_id);
  f->file_id = 0;

  return TREXIO_SUCCESS;

}

4 Template for HDF5 has/read/write a numerical attribute

trexio_exit_code
trexio_hdf5_read_$group_num$ (trexio_t* const file, $group_num_dtype_double$* const num)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (num  == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
  /* Quit if the dimensioning attribute is missing in the file */
  if (H5Aexists(f->$group$_group, $GROUP_NUM$_NAME) == 0) return TREXIO_FAILURE;

  /* Read the $group_num$ attribute of $group$ group */
  const hid_t num_id = H5Aopen(f->$group$_group, $GROUP_NUM$_NAME, H5P_DEFAULT);
  if (num_id <= 0) return TREXIO_INVALID_ID;

  const herr_t status = H5Aread(num_id, H5T_$GROUP_NUM_H5_DTYPE$, num);

  H5Aclose(num_id);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;

}
trexio_exit_code
trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_double$ num)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* const f = (trexio_hdf5_t*) file;

  /* Delete the attribute if it exists and if the file is open in UNSAFE mode */
  if (trexio_hdf5_has_$group_num$(file) == TREXIO_SUCCESS && file->mode == 'u') {
    herr_t status_del = H5Adelete(f->$group$_group, $GROUP_NUM$_NAME);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  /* Setup the dataspace */
  const hid_t dtype_id = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
  if (dtype_id <= 0) return TREXIO_INVALID_ID;

  const hid_t dspace_id = H5Screate(H5S_SCALAR);
  if (dspace_id <= 0) {
    H5Tclose(dtype_id);
    return TREXIO_INVALID_ID;
  }

  const hid_t num_id = H5Acreate(f->$group$_group,
                                 $GROUP_NUM$_NAME,
                                 dtype_id, dspace_id,
                                 H5P_DEFAULT, H5P_DEFAULT);
  if (num_id <= 0) {
    H5Sclose(dspace_id);
    H5Tclose(dtype_id);
    return TREXIO_INVALID_ID;
  }

  const herr_t status = H5Awrite(num_id, dtype_id, &num);

  H5Sclose(dspace_id);
  H5Aclose(num_id);
  H5Tclose(dtype_id);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_num$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  htri_t status = H5Aexists(f->$group$_group, $GROUP_NUM$_NAME);
  /* H5Aexists returns positive value if attribute exists, 0 if does not, negative if error */
  if (status > 0){
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}

5 Template for HDF5 has/read/write a dataset of numerical data

trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $group_dset$, const uint32_t rank, const uint64_t* dims)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  // open the dataset to get its dimensions
  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // allocate space for the dimensions to be read
  hsize_t* ddims = CALLOC( (int) rank, hsize_t);
  if (ddims == NULL) return TREXIO_FAILURE;

  // get the dataspace of the dataset
  hid_t dspace_id = H5Dget_space(dset_id);
  // get the rank and dimensions of the dataset
  int rrank = H5Sget_simple_extent_dims(dspace_id, ddims, NULL);
  // check that dimensions are consistent
  if (rrank != (int) rank) {
    FREE(ddims);
    H5Sclose(dspace_id);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ARG_3;
  }

  for (uint32_t i=0; i<rank; ++i){
    if (ddims[i] != dims[i]) {
      FREE(ddims);
      H5Sclose(dspace_id);
      H5Dclose(dset_id);
      return TREXIO_INVALID_ARG_4;
    }
  }

  FREE(ddims);
  H5Sclose(dspace_id);
  H5Dclose(dset_id);

  /* High-level H5LT API. No need to deal with dataspaces and datatypes */
  herr_t status = H5LTread_dataset(f->$group$_group,
                                   $GROUP_DSET$_NAME,
                                   H5T_$GROUP_DSET_H5_DTYPE$,
                                   $group_dset$);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* $group_dset$, const uint32_t rank, const uint64_t* dims)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /*
     Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
     NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
     thus reducing the size of the HDF5 file. In practic, this is not always the case.

     Consider using HDF5-native h5repack utility after deleting/overwriting big datasets.
  */
  if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
    herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
  if (dspace_id <= 0) return TREXIO_INVALID_ID;

  hid_t dset_id = H5Dcreate (f->$group$_group,
                             $GROUP_DSET$_NAME,
                             H5T_$GROUP_DSET_H5_DTYPE$,
                             dspace_id,
                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (dset_id <= 0) {
    H5Sclose(dspace_id);
    return TREXIO_INVALID_ID;
  }

  herr_t status = H5Dwrite(dset_id,
                           H5T_$GROUP_DSET_H5_DTYPE$,
                           H5S_ALL,
                           dspace_id,
                           H5P_DEFAULT,
                           $group_dset$);
  H5Dclose(dset_id);
  H5Sclose(dspace_id);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;

}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1){
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}

6 Template for HDF5 has/read/write a dataset of sparse data

Sparse data is stored using extensible datasets of HDF5. Extensibility is required due to the fact that the sparse data will be written in chunks of user-defined size.

trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  hid_t index_dtype;
  void* index_p = NULL;
  uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;
  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    uint8_t* index = CALLOC(size_ranked, uint8_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint8_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT8;
  } else if (size_max < UINT16_MAX) {
    uint16_t* index = CALLOC(size_ranked, uint16_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint16_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT16;
  } else {
    index_p = (int32_t*) index_sparse;
    index_dtype = H5T_NATIVE_INT32;
  }

  /* Store float values in double precision */
  hid_t value_dtype = H5T_NATIVE_DOUBLE;
  /* Arrays of chunk dims that will be used for chunking the dataset */
  const hsize_t chunk_i_dims[1] = {size_ranked};
  const hsize_t chunk_v_dims[1] = {size};

  /* Indices and values are stored as 2 independent datasets in the HDF5 file */
  char dset_index_name[256];
  char dset_value_name[256];
  /* Build the names of the datasets */
  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);

  trexio_exit_code rc_write = TREXIO_FAILURE;
  /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
  /* If the file does not exist -> create it and write */

  /* Create chunked dataset with index_dtype datatype and write indices into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  /* Create chunked dataset with value_dtype datatype and write values into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  } else {
  /* If the file exists -> open it and write */
    hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
    hsize_t offset_v[1] = {(hsize_t) offset_file};

  /* Create chunked dataset with index_dtype datatype and write indices into it */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  /* Create chunked dataset with value_dtype datatype and write values into it */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  }

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int64_t* const eof_read_size,
                               int32_t* const index_read,
                               double* const value_read)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  /* Indices and values are stored as 2 independent datasets in the HDF5 file */
  char dset_index_name[256];
  char dset_value_name[256];
  /* Build the names of the datasets */
  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);

  hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
  hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$};

  hsize_t offset_v[1] = {(hsize_t) offset_file};
  hsize_t count_v[1] = {(hsize_t) size};

  int is_index = 1, is_value = 0;
  trexio_exit_code rc_read;

  // attempt to read indices
  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, offset_i, count_i, NULL, is_index, index_read);
  if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;
  // attempt to read values
  // when EOF is encountered - the count_v[0] is modified and contains the number of elements being read
  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, offset_v, count_v, eof_read_size, is_value, value_read);
  if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;

  return rc_read;
}
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // allocate space for the dimensions to be read
  hsize_t ddims[1] = {0};

  // get the rank and dimensions of the dataset
  H5Sget_simple_extent_dims(fspace_id, ddims, NULL);

  H5Dclose(dset_id);
  H5Sclose(fspace_id);

  *size_max = (int64_t) ddims[0];

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values");
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1){
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}

7 Template for HDF5 has/read/write a dataset of strings

trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, const uint32_t rank, const uint64_t* dims, const uint32_t max_str_len)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  herr_t status;

  // open the dataset to get its dimensions
  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // allocate space for the dimensions to be read
  hsize_t* ddims = CALLOC( (int) rank, hsize_t);
  if (ddims == NULL) {
    H5Dclose(dset_id);
    return TREXIO_ALLOCATION_FAILED;
  }

  hid_t dspace = H5Dget_space(dset_id);
  if (dset_id <= 0) {
    FREE(ddims);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // get the rank of the dataset in a file
  int rrank = H5Sget_simple_extent_dims(dspace, ddims, NULL);
  if (rrank != (int) rank) {
    FREE(ddims);
    H5Dclose(dset_id);
    H5Sclose(dspace);
    return TREXIO_INVALID_ARG_3;
  }

  for (int i=0; i<rrank; i++) {
    if (ddims[i] != dims[i]) {
      H5Dclose(dset_id);
      H5Sclose(dspace);
      FREE(ddims);
      return TREXIO_INVALID_ARG_4;
    }
  }
  FREE(ddims);

  hid_t memtype = H5Tcopy (H5T_C_S1);
  status = H5Tset_size(memtype, H5T_VARIABLE);
  if (status < 0 || memtype <= 0) {
    H5Dclose(dset_id);
    H5Sclose(dspace);
    return TREXIO_FAILURE;
  }

  char** rdata = CALLOC(dims[0], char*);
  if (rdata == NULL) {
    H5Dclose(dset_id);
    H5Sclose(dspace);
    H5Tclose(memtype);
    return TREXIO_ALLOCATION_FAILED;
  }

  status = H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
  if (status < 0) {
    FREE(rdata);
    H5Dclose(dset_id);
    H5Sclose(dspace);
    H5Tclose(memtype);
    return TREXIO_FAILURE;
  }

  // copy contents of temporary rdata buffer into the group_dset otherwise they are lost
  // after calling H5Treclaim or H5Dvlen_reclaim functions
  strcpy($group_dset$, "");
  for (uint64_t i=0; i<dims[0]; i++) {
    strncat($group_dset$, rdata[i], max_str_len);
    strcat($group_dset$, TREXIO_DELIM);
  }

  // H5Dvlen_reclaim is deprecated and replaced by H5Treclaim in HDF5 v.1.12.0
  #if (H5_VERS_MAJOR <= 1 && H5_VERS_MINOR < 12)
    status = H5Dvlen_reclaim(memtype, dspace, H5P_DEFAULT, rdata);
  #else
    status = H5Treclaim(memtype, dspace, H5P_DEFAULT, rdata);
  #endif

  if (status < 0) {
    FREE(rdata);
    H5Dclose(dset_id);
    H5Sclose(dspace);
    H5Tclose(memtype);
    return TREXIO_FAILURE;
  }

  FREE(rdata);
  H5Dclose(dset_id);
  H5Sclose(dspace);
  H5Tclose(memtype);

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$, const uint32_t rank, const uint64_t* dims)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /*
     Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
     NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
     thus reducing the size of the HDF5 file. In practic, this is not always the case.

     Consider using HDF5-provided h5repack utility after deleting/overwriting big datasets.
  */
  if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
    herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  herr_t status;
  hid_t dset_id;

  /* we are going to write variable-length strings */
  hid_t memtype = H5Tcopy (H5T_C_S1);
  if (memtype <= 0) return TREXIO_INVALID_ID;

  status = H5Tset_size (memtype, H5T_VARIABLE);
  if (status < 0) return TREXIO_FAILURE;

  hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
  if (dspace <= 0) return TREXIO_INVALID_ID;

  /* code to create dataset */
  hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
  if (filetype <= 0) return TREXIO_INVALID_ID;

  status = H5Tset_size (filetype, H5T_VARIABLE);
  if (status < 0) return TREXIO_FAILURE;

  dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace,
                       H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  status = H5Dwrite (dset_id, memtype,
                     H5S_ALL, H5S_ALL, H5P_DEFAULT,
                     $group_dset$);

  H5Dclose (dset_id);
  H5Sclose (dspace);
  H5Tclose (filetype);
  H5Tclose (memtype);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;

}
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME);
  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  if (status == 1){
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}

8 Template for HDF5 has/read/write a string attribute

trexio_exit_code
trexio_hdf5_read_$group_str$ (trexio_t* const file, char* const str, const uint32_t max_str_len)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (str  == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
  /* Quit if the string attribute is missing in the file */
  if (H5Aexists(f->$group$_group, $GROUP_STR$_NAME) == 0) return TREXIO_HAS_NOT;

  /* Read the $group_str$ attribute of $group$ group */
  const hid_t str_id = H5Aopen(f->$group$_group, $GROUP_STR$_NAME, H5P_DEFAULT);
  if (str_id <= 0) return TREXIO_INVALID_ID;

  const hid_t ftype_id = H5Aget_type(str_id);
  if (ftype_id <= 0) return TREXIO_INVALID_ID;
  uint64_t sdim = H5Tget_size(ftype_id);
  sdim++;                         /* Make room for null terminator */

  const hid_t mem_id = H5Tcopy(H5T_C_S1);
  if (mem_id <= 0) return TREXIO_INVALID_ID;

  herr_t status;
  status = (max_str_len+1) > sdim ? H5Tset_size(mem_id, sdim) : H5Tset_size(mem_id, max_str_len+1) ;
  if (status < 0) return TREXIO_FAILURE;

  status = H5Aread(str_id, mem_id, str);
  if (status < 0) return TREXIO_FAILURE;

  H5Aclose(str_id);
  H5Tclose(mem_id);
  H5Tclose(ftype_id);

  return TREXIO_SUCCESS;

}
trexio_exit_code
trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (str  == NULL) return TREXIO_INVALID_ARG_2;

  trexio_hdf5_t* const f = (trexio_hdf5_t*) file;

  /* Delete the attribute if it exists and if the file is open in UNSAFE mode */
  if (trexio_hdf5_has_$group_str$(file) == TREXIO_SUCCESS && file->mode == 'u') {
    herr_t status_del = H5Adelete(f->$group$_group, $GROUP_STR$_NAME);
    if (status_del < 0) return TREXIO_FAILURE;
  }

  /* Setup the datatype for variable length string */
  const hid_t dtype_id = H5Tcopy(H5T_C_S1);
  if (dtype_id <= 0) return TREXIO_INVALID_ID;

  size_t str_attr_len = strlen(str) + 1;

  herr_t status;
  status = H5Tset_size(dtype_id, str_attr_len);
  if (status < 0) return TREXIO_FAILURE;

  status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM);
  if (status < 0) return TREXIO_FAILURE;

  /* Setup the dataspace */
  const hid_t dspace_id = H5Screate(H5S_SCALAR);
  if (dspace_id <= 0) return TREXIO_INVALID_ID;

  /* Create the $group_str$ attribute of $group$ group */
  const hid_t str_id = H5Acreate(f->$group$_group,
                                 $GROUP_STR$_NAME,
                                 dtype_id, dspace_id,
                                 H5P_DEFAULT, H5P_DEFAULT);

  if (str_id <= 0) {
    H5Sclose(dspace_id);
    H5Tclose(dtype_id);
    return TREXIO_INVALID_ID;
  }

  status = H5Awrite(str_id, dtype_id, str);

  H5Aclose(str_id);
  H5Sclose(dspace_id);
  H5Tclose(dtype_id);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_has_$group_str$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  htri_t status = H5Aexists(f->$group$_group, $GROUP_STR$_NAME);
  /* H5Aexists returns positive value if attribute exists, 0 if does not, negative if error */
  if (status > 0){
    return TREXIO_SUCCESS;
  } else if (status == 0) {
    return TREXIO_HAS_NOT;
  } else {
    return TREXIO_FAILURE;
  }

}

9 Template for HDF5 delete a group (UNSAFE mode)

Note: in early versions of the HDF5 library (v < 1.10) unlinking an object was not working as expected and the associated memory was not necessarily freed (see this StackOverflow discussion for example). Nevertheless, some space might remain occupied even after deleting the associated object in recent version. To take the best use of the deleted file space, we recommend to write the deleted group within the same session (i.e. before closing the TREXIO file).

In principle, one can use HDF5-provided h5repack binary, which copies all existing objects from one file into another. Thus, any corrupted/lost file space will remain in the first file. The use of h5repack is highly encouraged.

trexio_exit_code
trexio_hdf5_delete_$group$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  // delete the link to the existing group: this should free the associated space
  H5Gclose(f->$group$_group);
  f->$group$_group = 0;
  herr_t status = H5Ldelete(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT);
  if (status < 0) return TREXIO_FAILURE;

  // re-create the group (with the new link ?)
  f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
  if (f->$group$_group <= 0L) return TREXIO_INVALID_ID;

  return TREXIO_SUCCESS;
}

10 Helper functions

trexio_exit_code
trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
                                      const char* dset_name,
                                      const hid_t dtype_id,
                                      const hsize_t* chunk_dims,
                                      const void* data_sparse)
{
  const int h5_rank = 1;
  const hsize_t maxdims[1] = {H5S_UNLIMITED};

  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
  if (dspace < 0) return TREXIO_INVALID_ID;

  hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
  if (prop < 0) {
    H5Sclose(dspace);
    return TREXIO_INVALID_ID;
  }

  herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims);
  if (status < 0) {
    H5Sclose(dspace);
    H5Pclose(prop);
    return TREXIO_INVALID_ID;
  }

  hid_t dset_id = H5Dcreate(group_id,
                            dset_name,
                            dtype_id,
                            dspace,
                            H5P_DEFAULT,
                            prop,
                            H5P_DEFAULT);
  if (dset_id < 0) {
    H5Sclose(dspace);
    H5Pclose(prop);
    return TREXIO_INVALID_ID;
  }

  status = H5Dwrite(dset_id,
                    dtype_id,
                    H5S_ALL, H5S_ALL, H5P_DEFAULT,
                    data_sparse);
  H5Sclose(dspace);
  H5Pclose(prop);
  H5Dclose(dset_id);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}


trexio_exit_code
trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
                                    const char* dset_name,
                                    const hid_t dtype_id,
                                    const hsize_t* chunk_dims,
                                    const hsize_t* offset_file,
                                    const void* data_sparse)
{
  const int h5_rank = 1;

  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // allocate space for the dimensions to be read
  hsize_t ddims[1] = {0};

  // get the rank and dimensions of the dataset
  int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
  if (rrank != h5_rank) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  ddims[0] += chunk_dims[0];

  // extend the dset size
  herr_t status  = H5Dset_extent(dset_id, ddims);
  if (status < 0) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // close and reopen the file dataspace to take into account the extension
  H5Sclose(fspace);
  fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // select hyperslab to be written using chunk_dims and offset values
  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
  if (status < 0) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // create memory dataspace to write from
  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
  if (dspace < 0) {
    H5Sclose(fspace);
    H5Sclose(dspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  status = H5Dwrite(dset_id,
                    dtype_id,
                    dspace, fspace, H5P_DEFAULT,
                    data_sparse);
  H5Dclose(dset_id);
  H5Sclose(dspace);
  H5Sclose(fspace);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}


trexio_exit_code
trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
                                   const char* dset_name,
                                   const hsize_t* offset_file,
                                   hsize_t* const size_read,
                                   int64_t* const eof_read_size,
                                   const int is_index,
                                   void* const data_sparse
                                   )
{
  const int h5_rank = 1;

  // get the dataset handle
  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // get the dataspace of the dataset
  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  /* get dims of the dset stored in the file to check whether reading with user-provided chunk size
     will reach end of the dataset (i.e. EOF in TEXT back end)
   */
  hsize_t ddims[1] = {0};
  int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
  if (rrank != h5_rank) {
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  hsize_t max_offset = offset_file[0] + size_read[0];

  int is_EOF = 0;
  // if max_offset exceed current dim of the dset => EOF
  if (max_offset > ddims[0]) {
    is_EOF = 1;
    // lower the value of count to reduce the number of elements which will be read
    size_read[0] -= max_offset - ddims[0];
    // modified the value of eof_read_size passed by address
    if (eof_read_size != NULL) *eof_read_size = size_read[0];
  }

  // special case when reading int indices
  uint64_t size_ranked = (uint64_t) size_read[0];
  void* index_p = NULL;
  // read the datatype from the dataset and compare with the pre-defined values
  hid_t dtype = H5Dget_type(dset_id);
  if (is_index == 1) {
    if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
      uint8_t* index = CALLOC(size_ranked, uint8_t);
      if (index == NULL) return TREXIO_ALLOCATION_FAILED;
      index_p = index;
    } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
      uint16_t* index = CALLOC(size_ranked, uint16_t);
      if (index == NULL) return TREXIO_ALLOCATION_FAILED;
      index_p = index;
    } else {
      index_p = data_sparse;
    }
  }

  herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL);
  if (status < 0) {
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    if (index_p != data_sparse) FREE(index_p);
    return TREXIO_INVALID_ID;
  }

  hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
  if (memspace_id < 0) {
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    if (index_p != data_sparse) FREE(index_p);
    return TREXIO_INVALID_ID;
  }

  if (is_index == 1) {
    status = H5Dread(dset_id,
                     dtype,
                     memspace_id, fspace_id, H5P_DEFAULT,
                     index_p);
  } else {
    status = H5Dread(dset_id,
                     dtype,
                     memspace_id, fspace_id, H5P_DEFAULT,
                     data_sparse);
  }

  H5Sclose(fspace_id);
  H5Sclose(memspace_id);
  H5Dclose(dset_id);
  if (status < 0) {
    if (index_p != data_sparse) FREE(index_p);
    return TREXIO_FAILURE;
  }

  if (is_index == 1) {
    if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
      uint8_t* index = (uint8_t*) index_p;
      for (uint64_t i=0; i<size_ranked; ++i){
        ((int32_t*)data_sparse)[i] = (int32_t) index[i];
      }
      FREE(index_p);
    } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
      uint16_t* index = (uint16_t*) index_p;
      for (uint64_t i=0; i<size_ranked; ++i){
        ((int32_t*)data_sparse)[i] = (int32_t) index[i];
      }
      FREE(index_p);
    }
  }

  if (is_EOF == 1) return TREXIO_END;

  return TREXIO_SUCCESS;
}

Author: TREX-CoE

Created: 2022-03-11 Fri 12:54

Validate