From 82e7cd058bf3bdcb19a5d0717a7702e5576326d3 Mon Sep 17 00:00:00 2001 From: q-posev Date: Mon, 24 Jan 2022 16:15:31 +0100 Subject: [PATCH] add overwriting functionality for unsafe mode [HDF5] --- src/templates_hdf5/templator_hdf5.org | 185 ++++++++++++++------------ tests/overwrite_all_hdf5.c | 149 +++++++++++++++++++-- 2 files changed, 238 insertions(+), 96 deletions(-) diff --git a/src/templates_hdf5/templator_hdf5.org b/src/templates_hdf5/templator_hdf5.org index d14f57e..ea1dbed 100644 --- a/src/templates_hdf5/templator_hdf5.org +++ b/src/templates_hdf5/templator_hdf5.org @@ -218,29 +218,38 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub trexio_hdf5_t* const f = (trexio_hdf5_t*) file; - /* Write the dimensioning variables */ - const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); - const hid_t dspace = H5Screate(H5S_SCALAR); + /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ + if (trexio_hdf5_has_$group_num$(file) == TREXIO_SUCCESS && file->mode == 'u') { + herr_t status_del = H5Adelete(f->$group$_group, $GROUP_NUM$_NAME); + if (status_del < 0) return TREXIO_FAILURE; + } - const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, - dtype, dspace, H5P_DEFAULT, H5P_DEFAULT); - if (num_id <= 0) { - H5Sclose(dspace); - H5Tclose(dtype); + /* Setup the dataspace */ + const hid_t dtype_id = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); + if (dtype_id <= 0) return TREXIO_INVALID_ID; + + const hid_t dspace_id = H5Screate(H5S_SCALAR); + if (dspace_id <= 0) { + H5Tclose(dtype_id); return TREXIO_INVALID_ID; } - const herr_t status = H5Awrite(num_id, dtype, &(num)); - if (status < 0) { - H5Aclose(num_id); - H5Sclose(dspace); - H5Tclose(dtype); - return TREXIO_FAILURE; + const hid_t num_id = H5Acreate(f->$group$_group, + $GROUP_NUM$_NAME, + dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT); + if (num_id <= 0) { + H5Sclose(dspace_id); + H5Tclose(dtype_id); + return TREXIO_INVALID_ID; } - H5Sclose(dspace); + 
const herr_t status = H5Awrite(num_id, dtype_id, &num); + + H5Sclose(dspace_id); H5Aclose(num_id); - H5Tclose(dtype); + H5Tclose(dtype_id); + if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; } @@ -340,30 +349,41 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$* trexio_hdf5_t* f = (trexio_hdf5_t*) file; - if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { - - const herr_t status = H5LTmake_dataset(f->$group$_group, - $GROUP_DSET$_NAME, - (int) rank, (const hsize_t*) dims, - H5T_$GROUP_DSET_H5_DTYPE$, - $group_dset$); - if (status < 0) return TREXIO_FAILURE; - - } else { - - hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - const herr_t status = H5Dwrite(dset_id, - H5T_$GROUP_DSET_H5_DTYPE$, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); - - H5Dclose(dset_id); - if (status < 0) return TREXIO_FAILURE; + /* + Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). + NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, + thus reducing the size of the HDF5 file. In practice, this is not always the case. + Consider using the HDF5-native h5repack utility after deleting/overwriting big datasets. 
+ ,*/ + if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') { + herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; } + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate (f->$group$_group, + $GROUP_DSET$_NAME, + H5T_$GROUP_DSET_H5_DTYPE$, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_$GROUP_DSET_H5_DTYPE$, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + $group_dset$); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + return TREXIO_SUCCESS; } @@ -716,6 +736,18 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$, trexio_hdf5_t* f = (trexio_hdf5_t*) file; + /* + Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). + NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, + thus reducing the size of the HDF5 file. In practice, this is not always the case. + + Consider using the HDF5-provided h5repack utility after deleting/overwriting big datasets. 
+ ,*/ + if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') { + herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + herr_t status; hid_t dset_id; @@ -726,49 +758,30 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$, status = H5Tset_size (memtype, H5T_VARIABLE); if (status < 0) return TREXIO_FAILURE; - if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { + hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); + if (dspace <= 0) return TREXIO_INVALID_ID; - /* code to create dataset */ - hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); - if (filetype <= 0) return TREXIO_INVALID_ID; + /* code to create dataset */ + hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); + if (filetype <= 0) return TREXIO_INVALID_ID; - status = H5Tset_size (filetype, H5T_VARIABLE); - if (status < 0) return TREXIO_FAILURE; + status = H5Tset_size (filetype, H5T_VARIABLE); + if (status < 0) return TREXIO_FAILURE; - hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); - if (dspace <= 0) return TREXIO_INVALID_ID; + dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) return TREXIO_INVALID_ID; - dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; + status = H5Dwrite (dset_id, memtype, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + $group_dset$); - status = H5Dwrite (dset_id, memtype, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); + H5Dclose (dset_id); + H5Sclose (dspace); + H5Tclose (filetype); + H5Tclose (memtype); - H5Dclose (dset_id); - H5Sclose (dspace); - H5Tclose (filetype); - H5Tclose (memtype); - - if (status < 0) return TREXIO_FAILURE; - - } else { - - dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, 
H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - /* code to write dataset */ - status = H5Dwrite(dset_id, memtype, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); - - H5Dclose(dset_id); - H5Tclose(memtype); - - if (status < 0) return TREXIO_FAILURE; - - } + if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; @@ -857,8 +870,13 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) trexio_hdf5_t* const f = (trexio_hdf5_t*) file; + /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ + if (trexio_hdf5_has_$group_str$(file) == TREXIO_SUCCESS && file->mode == 'u') { + herr_t status_del = H5Adelete(f->$group$_group, $GROUP_STR$_NAME); + if (status_del < 0) return TREXIO_FAILURE; + } - /* Setup the dataspace */ + /* Setup the datatype for variable length string */ const hid_t dtype_id = H5Tcopy(H5T_C_S1); if (dtype_id <= 0) return TREXIO_INVALID_ID; @@ -871,12 +889,15 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); if (status < 0) return TREXIO_FAILURE; + /* Setup the dataspace */ const hid_t dspace_id = H5Screate(H5S_SCALAR); if (dspace_id <= 0) return TREXIO_INVALID_ID; /* Create the $group_str$ attribute of $group$ group */ - const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id, - H5P_DEFAULT, H5P_DEFAULT); + const hid_t str_id = H5Acreate(f->$group$_group, + $GROUP_STR$_NAME, + dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT); if (str_id <= 0) { H5Sclose(dspace_id); @@ -885,18 +906,14 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str) } status = H5Awrite(str_id, dtype_id, str); - if (status < 0) { - H5Aclose(str_id); - H5Sclose(dspace_id); - H5Tclose(dtype_id); - return TREXIO_FAILURE; - } H5Aclose(str_id); H5Sclose(dspace_id); H5Tclose(dtype_id); - return TREXIO_SUCCESS; + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; } #+end_src diff --git 
a/tests/overwrite_all_hdf5.c b/tests/overwrite_all_hdf5.c index bcbbaf5..a623eb9 100644 --- a/tests/overwrite_all_hdf5.c +++ b/tests/overwrite_all_hdf5.c @@ -2,10 +2,11 @@ #include #include #include +#include #define TEST_BACKEND TREXIO_HDF5 #define TREXIO_FILE "test_over.h5" -#define RM_COMMAND "rm -rf " TREXIO_FILE +#define RM_COMMAND "rm -f -- " TREXIO_FILE static int test_write (const char* file_name, const back_end_t backend) { @@ -56,13 +57,13 @@ static int test_write (const char* file_name, const back_end_t backend) { rc = trexio_write_nucleus_coord(file, coord); assert (rc == TREXIO_SUCCESS); - + rc = trexio_write_nucleus_point_group(file, sym, 4); assert (rc == TREXIO_SUCCESS); - + rc = trexio_write_nucleus_label(file, labels, 2); assert (rc == TREXIO_SUCCESS); - + // close current session rc = trexio_close(file); assert (rc == TREXIO_SUCCESS); @@ -73,9 +74,61 @@ static int test_write (const char* file_name, const back_end_t backend) { } -static int test_overwrite (const char* file_name, const back_end_t backend) { +static int test_overwrite_unsafe (const char* file_name, const back_end_t backend) { -/* Try to overwrite the data that already exists in the TREXIO file */ +/* Try to overwrite the data that already exists in the TREXIO file which is open in UNSAFE mode */ + + trexio_t* file = NULL; + trexio_exit_code rc; + + // parameters to be written + int num = 5; + double coord[15] = { + 0.00000000 , 666.666 , 0.00000000 , + -1.20594314 , 0.69625160 , 0.00000000 , + -1.20594314 , -0.69625160 , 0.00000000 , + 0.00000000 , -1.39250319 , 0.00000000 , + 1.20594314 , -0.69625160 , 0.00000000 + }; + const char* sym = "Unknown"; + const char* labels[] = {"Ru" , + "U" , + "Cl" , + "Na" , + "H" }; + +/*================= START OF TEST ==================*/ + + // open file in 'unsafe' mode + file = trexio_open(file_name, 'u', backend, &rc); + assert (file != NULL); + + // check that the previously written data can be overwritten + rc = 
trexio_write_nucleus_num(file, num); + assert (rc == TREXIO_SUCCESS); + + rc = trexio_write_nucleus_coord(file, coord); + assert (rc == TREXIO_SUCCESS); + + rc = trexio_write_nucleus_point_group(file, sym, 16); + assert (rc == TREXIO_SUCCESS); + + rc = trexio_write_nucleus_label(file, labels, 4); + assert (rc == TREXIO_SUCCESS); + + // close current session + rc = trexio_close(file); + assert (rc == TREXIO_SUCCESS); + +/*================= END OF TEST ==================*/ + + return 0; +} + + +static int test_overwrite_safe (const char* file_name, const back_end_t backend) { + +/* Try to overwrite the data that already exists in the TREXIO file which is open in SAFE mode*/ trexio_t* file = NULL; trexio_exit_code rc; @@ -104,10 +157,10 @@ static int test_overwrite (const char* file_name, const back_end_t backend) { rc = trexio_write_nucleus_coord(file, coord); assert (rc == TREXIO_DSET_ALREADY_EXISTS); - + rc = trexio_write_nucleus_point_group(file, sym, 16); assert (rc == TREXIO_ATTR_ALREADY_EXISTS); - + rc = trexio_write_nucleus_label(file, labels, 4); assert (rc == TREXIO_DSET_ALREADY_EXISTS); @@ -121,6 +174,78 @@ static int test_overwrite (const char* file_name, const back_end_t backend) { } +int test_read(const char* file_name, const back_end_t backend) { + +/*========= Test read ===========*/ + + trexio_t* file = NULL; + trexio_exit_code rc; + + int num; + double* coord; + char** label; + char* point_group; + +/*================= START OF TEST ==================*/ + + // open existing file on 'read' mode + file = trexio_open(file_name, 'r', backend, &rc); + assert (file != NULL); + + // read nucleus_num + rc = trexio_read_nucleus_num(file,&num); + assert (rc == TREXIO_SUCCESS); + assert (num == 5); + + // read nucleus_coord + coord = (double*) calloc(3*num, sizeof(double)); + rc = trexio_read_nucleus_coord(file,coord); + assert (rc == TREXIO_SUCCESS); + + double x = coord[1] - 666.666; + assert( x*x < 1.e-14); + free(coord); + + // read nucleus_label + label = 
(char**) malloc(num*sizeof(char*)); + for (int i=0; i