1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-10-02 14:31:05 +02:00

add overwriting functionality for unsafe mode [HDF5]

This commit is contained in:
q-posev 2022-01-24 16:15:31 +01:00
parent f68a59417e
commit 82e7cd058b
2 changed files with 238 additions and 96 deletions

View File

@ -218,29 +218,38 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
trexio_hdf5_t* const f = (trexio_hdf5_t*) file; trexio_hdf5_t* const f = (trexio_hdf5_t*) file;
/* Write the dimensioning variables */ /* Delete the attribute if it exists and if the file is open in UNSAFE mode */
const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); if (trexio_hdf5_has_$group_num$(file) == TREXIO_SUCCESS && file->mode == 'u') {
const hid_t dspace = H5Screate(H5S_SCALAR); herr_t status_del = H5Adelete(f->$group$_group, $GROUP_NUM$_NAME);
if (status_del < 0) return TREXIO_FAILURE;
}
const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, /* Setup the dataspace */
dtype, dspace, H5P_DEFAULT, H5P_DEFAULT); const hid_t dtype_id = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
if (num_id <= 0) { if (dtype_id <= 0) return TREXIO_INVALID_ID;
H5Sclose(dspace);
H5Tclose(dtype); const hid_t dspace_id = H5Screate(H5S_SCALAR);
if (dspace_id <= 0) {
H5Tclose(dtype_id);
return TREXIO_INVALID_ID; return TREXIO_INVALID_ID;
} }
const herr_t status = H5Awrite(num_id, dtype, &(num)); const hid_t num_id = H5Acreate(f->$group$_group,
if (status < 0) { $GROUP_NUM$_NAME,
H5Aclose(num_id); dtype_id, dspace_id,
H5Sclose(dspace); H5P_DEFAULT, H5P_DEFAULT);
H5Tclose(dtype); if (num_id <= 0) {
return TREXIO_FAILURE; H5Sclose(dspace_id);
H5Tclose(dtype_id);
return TREXIO_INVALID_ID;
} }
H5Sclose(dspace); const herr_t status = H5Awrite(num_id, dtype_id, &num);
H5Sclose(dspace_id);
H5Aclose(num_id); H5Aclose(num_id);
H5Tclose(dtype); H5Tclose(dtype_id);
if (status < 0) return TREXIO_FAILURE;
return TREXIO_SUCCESS; return TREXIO_SUCCESS;
} }
@ -340,30 +349,41 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$*
trexio_hdf5_t* f = (trexio_hdf5_t*) file; trexio_hdf5_t* f = (trexio_hdf5_t*) file;
if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { /*
Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
const herr_t status = H5LTmake_dataset(f->$group$_group, NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
$GROUP_DSET$_NAME, thus reducing the size of the HDF5 file. In practice, this is not always the case.
(int) rank, (const hsize_t*) dims,
H5T_$GROUP_DSET_H5_DTYPE$,
$group_dset$);
if (status < 0) return TREXIO_FAILURE;
} else {
hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;
const herr_t status = H5Dwrite(dset_id,
H5T_$GROUP_DSET_H5_DTYPE$,
H5S_ALL, H5S_ALL, H5P_DEFAULT,
$group_dset$);
H5Dclose(dset_id);
if (status < 0) return TREXIO_FAILURE;
Consider using HDF5-native h5repack utility after deleting/overwriting big datasets.
,*/
if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (status_del < 0) return TREXIO_FAILURE;
} }
hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
if (dspace_id <= 0) return TREXIO_INVALID_ID;
hid_t dset_id = H5Dcreate (f->$group$_group,
$GROUP_DSET$_NAME,
H5T_$GROUP_DSET_H5_DTYPE$,
dspace_id,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
if (dset_id <= 0) {
H5Sclose(dspace_id);
return TREXIO_INVALID_ID;
}
herr_t status = H5Dwrite(dset_id,
H5T_$GROUP_DSET_H5_DTYPE$,
H5S_ALL,
dspace_id,
H5P_DEFAULT,
$group_dset$);
H5Dclose(dset_id);
H5Sclose(dspace_id);
if (status < 0) return TREXIO_FAILURE;
return TREXIO_SUCCESS; return TREXIO_SUCCESS;
} }
@ -716,6 +736,18 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$,
trexio_hdf5_t* f = (trexio_hdf5_t*) file; trexio_hdf5_t* f = (trexio_hdf5_t*) file;
/*
Try to delete an existing dataset by unlinking it from the group (UNSAFE mode).
NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it,
thus reducing the size of the HDF5 file. In practice, this is not always the case.
Consider using HDF5-provided h5repack utility after deleting/overwriting big datasets.
,*/
if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') {
herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (status_del < 0) return TREXIO_FAILURE;
}
herr_t status; herr_t status;
hid_t dset_id; hid_t dset_id;
@ -726,49 +758,30 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$,
status = H5Tset_size (memtype, H5T_VARIABLE); status = H5Tset_size (memtype, H5T_VARIABLE);
if (status < 0) return TREXIO_FAILURE; if (status < 0) return TREXIO_FAILURE;
if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL);
if (dspace <= 0) return TREXIO_INVALID_ID;
/* code to create dataset */ /* code to create dataset */
hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
if (filetype <= 0) return TREXIO_INVALID_ID; if (filetype <= 0) return TREXIO_INVALID_ID;
status = H5Tset_size (filetype, H5T_VARIABLE); status = H5Tset_size (filetype, H5T_VARIABLE);
if (status < 0) return TREXIO_FAILURE; if (status < 0) return TREXIO_FAILURE;
hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace,
if (dspace <= 0) return TREXIO_INVALID_ID; H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;
dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace, status = H5Dwrite (dset_id, memtype,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5S_ALL, H5S_ALL, H5P_DEFAULT,
if (dset_id <= 0) return TREXIO_INVALID_ID; $group_dset$);
status = H5Dwrite (dset_id, memtype, H5Dclose (dset_id);
H5S_ALL, H5S_ALL, H5P_DEFAULT, H5Sclose (dspace);
$group_dset$); H5Tclose (filetype);
H5Tclose (memtype);
H5Dclose (dset_id); if (status < 0) return TREXIO_FAILURE;
H5Sclose (dspace);
H5Tclose (filetype);
H5Tclose (memtype);
if (status < 0) return TREXIO_FAILURE;
} else {
dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT);
if (dset_id <= 0) return TREXIO_INVALID_ID;
/* code to write dataset */
status = H5Dwrite(dset_id, memtype,
H5S_ALL, H5S_ALL, H5P_DEFAULT,
$group_dset$);
H5Dclose(dset_id);
H5Tclose(memtype);
if (status < 0) return TREXIO_FAILURE;
}
return TREXIO_SUCCESS; return TREXIO_SUCCESS;
@ -857,8 +870,13 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
trexio_hdf5_t* const f = (trexio_hdf5_t*) file; trexio_hdf5_t* const f = (trexio_hdf5_t*) file;
/* Delete the attribute if it exists and if the file is open in UNSAFE mode */
if (trexio_hdf5_has_$group_str$(file) == TREXIO_SUCCESS && file->mode == 'u') {
herr_t status_del = H5Adelete(f->$group$_group, $GROUP_STR$_NAME);
if (status_del < 0) return TREXIO_FAILURE;
}
/* Setup the dataspace */ /* Setup the datatype for variable length string */
const hid_t dtype_id = H5Tcopy(H5T_C_S1); const hid_t dtype_id = H5Tcopy(H5T_C_S1);
if (dtype_id <= 0) return TREXIO_INVALID_ID; if (dtype_id <= 0) return TREXIO_INVALID_ID;
@ -871,12 +889,15 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM);
if (status < 0) return TREXIO_FAILURE; if (status < 0) return TREXIO_FAILURE;
/* Setup the dataspace */
const hid_t dspace_id = H5Screate(H5S_SCALAR); const hid_t dspace_id = H5Screate(H5S_SCALAR);
if (dspace_id <= 0) return TREXIO_INVALID_ID; if (dspace_id <= 0) return TREXIO_INVALID_ID;
/* Create the $group_str$ attribute of $group$ group */ /* Create the $group_str$ attribute of $group$ group */
const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id, const hid_t str_id = H5Acreate(f->$group$_group,
H5P_DEFAULT, H5P_DEFAULT); $GROUP_STR$_NAME,
dtype_id, dspace_id,
H5P_DEFAULT, H5P_DEFAULT);
if (str_id <= 0) { if (str_id <= 0) {
H5Sclose(dspace_id); H5Sclose(dspace_id);
@ -885,18 +906,14 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
} }
status = H5Awrite(str_id, dtype_id, str); status = H5Awrite(str_id, dtype_id, str);
if (status < 0) {
H5Aclose(str_id);
H5Sclose(dspace_id);
H5Tclose(dtype_id);
return TREXIO_FAILURE;
}
H5Aclose(str_id); H5Aclose(str_id);
H5Sclose(dspace_id); H5Sclose(dspace_id);
H5Tclose(dtype_id); H5Tclose(dtype_id);
return TREXIO_SUCCESS;
if (status < 0) return TREXIO_FAILURE;
return TREXIO_SUCCESS;
} }
#+end_src #+end_src

View File

@ -2,10 +2,11 @@
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#define TEST_BACKEND TREXIO_HDF5 #define TEST_BACKEND TREXIO_HDF5
#define TREXIO_FILE "test_over.h5" #define TREXIO_FILE "test_over.h5"
#define RM_COMMAND "rm -rf " TREXIO_FILE #define RM_COMMAND "rm -f -- " TREXIO_FILE
static int test_write (const char* file_name, const back_end_t backend) { static int test_write (const char* file_name, const back_end_t backend) {
@ -56,13 +57,13 @@ static int test_write (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_coord(file, coord); rc = trexio_write_nucleus_coord(file, coord);
assert (rc == TREXIO_SUCCESS); assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_point_group(file, sym, 4); rc = trexio_write_nucleus_point_group(file, sym, 4);
assert (rc == TREXIO_SUCCESS); assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_label(file, labels, 2); rc = trexio_write_nucleus_label(file, labels, 2);
assert (rc == TREXIO_SUCCESS); assert (rc == TREXIO_SUCCESS);
// close current session // close current session
rc = trexio_close(file); rc = trexio_close(file);
assert (rc == TREXIO_SUCCESS); assert (rc == TREXIO_SUCCESS);
@ -73,9 +74,61 @@ static int test_write (const char* file_name, const back_end_t backend) {
} }
static int test_overwrite (const char* file_name, const back_end_t backend) { static int test_overwrite_unsafe (const char* file_name, const back_end_t backend) {
/* Try to overwrite the data that already exists in the TREXIO file */ /* Try to overwrite the data that already exists in the TREXIO file which is open in UNSAFE mode*/
trexio_t* file = NULL;
trexio_exit_code rc;
// parameters to be written
int num = 5;
double coord[15] = {
0.00000000 , 666.666 , 0.00000000 ,
-1.20594314 , 0.69625160 , 0.00000000 ,
-1.20594314 , -0.69625160 , 0.00000000 ,
0.00000000 , -1.39250319 , 0.00000000 ,
1.20594314 , -0.69625160 , 0.00000000
};
const char* sym = "Unknown";
const char* labels[] = {"Ru" ,
"U" ,
"Cl" ,
"Na" ,
"H" };
/*================= START OF TEST ==================*/
// open file in 'unsafe' mode
file = trexio_open(file_name, 'u', backend, &rc);
assert (file != NULL);
// check that the previously written data can be overwritten in UNSAFE mode
rc = trexio_write_nucleus_num(file, num);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_coord(file, coord);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_point_group(file, sym, 16);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_label(file, labels, 4);
assert (rc == TREXIO_SUCCESS);
// close current session
rc = trexio_close(file);
assert (rc == TREXIO_SUCCESS);
/*================= END OF TEST ==================*/
return 0;
}
static int test_overwrite_safe (const char* file_name, const back_end_t backend) {
/* Try to overwrite the data that already exists in the TREXIO file which is open in SAFE mode*/
trexio_t* file = NULL; trexio_t* file = NULL;
trexio_exit_code rc; trexio_exit_code rc;
@ -104,10 +157,10 @@ static int test_overwrite (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_coord(file, coord); rc = trexio_write_nucleus_coord(file, coord);
assert (rc == TREXIO_DSET_ALREADY_EXISTS); assert (rc == TREXIO_DSET_ALREADY_EXISTS);
rc = trexio_write_nucleus_point_group(file, sym, 16); rc = trexio_write_nucleus_point_group(file, sym, 16);
assert (rc == TREXIO_ATTR_ALREADY_EXISTS); assert (rc == TREXIO_ATTR_ALREADY_EXISTS);
rc = trexio_write_nucleus_label(file, labels, 4); rc = trexio_write_nucleus_label(file, labels, 4);
assert (rc == TREXIO_DSET_ALREADY_EXISTS); assert (rc == TREXIO_DSET_ALREADY_EXISTS);
@ -121,6 +174,78 @@ static int test_overwrite (const char* file_name, const back_end_t backend) {
} }
/*
 * Read the nucleus group back from an existing TREXIO file and verify it.
 *
 * Expected content (matches what test_overwrite_unsafe writes in this file):
 *   - nucleus_num is 5
 *   - the second coordinate value (coord[1]) is 666.666
 *   - label[0] is "Ru" and label[3] is "Na"
 *   - the first space-delimited token of the point group is "Unknown"
 *
 * file_name: path of the TREXIO file, opened here in 'r' mode
 * backend:   back end forwarded to trexio_open
 *
 * Returns 0. Every failed check (including a failed allocation) aborts
 * through assert().
 */
int test_read(const char* file_name, const back_end_t backend) {

  /*========= Test read ===========*/

  /* size in bytes of each string buffer handed to the read routines */
  enum { STR_BUF_LEN = 32 };

  trexio_t* file = NULL;
  trexio_exit_code rc;

  int num;
  double* coord;
  char** label;
  char* point_group;

  /*================= START OF TEST ==================*/

  // open existing file in 'read' mode
  file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);

  // read nucleus_num
  rc = trexio_read_nucleus_num(file, &num);
  assert (rc == TREXIO_SUCCESS);
  assert (num == 5);

  // read nucleus_coord
  coord = calloc(3 * (size_t) num, sizeof *coord);
  assert (coord != NULL);

  rc = trexio_read_nucleus_coord(file, coord);
  assert (rc == TREXIO_SUCCESS);

  // compare the squared difference so the sign of the error does not matter
  double x = coord[1] - 666.666;
  assert (x*x < 1.e-14);
  free(coord);

  // read nucleus_label
  label = malloc((size_t) num * sizeof *label);
  assert (label != NULL);
  for (int i = 0; i < num; i++) {
    label[i] = malloc(STR_BUF_LEN * sizeof *label[i]);
    assert (label[i] != NULL);
  }

  rc = trexio_read_nucleus_label(file, label, 2);
  assert (rc == TREXIO_SUCCESS);
  assert (strcmp(label[0], "Ru") == 0);
  assert (strcmp(label[3], "Na") == 0);

  for (int i = 0; i < num; i++) {
    free(label[i]);
  }
  free(label);

  // read nucleus_point_group
  point_group = malloc(STR_BUF_LEN * sizeof *point_group);
  assert (point_group != NULL);

  rc = trexio_read_nucleus_point_group(file, point_group, 10);
  assert (rc == TREXIO_SUCCESS);

  // keep only the first space-delimited token before comparing;
  // strtok returns NULL when the buffer holds no token at all
  char* pch = strtok(point_group, " ");
  assert (pch != NULL);
  assert (strcmp(pch, "Unknown") == 0);
  /* alternative test when 3 symbols are read from the file to point_group */
  /*rc = trexio_read_nucleus_point_group(file, point_group, 3);
  assert (rc == TREXIO_SUCCESS);
  assert (strcmp(point_group, "B3U") == 0 );*/
  free(point_group);

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST =====================*/

  return 0;
}
int main(void) { int main(void) {
/*============== Test launcher ================*/ /*============== Test launcher ================*/
@ -129,13 +254,13 @@ int main(void) {
rc = system(RM_COMMAND); rc = system(RM_COMMAND);
assert (rc == 0); assert (rc == 0);
test_write (TREXIO_FILE, TEST_BACKEND); test_write (TREXIO_FILE, TEST_BACKEND);
test_overwrite (TREXIO_FILE, TEST_BACKEND); test_overwrite_safe (TREXIO_FILE, TEST_BACKEND);
test_overwrite_unsafe (TREXIO_FILE, TEST_BACKEND);
test_read (TREXIO_FILE, TEST_BACKEND);
rc = system(RM_COMMAND); rc = system(RM_COMMAND);
assert (rc == 0); assert (rc == 0);
return 0; return 0;
} }