diff --git a/README.html b/README.html index f4aadec..432b903 100644 --- a/README.html +++ b/README.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
- +program print_energy @@ -411,8 +411,8 @@ One needs to read from the TREXIO file:
integer :: i, j, k, l, m @@ -427,8 +427,8 @@ One needs to read from the TREXIO file:
call getarg(1, filename) @@ -444,8 +444,8 @@ f = trexio_open (filename, 'r', TREXIO_HDF5
rc = trexio_read_nucleus_repulsion(f, E_nn)
@@ -459,8 +459,8 @@ f = trexio_open (filename, 'r', TREXIO_HDF5
rc = trexio_read_mo_num(f, n)
@@ -474,8 +474,8 @@ f = trexio_open (filename, 'r', TREXIO_HDF5
allocate( D(n,n), h0(n,n) )
@@ -487,8 +487,8 @@ W(:,:,:,:) = 0.d0
rc = trexio_has_mo_1e_int_core_hamiltonian(f) @@ -520,8 +520,8 @@ rc = trexio_read_rdm_1e(f, D)
Reading is done with OpenMP. Each thread reads its own buffer, and @@ -537,8 +537,8 @@ to be protected in the critical section when modified.
rc = trexio_has_mo_2e_int_eri(f) @@ -587,8 +587,8 @@ icount = BUFSIZE
rc = trexio_has_rdm_2e(f) @@ -632,8 +632,8 @@ icount = bufsize
When the orbitals are real, we can use @@ -679,8 +679,8 @@ E = E + E_nn
deallocate( D, h0, G, W )
@@ -695,7 +695,7 @@ E = E + E_nn
stdint.h
Memory allocation of structures can be facilitated by using the @@ -528,8 +536,8 @@ The maximum string size for the filenames is 4096 characters.
All calls to TREXIO are thread-safe. @@ -537,10 +545,10 @@ TREXIO front end is modular, which simplifies implementation of new back ends.
31 | 'Possible integer overflow' | + +|
TREXIO_SAFE_MODE |
+32 | +'Unsafe operation in safe mode' | +
The trexio_string_of_error
converts an exit code into a string. The
@@ -808,8 +822,8 @@ and the corresponding message are not propagated to the source code.
const char* @@ -906,6 +920,9 @@ and the corresponding message are not propagated to the source code. case TREXIO_INT_SIZE_OVERFLOW: return "Possible integer overflow"; break; + case TREXIO_SAFE_MODE: + return "Unsafe operation in safe mode"; + break; } return "Unknown error"; } @@ -920,8 +937,8 @@ and the corresponding message are not propagated to the source code.
interface
@@ -937,8 +954,8 @@ and the corresponding message are not propagated to the source code.
class Error(Exception): @@ -977,8 +994,8 @@ and the corresponding message are not propagated to the source code.
TREXIO has several back ends:
@@ -1002,8 +1019,8 @@ lines that correspond to the TREXIO_JSON
back end (not implemented
typedef int32_t back_end_t; @@ -1051,8 +1068,8 @@ This is useful due to the fact that HDF5 back end can be disabled at configure s
integer(trexio_back_end_t), parameter :: TREXIO_HDF5 = 0
@@ -1092,8 +1109,8 @@ consistency, in version 2.2 it was renamed trexio_has_back_end
.
# define TREXIO back ends
@@ -1107,8 +1124,8 @@ consistency, in version 2.2 it was renamed trexio_has_back_end
.
Every time a reading function is called, the data is read from the @@ -1136,8 +1153,8 @@ concurrent programs, the behavior is not specified.
trexio_s
is the main type for TREXIO files, visible to the users
@@ -1171,8 +1188,8 @@ TREXIO files will have as a first argument the TREXIO file handle.
class File: @@ -1254,8 +1271,8 @@ TREXIO files will have as a first argument the TREXIO file handle.
Polymorphism of the trexio_t
type is handled by ensuring that the
@@ -1274,8 +1291,8 @@ corresponding types for all back ends can be safely casted to
trexio_open
creates a new TREXIO file or opens an existing one.
@@ -1290,6 +1307,7 @@ input parameters:
'w'
- (write) creates a new file as READWRITE (overwrite existing file)'r'
- (read) opens existing file as READONLY'u'
- (unsafe) opens existing file as READWRITE with the possibility to overwrite blocks and delete full groups.back_end
- integer number (or the corresponding global parameter) specifying the back end
-Note: the file_name
in TEXT back end actually corresponds to the
+Note: the file_name
in TEXT back end actually corresponds to the
name of the directory where .txt
data files are stored. The
actual name of each .txt
file corresponds to the group name
provided in trex.config
(e.g. nucleus.txt
for nuclei-related
@@ -1312,10 +1330,14 @@ data). These names are populated by the generator.py (i.e. they
are hard-coded), which is why the user should tend to avoid
renaming the .txt
data files.
+Note: internal consistency is not guaranteed once the file has been modified in 'u'
(unsafe) mode.
+
trexio_t* @@ -1334,7 +1356,7 @@ renaming the.txt
data files. return NULL; } - if (mode != 'r' && mode != 'w') { + if (mode != 'r' && mode != 'w' && mode != 'u') { if (rc_open != NULL) *rc_open = TREXIO_INVALID_ARG_2; return NULL; } @@ -1412,6 +1434,7 @@ renaming the.txt
data files. break; #else if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; + free(result); return NULL; #endif /* @@ -1444,6 +1467,7 @@ renaming the.txt
data files. break; #else if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; + free(result); return NULL; #endif /* @@ -1480,6 +1504,7 @@ renaming the.txt
data files. break; #else if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; + free(result); return NULL; #endif @@ -1487,11 +1512,51 @@ renaming the.txt
data files. } if (rc != TREXIO_SUCCESS) { - if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; + if (rc_open != NULL) *rc_open = rc; free(result); return NULL; } + + /* Mark the file as unsafe upon opening in UNSAFE 'u' mode */ + if (mode == 'u') { + + rc = trexio_has_metadata_unsafe(result); + if (rc == TREXIO_FAILURE) { + if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR; + free(result); + return NULL; + } + + if (rc == TREXIO_HAS_NOT) { + int64_t unsafe_val = 1; + switch (back_end) { + + case TREXIO_TEXT: + rc = trexio_text_write_metadata_unsafe(result, unsafe_val); + break; + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + rc = trexio_hdf5_write_metadata_unsafe(result, unsafe_val); + break; +#else + if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING; + free(result); + return NULL; +#endif + } + } + + if (rc != TREXIO_SUCCESS) { + if (rc_open != NULL) *rc_open = rc; + free(result); + return NULL; + } + + } + + /* Exit upon success */ if (rc_open != NULL) *rc_open = TREXIO_SUCCESS; @@ -1502,8 +1567,8 @@ renaming the.txt
data files.
interface
@@ -1521,8 +1586,8 @@ renaming the .txt
data files.
def _open(file_name: str, mode: str, back_end: int):
@@ -1570,8 +1635,8 @@ renaming the .txt
data files.
Because arrays are one-based in Fortran, we need to set a flag to @@ -1605,8 +1670,8 @@ know if we need to shift by 1 arrays of indices.
trexio_close
closes an existing trexio_t
file.
@@ -1623,8 +1688,8 @@ output:
trexio_exit_code
@@ -1705,8 +1770,8 @@ output:
interface
@@ -1721,8 +1786,8 @@ output:
def _close(trexio_file): @@ -1743,8 +1808,8 @@ output:
trexio_inquire
checks whether a TREXIO file exists.
@@ -1781,8 +1846,8 @@ You can see examples of both functionalities in test_f.f90
(search
trexio_exit_code
@@ -1812,8 +1877,8 @@ You can see examples of both functionalities in test_f.f90
(search
The function below is a C binding.
@@ -1833,8 +1898,8 @@ The front end Fortran function for trexio_inquire
can be found in t
def _inquire(file_name: str) -> bool:
@@ -1854,12 +1919,12 @@ The front end Fortran function for trexio_inquire
can be found in t
Consider the following block of trex.json
:
@@ -2080,12 +2145,12 @@ value will result in TREXIO_INVALID_ARG_2
exit code.
This section concerns API calls related to numerical attributes, @@ -2156,8 +2221,8 @@ namely single value of int/float types.
The C
templates that correspond to each of the abovementioned
@@ -2171,12 +2236,12 @@ precision (see Table above).
trexio_exit_code @@ -2214,7 +2279,7 @@ precision (see Table above). { if (file == NULL) return TREXIO_INVALID_ARG_1; //if (num <= 0L) return TREXIO_INVALID_NUM; /* this line is uncommented by the generator for dimensioning variables; do NOT remove! */ - if (trexio_has_$group_num$(file) == TREXIO_SUCCESS) return TREXIO_ATTR_ALREADY_EXISTS; + if (trexio_has_$group_num$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_ATTR_ALREADY_EXISTS; switch (file->back_end) { @@ -2240,8 +2305,8 @@ precision (see Table above).
trexio_exit_code @@ -2290,7 +2355,7 @@ precision (see Table above). if (file == NULL) return TREXIO_INVALID_ARG_1; //if (num <= 0) return TREXIO_INVALID_NUM; /* this line is uncommented by the generator for dimensioning variables; do NOT remove! */ - if (trexio_has_$group_num$(file) == TREXIO_SUCCESS) return TREXIO_ATTR_ALREADY_EXISTS; + if (trexio_has_$group_num$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_ATTR_ALREADY_EXISTS; switch (file->back_end) { @@ -2317,8 +2382,8 @@ precision (see Table above).
trexio_exit_code
@@ -2373,8 +2438,8 @@ precision (see Table above).
The Fortran
templates that provide an access to the C
API calls from Fortran.
@@ -2466,8 +2531,8 @@ These templates are based on the use of iso_c_binding
. Pointers hav
def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None:
@@ -2552,12 +2617,12 @@ These templates are based on the use of iso_c_binding
. Pointers hav
This section concerns API calls related to datasets. @@ -2639,8 +2704,8 @@ This section concerns API calls related to datasets.
The C templates that correspond to each of the abovementioned functions can be found below. @@ -2651,12 +2716,12 @@ The basic (non-suffixed) API call on datasets deals with real(cdouble
trexio_exit_code @@ -2729,7 +2794,7 @@ The basic (non-suffixed) API call on datasets deals with real(cdouble assert(file->back_end < TREXIO_INVALID_BACK_END); if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; trexio_exit_code rc; @@ -2819,8 +2884,8 @@ The basic (non-suffixed) API call on datasets deals with real(cdouble
trexio_exit_code @@ -2904,7 +2969,7 @@ The basic (non-suffixed) API call on datasets deals with real(cdouble if (file == NULL) return TREXIO_INVALID_ARG_1; if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; trexio_exit_code rc; int64_t $group_dset_dim$ = 0; @@ -2971,11 +3036,11 @@ The basic (non-suffixed) API call on datasets deals with real(cdouble
trexio_exit_code rc; +trexio_exit_code rc; int64_t $group_dset_dim$ = 0; /* Error handling for this call is added by the generator */ @@ -3036,7 +3101,7 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$)); if (file == NULL) return TREXIO_INVALID_ARG_1; if (dset_in == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; trexio_exit_code rc; int64_t $group_dset_dim$ = 0; @@ -3103,7 +3168,7 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$)); if (file == NULL) return TREXIO_INVALID_ARG_1; if (dset_in == NULL) return TREXIO_INVALID_ARG_2; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; trexio_exit_code rc; int64_t $group_dset_dim$ = 0; @@ -3131,8 +3196,8 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
trexio_exit_code
@@ -3207,8 +3272,8 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
The Fortran
templates that provide an access to the C
API calls from Fortran
.
@@ -3300,8 +3365,8 @@ These templates are based on the use of iso_c_binding
. Pointers hav
def write_$group_dset$(trexio_file, dset_w) -> None:
@@ -3496,12 +3561,12 @@ These templates are based on the use of iso_c_binding
. Pointers hav
Sparse data structures are used typically for large tensors such as @@ -3662,16 +3727,16 @@ This section concerns API calls related to sparse data structures.
trexio_exit_code trexio_read_safe_$group_dset$(trexio_t* const file, @@ -3947,8 +4012,8 @@ This section concerns API calls related to sparse data structures.
The Fortran
templates that provide an access to the C
API calls from Fortran
.
@@ -4049,8 +4114,8 @@ These templates are based on the use of iso_c_binding
. Pointers hav
def write_$group_dset$(trexio_file: File, offset_file: int, buffer_size: int, indices: list, values: list) -> None:
@@ -4271,12 +4336,12 @@ These templates are based on the use of iso_c_binding
. Pointers hav
This section concerns API calls related to datasets of strings. @@ -4316,8 +4381,8 @@ This section concerns API calls related to datasets of strings.
First parameter is the TREXIO
file handle. Second parameter is the variable to be written/read
@@ -4325,12 +4390,12 @@ to/from the TREXIO
file (except for trexio_has_
functi
trexio_exit_code @@ -4431,7 +4496,7 @@ to/from theTREXIO
file (except fortrexio_has_
functi if (file == NULL) return TREXIO_INVALID_ARG_1; if (dset_in == NULL) return TREXIO_INVALID_ARG_2; if (max_str_len <= 0) return TREXIO_INVALID_ARG_3; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; trexio_exit_code rc; int64_t $group_dset_dim$ = 0; @@ -4515,7 +4580,7 @@ to/from theTREXIO
file (except fortrexio_has_
functi if (file == NULL) return TREXIO_INVALID_ARG_1; if (dset_in == NULL) return TREXIO_INVALID_ARG_2; if (max_str_len <= 0) return TREXIO_INVALID_ARG_3; - if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS; + if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_DSET_ALREADY_EXISTS; assert(file->back_end < TREXIO_INVALID_BACK_END); @@ -4580,8 +4645,8 @@ to/from theTREXIO
file (except fortrexio_has_
functi
The Fortran
templates that provide an access to the C
API calls from Fortran
.
@@ -4680,8 +4745,8 @@ These templates are based on the use of iso_c_binding
. Pointers hav
def write_$group_dset$(trexio_file, dset_w: list) -> None:
@@ -4797,12 +4862,12 @@ These templates are based on the use of iso_c_binding
. Pointers hav
This section concerns API calls related to string attributes. @@ -4842,16 +4907,16 @@ This section concerns API calls related to string attributes.
trexio_exit_code @@ -4894,7 +4959,7 @@ This section concerns API calls related to string attributes. if (file == NULL) return TREXIO_INVALID_ARG_1; if (str == NULL) return TREXIO_INVALID_ARG_2; if (max_str_len <= 0) return TREXIO_INVALID_ARG_3; - if (trexio_has_$group_str$(file) == TREXIO_SUCCESS) return TREXIO_ATTR_ALREADY_EXISTS; + if (trexio_has_$group_str$(file) == TREXIO_SUCCESS && file->mode != 'u') return TREXIO_ATTR_ALREADY_EXISTS; size_t len_write = strlen(str); if ((size_t) max_str_len < len_write) return TREXIO_INVALID_STR_LEN; @@ -4955,8 +5020,8 @@ This section concerns API calls related to string attributes.
The Fortran
templates that provide an access to the C
API calls from Fortran.
@@ -5036,8 +5101,8 @@ These templates are based on the use of iso_c_binding
. Pointers hav
def write_$group_str$(trexio_file, str_w: str) -> None:
@@ -5125,10 +5190,127 @@ These templates are based on the use of iso_c_binding
. Pointers hav
+This section concerns API calls related to the deletion of entire groups. +
+ +Function name | +Description | +
---|---|
trexio_delete_$group$ |
+Delete a given group from the TREXIO file | +
trexio_exit_code +trexio_delete_$group$ (trexio_t* const file) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + if (file->mode != 'u') return TREXIO_SAFE_MODE; + + switch (file->back_end) { + + case TREXIO_TEXT: + return trexio_text_delete_$group$(file); + + case TREXIO_HDF5: +#ifdef HAVE_HDF5 + return trexio_hdf5_delete_$group$(file); +#else + return TREXIO_BACK_END_MISSING; +#endif +/* + case TREXIO_JSON: + return trexio_json_delete_$group$(file); + break; +*/ + } + + return TREXIO_FAILURE; +} ++
+The Fortran
templates that provide an access to the C
API calls from Fortran.
+These templates are based on the use of iso_c_binding
. Pointers have to be passed by value.
+
interface + integer(trexio_exit_code) function trexio_delete_$group$ (trex_file) bind(C) + use, intrinsic :: iso_c_binding + import + integer(trexio_t), intent(in), value :: trex_file + end function trexio_delete_$group$ +end interface ++
def delete_$group$(trexio_file) -> None: + """Delete the entire $group$ group from the TREXIO file. + + Parameters: + + trexio_file: + TREXIO File object. + + Raises: + - Exception from AssertionError if TREXIO return code ~rc~ is different from TREXIO_SUCCESS and prints the error message using trexio_string_of_error. + - Exception from some other error (e.g. RuntimeError). + """ + + rc = pytr.trexio_delete_$group$(trexio_file.pytrexio_s) + if rc != TREXIO_SUCCESS: + raise Error(rc) ++
This section contains general helper functions like trexio_info
.
@@ -5145,10 +5327,18 @@ In particular:
HDF5_VERSION
[string] (optional, only if HAVE_HDF5
is true
)TREXIO_GIT_HASH
[string]
+trexio_mark_safety
checks if the file has been opened in UNSAFE mode.
+If it was, the metadata_unsafe
attribute can be overwritten with the value provided in a second argument of the function.
+Since metadata_unsafe
is set to 1
(true
) upon the first opening of the file in UNSAFE mode, this value is immutable.
+However, if the user validated that the file is correct (e.g. using trexio-tools
),
+then the value of the metadata_unsafe
attribute can be changed using the aforementioned function.
+
trexio_exit_code
@@ -5174,11 +5364,27 @@ In particular:
}
trexio_exit_code +trexio_mark_safety (trexio_t* const file, const int32_t safety_flag) +{ + + if (file == NULL) return TREXIO_INVALID_ARG_1; + /* 1 for true ; 0 for false */ + if (safety_flag != 0 && safety_flag != 1) return TREXIO_INVALID_ARG_2; + /* Cannot mark the file in safe mode */ + if (file->mode != 'u') return TREXIO_FAILURE; + + return trexio_write_metadata_unsafe(file, safety_flag); +} ++
interface
@@ -5191,8 +5397,8 @@ In particular:
def info(): @@ -5208,8 +5414,8 @@ In particular:
The function below adapts the original C-based trexio_open
for Fortran.
@@ -5361,7 +5567,7 @@ two code are identical, i.e. if the assert
statement pass.
#define $GROUP$_GROUP_NAME "$group$" #define $GROUP_NUM$_NAME "$group_num$" @@ -345,9 +338,9 @@ for the JavaScript code in this tag.
typedef struct trexio_hdf5_s { trexio_t parent ; @@ -359,9 +352,9 @@ for the JavaScript code in this tag.
trexio_exit_code trexio_hdf5_inquire(const char* file_name) @@ -399,6 +392,7 @@ for the JavaScript code in this tag. // reading the existing file -> open as RDONLY f->file_id = H5Fopen(file->file_name, H5F_ACC_RDONLY, H5P_DEFAULT); break; + case 'u': case 'w': // writing the existing file -> open as RDWRITE f->file_id = H5Fopen(file->file_name, H5F_ACC_RDWR, H5P_DEFAULT); @@ -411,6 +405,7 @@ for the JavaScript code in this tag. case 'r': // reading non-existing file -> error return TREXIO_FAILURE; + case 'u': case 'w': // writing non-existing file -> create it f->file_id = H5Fcreate(file->file_name, H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT); @@ -424,6 +419,7 @@ for the JavaScript code in this tag. case 'r': f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); break; + case 'u': case 'w': if (f_exists == 1) { f->$group$_group = H5Gopen(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); @@ -460,9 +456,9 @@ for the JavaScript code in this tag.
trexio_exit_code trexio_hdf5_read_$group_num$ (trexio_t* const file, $group_num_dtype_double$* const num) @@ -501,29 +497,38 @@ for the JavaScript code in this tag. trexio_hdf5_t* const f = (trexio_hdf5_t*) file; - /* Write the dimensioning variables */ - const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); - const hid_t dspace = H5Screate(H5S_SCALAR); + /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ + if (trexio_hdf5_has_$group_num$(file) == TREXIO_SUCCESS && file->mode == 'u') { + herr_t status_del = H5Adelete(f->$group$_group, $GROUP_NUM$_NAME); + if (status_del < 0) return TREXIO_FAILURE; + } - const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, - dtype, dspace, H5P_DEFAULT, H5P_DEFAULT); - if (num_id <= 0) { - H5Sclose(dspace); - H5Tclose(dtype); + /* Setup the dataspace */ + const hid_t dtype_id = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$); + if (dtype_id <= 0) return TREXIO_INVALID_ID; + + const hid_t dspace_id = H5Screate(H5S_SCALAR); + if (dspace_id <= 0) { + H5Tclose(dtype_id); return TREXIO_INVALID_ID; } - const herr_t status = H5Awrite(num_id, dtype, &(num)); - if (status < 0) { - H5Aclose(num_id); - H5Sclose(dspace); - H5Tclose(dtype); - return TREXIO_FAILURE; + const hid_t num_id = H5Acreate(f->$group$_group, + $GROUP_NUM$_NAME, + dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT); + if (num_id <= 0) { + H5Sclose(dspace_id); + H5Tclose(dtype_id); + return TREXIO_INVALID_ID; } - H5Sclose(dspace); + const herr_t status = H5Awrite(num_id, dtype_id, &num); + + H5Sclose(dspace_id); H5Aclose(num_id); - H5Tclose(dtype); + H5Tclose(dtype_id); + if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; } @@ -555,9 +560,9 @@ for the JavaScript code in this tag.
trexio_exit_code trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $group_dset$, const uint32_t rank, const uint64_t* dims) @@ -623,30 +628,41 @@ for the JavaScript code in this tag. trexio_hdf5_t* f = (trexio_hdf5_t*) file; - if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { - - const herr_t status = H5LTmake_dataset(f->$group$_group, - $GROUP_DSET$_NAME, - (int) rank, (const hsize_t*) dims, - H5T_$GROUP_DSET_H5_DTYPE$, - $group_dset$); - if (status < 0) return TREXIO_FAILURE; - - } else { - - hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - const herr_t status = H5Dwrite(dset_id, - H5T_$GROUP_DSET_H5_DTYPE$, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); - - H5Dclose(dset_id); - if (status < 0) return TREXIO_FAILURE; + /* + Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). + NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, + thus reducing the size of the HDF5 file. In practic, this is not always the case. + Consider using HDF5-native h5repack utility after deleting/overwriting big datasets. 
+ */ + if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') { + herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; } + hid_t dspace_id = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); + if (dspace_id <= 0) return TREXIO_INVALID_ID; + + hid_t dset_id = H5Dcreate (f->$group$_group, + $GROUP_DSET$_NAME, + H5T_$GROUP_DSET_H5_DTYPE$, + dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) { + H5Sclose(dspace_id); + return TREXIO_INVALID_ID; + } + + herr_t status = H5Dwrite(dset_id, + H5T_$GROUP_DSET_H5_DTYPE$, + H5S_ALL, + dspace_id, + H5P_DEFAULT, + $group_dset$); + H5Dclose(dset_id); + H5Sclose(dspace_id); + if (status < 0) return TREXIO_FAILURE; + return TREXIO_SUCCESS; } @@ -678,9 +694,9 @@ for the JavaScript code in this tag.
Sparse data is stored using extensible datasets of HDF5. Extensibility is required due to the fact that the sparse data will be written in chunks of user-defined size. @@ -880,9 +896,9 @@ due to the fact that the sparse data will be written in chunks of user-defined s
trexio_exit_code trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, const uint32_t rank, const uint64_t* dims, const uint32_t max_str_len) @@ -1000,6 +1016,18 @@ due to the fact that the sparse data will be written in chunks of user-defined s trexio_hdf5_t* f = (trexio_hdf5_t*) file; + /* + Try to delete an existing dataset by unlinking it from the group (UNSAFE mode). + NOTE: In principle, HDF5 should see the deallocated (unused) file space and free it, + thus reducing the size of the HDF5 file. In practic, this is not always the case. + + Consider using HDF5-provided h5repack utility after deleting/overwriting big datasets. + */ + if (H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) == 1 && file->mode == 'u') { + herr_t status_del = H5Ldelete(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); + if (status_del < 0) return TREXIO_FAILURE; + } + herr_t status; hid_t dset_id; @@ -1010,49 +1038,30 @@ due to the fact that the sparse data will be written in chunks of user-defined s status = H5Tset_size (memtype, H5T_VARIABLE); if (status < 0) return TREXIO_FAILURE; - if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) { + hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); + if (dspace <= 0) return TREXIO_INVALID_ID; - /* code to create dataset */ - hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); - if (filetype <= 0) return TREXIO_INVALID_ID; + /* code to create dataset */ + hid_t filetype = H5Tcopy (H5T_FORTRAN_S1); + if (filetype <= 0) return TREXIO_INVALID_ID; - status = H5Tset_size (filetype, H5T_VARIABLE); - if (status < 0) return TREXIO_FAILURE; + status = H5Tset_size (filetype, H5T_VARIABLE); + if (status < 0) return TREXIO_FAILURE; - hid_t dspace = H5Screate_simple( (int) rank, (const hsize_t*) dims, NULL); - if (dspace <= 0) return TREXIO_INVALID_ID; + dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id <= 0) return 
TREXIO_INVALID_ID; - dset_id = H5Dcreate (f->$group$_group, $GROUP_DSET$_NAME, filetype, dspace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; + status = H5Dwrite (dset_id, memtype, + H5S_ALL, H5S_ALL, H5P_DEFAULT, + $group_dset$); - status = H5Dwrite (dset_id, memtype, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); + H5Dclose (dset_id); + H5Sclose (dspace); + H5Tclose (filetype); + H5Tclose (memtype); - H5Dclose (dset_id); - H5Sclose (dspace); - H5Tclose (filetype); - H5Tclose (memtype); - - if (status < 0) return TREXIO_FAILURE; - - } else { - - dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME, H5P_DEFAULT); - if (dset_id <= 0) return TREXIO_INVALID_ID; - - /* code to write dataset */ - status = H5Dwrite(dset_id, memtype, - H5S_ALL, H5S_ALL, H5P_DEFAULT, - $group_dset$); - - H5Dclose(dset_id); - H5Tclose(memtype); - - if (status < 0) return TREXIO_FAILURE; - - } + if (status < 0) return TREXIO_FAILURE; return TREXIO_SUCCESS; @@ -1085,9 +1094,9 @@ due to the fact that the sparse data will be written in chunks of user-defined s
trexio_exit_code trexio_hdf5_read_$group_str$ (trexio_t* const file, char* const str, const uint32_t max_str_len) @@ -1140,8 +1149,13 @@ due to the fact that the sparse data will be written in chunks of user-defined s trexio_hdf5_t* const f = (trexio_hdf5_t*) file; + /* Delete the attribute if it exists and if the file is open in UNSAFE mode */ + if (trexio_hdf5_has_$group_str$(file) == TREXIO_SUCCESS && file->mode == 'u') { + herr_t status_del = H5Adelete(f->$group$_group, $GROUP_STR$_NAME); + if (status_del < 0) return TREXIO_FAILURE; + } - /* Setup the dataspace */ + /* Setup the datatype for variable length string */ const hid_t dtype_id = H5Tcopy(H5T_C_S1); if (dtype_id <= 0) return TREXIO_INVALID_ID; @@ -1154,12 +1168,15 @@ due to the fact that the sparse data will be written in chunks of user-defined s status = H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); if (status < 0) return TREXIO_FAILURE; + /* Setup the dataspace */ const hid_t dspace_id = H5Screate(H5S_SCALAR); if (dspace_id <= 0) return TREXIO_INVALID_ID; /* Create the $group_str$ attribute of $group$ group */ - const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id, - H5P_DEFAULT, H5P_DEFAULT); + const hid_t str_id = H5Acreate(f->$group$_group, + $GROUP_STR$_NAME, + dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT); if (str_id <= 0) { H5Sclose(dspace_id); @@ -1168,18 +1185,14 @@ due to the fact that the sparse data will be written in chunks of user-defined s } status = H5Awrite(str_id, dtype_id, str); - if (status < 0) { - H5Aclose(str_id); - H5Sclose(dspace_id); - H5Tclose(dtype_id); - return TREXIO_FAILURE; - } H5Aclose(str_id); H5Sclose(dspace_id); H5Tclose(dtype_id); - return TREXIO_SUCCESS; + if (status < 0) return TREXIO_FAILURE; + + return TREXIO_SUCCESS; }
+Note: in early versions of the HDF5 library (v < 1.10) unlinking an object was not working as expected +and the associated memory was not necessarily freed (see this StackOverflow discussion for example). +Nevertheless, some space might remain occupied even after deleting the associated object in recent versions. +To make the best use of the deleted file space, we recommend writing the deleted group within the same session +(i.e. before closing the TREXIO file). +
+ +
+In principle, one can use HDF5-provided h5repack
binary, which copies all existing objects from one file into another.
+Thus, any corrupted/lost file space will remain in the first file. The use of h5repack
is highly encouraged.
+
trexio_exit_code +trexio_hdf5_delete_$group$ (trexio_t* const file) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + + trexio_hdf5_t* f = (trexio_hdf5_t*) file; + + // delete the link to the existing group: this should free the associated space + H5Gclose(f->$group$_group); + f->$group$_group = 0; + herr_t status = H5Ldelete(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT); + if (status < 0) return TREXIO_FAILURE; + + // re-create the group (with the new link ?) + f->$group$_group = H5Gcreate(f->file_id, $GROUP$_GROUP_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (f->$group$_group <= 0L) return TREXIO_INVALID_ID; + + return TREXIO_SUCCESS; +} ++
trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, @@ -1468,10 +1522,9 @@ due to the fact that the sparse data will be written in chunks of user-defined s
The "file" produced by the text back end is a directory with one @@ -353,8 +354,8 @@ The file is written when closed, or when the flush function is called.
typedef struct $group$_s { @@ -373,8 +374,8 @@ The file is written when closed, or when the flush function is called.
typedef struct trexio_text_s { @@ -387,8 +388,8 @@ The file is written when closed, or when the flush function is called.
trexio_exit_code
@@ -531,8 +532,8 @@ The file is written when closed, or when the flush function is called.
trexio_exit_code
@@ -554,8 +555,8 @@ The file is written when closed, or when the flush function is called.
$group$_t* @@ -847,8 +848,8 @@ trexio_text_read_$group$ (trexio_text_t*
trexio_exit_code @@ -860,12 +861,11 @@ trexio_text_read_$group$ (trexio_text_t*if (file->parent.mode == 'r') return TREXIO_READONLY; $group$_t* $group$ = file->$group$; - if ($group$ == NULL) return TREXIO_SUCCESS; if ($group$->to_flush == 0) return TREXIO_SUCCESS; - assert (file->parent.mode == 'w'); + assert (file->parent.mode == 'w' || file->parent.mode == 'u'); FILE* f = fopen($group$->file_name, "w"); if (f == NULL) return TREXIO_INVALID_ARG_1; @@ -913,8 +913,8 @@ trexio_text_read_$group$ (trexio_text_t*
Memory is allocated when reading. The following function frees memory. @@ -951,6 +951,8 @@ Memory is allocated when reading. The following function frees memory. // END REPEAT GROUP_ATTR_STR FREE ($group$); + file->$group$ = NULL; + return TREXIO_SUCCESS; } @@ -959,8 +961,8 @@ Memory is allocated when reading. The following function frees memory.
trexio_exit_code
@@ -1023,8 +1025,8 @@ Memory is allocated when reading. The following function frees memory.
The group_dset
array is assumed allocated with the appropriate size.
@@ -1120,8 +1122,8 @@ The group_dset
array is assumed allocated with the appropriate size
The group_dset
array is assumed allocated with the appropriate size.
@@ -1222,8 +1224,8 @@ The group_dset
array is assumed allocated with the appropriate size
trexio_exit_code
@@ -1297,8 +1299,8 @@ The group_dset
array is assumed allocated with the appropriate size
Each sparse array is stored in a separate .txt
file due to the fact that sparse I/O has to be decoupled
@@ -1571,11 +1573,40 @@ User provides indices and values of the sparse array as two separate variables.
trexio_exit_code +trexio_text_delete_$group$ (trexio_t* const file) +{ + if (file == NULL) return TREXIO_INVALID_ARG_1; + + trexio_text_t* f = (trexio_text_t*) file; + + $group$_t* $group$ = trexio_text_read_$group$(f); + if ($group$ == NULL) return TREXIO_FAILURE; + + int rc = remove($group$->file_name); + if (rc == -1) return TREXIO_FAILURE; + + $group$->to_flush = 0; + + trexio_exit_code rc_free = trexio_text_free_$group$(f); + if (rc_free != TREXIO_SUCCESS) return rc_free; + + return TREXIO_SUCCESS; +} ++
As we expect our files to be archived in open-data repositories, we @@ -420,7 +420,7 @@ which have participated to the creation of the file, a list of authors of the file, and a textual description.
-Text describing the content of file | + +|||
unsafe |
+int |
++ | 1 : true, 0 : false |
+
+Note: unsafe
attribute of the metadata
group indicates whether the file has been previously opened with 'u'
mode.
+It is automatically written in the file upon the first unsafe opening.
+If the user has checked that the TREXIO file is valid (e.g. using trexio-tools
) after unsafe operations,
+then the unsafe
attribute value can be manually overwritten (in unsafe mode) from 1
to 0
.
+
We consider wave functions expressed in the spin-free formalism, where the number of ↑ and ↓ electrons is fixed.
-For example, consider H2 with the following basis set (in GAMESS @@ -1032,8 +1046,8 @@ prim_factor =
Going from the atomic basis set to AOs implies a systematic @@ -1086,13 +1100,13 @@ shell, as in the GAMESS convention where
In such a case, one should set the normalization of the shell (in -the Basis set section) to \(\mathcal{N}_{z^2}\), which is the +the Basis set section) to \(\mathcal{N}_{z^2}\), which is the normalization factor of the atomic orbitals in spherical coordinates. The normalization factor of the \(xy\) function which should be introduced here should be \(\frac{\mathcal{N}_{xy}}{\mathcal{N}_{z^2}}\).
-