1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2025-01-03 10:06:01 +01:00

add memory-safe API for numerical datasets

This commit is contained in:
q-posev 2021-07-28 10:19:00 +02:00
parent 9f4176a8b6
commit da5a990acc
3 changed files with 471 additions and 46 deletions

View File

@ -152,6 +152,7 @@ typedef int32_t trexio_exit_code;
| ~TREXIO_GROUP_WRITE_ERROR~ | 20 | 'Error writing group' |
| ~TREXIO_ELEM_READ_ERROR~ | 21 | 'Error reading element' |
| ~TREXIO_ELEM_WRITE_ERROR~ | 22 | 'Error writing element' |
| ~TREXIO_UNSAFE_ARRAY_DIM~ | 23 | 'Access to memory beyond allocated' |
| ~TREXIO_INVALID_STR_LEN~ | 30 | 'Invalid max_str_len' |
# We need to force Emacs not to indent the Python code:
@ -182,8 +183,8 @@ return '\n'.join(result)
#+RESULTS:
:results:
#+begin_src c :tangle prefix_front.h
:RESULTS:
#+begin_src c :tangle prefix_front.h :exports none
#define TREXIO_FAILURE -1 //((trexio_exit_code) -1)
#define TREXIO_SUCCESS 0 //((trexio_exit_code) 0)
#define TREXIO_INVALID_ARG_1 1 //((trexio_exit_code) 1)
@ -208,10 +209,11 @@ return '\n'.join(result)
#define TREXIO_GROUP_WRITE_ERROR 20 //((trexio_exit_code) 20)
#define TREXIO_ELEM_READ_ERROR 21 //((trexio_exit_code) 21)
#define TREXIO_ELEM_WRITE_ERROR 22 //((trexio_exit_code) 22)
#define TREXIO_UNSAFE_ARRAY_DIM 23 //((trexio_exit_code) 23)
#define TREXIO_INVALID_STR_LEN 30 //((trexio_exit_code) 30)
#+end_src
#+begin_src f90 :tangle prefix_fortran.f90
#+begin_src f90 :tangle prefix_fortran.f90 :exports none
integer(trexio_exit_code), parameter :: TREXIO_FAILURE = -1
integer(trexio_exit_code), parameter :: TREXIO_SUCCESS = 0
integer(trexio_exit_code), parameter :: TREXIO_INVALID_ARG_1 = 1
@ -236,9 +238,10 @@ return '\n'.join(result)
integer(trexio_exit_code), parameter :: TREXIO_GROUP_WRITE_ERROR = 20
integer(trexio_exit_code), parameter :: TREXIO_ELEM_READ_ERROR = 21
integer(trexio_exit_code), parameter :: TREXIO_ELEM_WRITE_ERROR = 22
integer(trexio_exit_code), parameter :: TREXIO_UNSAFE_ARRAY_DIM = 23
integer(trexio_exit_code), parameter :: TREXIO_INVALID_STR_LEN = 30
#+end_src
:end:
:END:
The ~trexio_string_of_error~ converts an exit code into a string. The
string is assumed to be large enough to contain the error message
@ -269,9 +272,7 @@ result = []
for (text, code, message) in table:
text = text.replace("~","")
message = message.replace("'",'"')
result += [ f"""case {text}:
return {message};
break;""" ]
result += [ f"""case {text}:\n return {message};\n break;""" ]
return '\n'.join(result)
#+end_src
@ -320,8 +321,11 @@ return '\n'.join(result)
case TREXIO_INVALID_NUM:
return "Invalid dimensions";
break;
case TREXIO_NUM_ALREADY_EXISTS:
return "Variable already exists";
case TREXIO_ATTR_ALREADY_EXISTS:
return "Attribute (num/str) already exists";
break;
case TREXIO_DSET_ALREADY_EXISTS:
return "Dataset already exists";
break;
case TREXIO_OPEN_ERROR:
return "Error opening file";
@ -347,6 +351,12 @@ return '\n'.join(result)
case TREXIO_ELEM_WRITE_ERROR:
return "Error writing element";
break;
case TREXIO_UNSAFE_ARRAY_DIM:
return "Access to memory beyond allocated";
break;
case TREXIO_INVALID_STR_LEN:
return "Invalid max_str_len";
break;
#+end_example
# Source
@ -1114,14 +1124,16 @@ end interface
This section concerns API calls related to datasets.
| Function name | Description | Precision |
|--------------------------------+-------------------------------------+-----------|
|----------------------------------+--------------------------------------+-----------------------------|
| ~trexio_has_$group_dset$~ | Check if a dataset exists in a file | --- |
| ~trexio_read_$group_dset$~ | Read a dataset | Double |
| ~trexio_write_$group_dset$~ | Write a dataset | Double |
| ~trexio_read_$group_dset$_32~ | Read a dataset | Single |
| ~trexio_write_$group_dset$_32~ | Write a dataset | Single |
| ~trexio_read_$group_dset$_64~ | Read a dataset | Double |
| ~trexio_write_$group_dset$_64~ | Write a dataset | Double |
| ~trexio_read_$group_dset$~ | Read a dataset in default precision | Double/Single for float/int |
| ~trexio_write_$group_dset$~ | Write a dataset in default precision | Double/Single for float/int |
| ~trexio_read_safe_$group_dset$~ | Read a bounded dataset | Double |
| ~trexio_write_safe_$group_dset$~ | Write a bounded dataset | Double |
| ~trexio_read_$group_dset$_32~ | Read a dataset in single precision | Single |
| ~trexio_write_$group_dset$_32~ | Write a dataset in single precision | Single |
| ~trexio_read_$group_dset$_64~ | Read a dataset in double precision | Double |
| ~trexio_write_$group_dset$_64~ | Write a dataset in double precision | Double |
*** C templates for front end
@ -1131,6 +1143,8 @@ end interface
Suffixes ~_32~ and ~_64~ correspond to API calls dealing with single and double precision, respectively.
The basic (non-suffixed) API call on datasets uses the default precision, i.e. double precision for floating-point data and single precision for integer data (see Table above).
**** Function declarations
#+begin_src c :tangle hrw_dset_data_front.h :exports none
trexio_exit_code trexio_has_$group_dset$(trexio_t* const file);
@ -1140,8 +1154,13 @@ trexio_exit_code trexio_read_$group_dset$_32(trexio_t* const file, $group_dset_d
trexio_exit_code trexio_write_$group_dset$_32(trexio_t* const file, const $group_dset_dtype_single$* $group_dset$);
trexio_exit_code trexio_read_$group_dset$_64(trexio_t* const file, $group_dset_dtype_double$* const $group_dset$);
trexio_exit_code trexio_write_$group_dset$_64(trexio_t* const file, const $group_dset_dtype_double$* $group_dset$);
trexio_exit_code trexio_read_safe_$group_dset$(trexio_t* const file, $group_dset_dtype_default$* const dset_out, const uint64_t dim_out);
trexio_exit_code trexio_write_safe_$group_dset$(trexio_t* const file, const $group_dset_dtype_default$* dset_in, const uint64_t dim_in);
#+end_src
**** Source code for double precision functions
#+begin_src c :tangle read_dset_data_64_front.c
trexio_exit_code
trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* const $group_dset$)
@ -1155,7 +1174,6 @@ trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* co
/* Error handling for this call is added by the generator */
rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
if (rc != TREXIO_SUCCESS) return rc;
if ($group_dset_dim$ == 0L) return TREXIO_INVALID_NUM;
@ -1197,6 +1215,7 @@ trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* co
}
#+end_src
#+begin_src c :tangle write_dset_data_64_front.c
trexio_exit_code
trexio_write_$group_dset$_64 (trexio_t* const file, const $group_dset_dtype_double$* $group_dset$)
@ -1211,7 +1230,6 @@ trexio_write_$group_dset$_64 (trexio_t* const file, const $group_dset_dtype_doub
/* Error handling for this call is added by the generator */
rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
if (rc != TREXIO_SUCCESS) return rc;
if ($group_dset_dim$ == 0L) return TREXIO_INVALID_NUM;
@ -1264,6 +1282,9 @@ trexio_write_$group_dset$_64 (trexio_t* const file, const $group_dset_dtype_doub
}
#+end_src
**** Source code for single precision functions
#+begin_src c :tangle read_dset_data_32_front.c
trexio_exit_code
trexio_read_$group_dset$_32 (trexio_t* const file, $group_dset_dtype_single$* const $group_dset$)
@ -1331,6 +1352,7 @@ trexio_read_$group_dset$_32 (trexio_t* const file, $group_dset_dtype_single$* co
}
#+end_src
#+begin_src c :tangle write_dset_data_32_front.c
trexio_exit_code
trexio_write_$group_dset$_32 (trexio_t* const file, const $group_dset_dtype_single$* $group_dset$)
@ -1397,6 +1419,77 @@ trexio_write_$group_dset$_32 (trexio_t* const file, const $group_dset_dtype_sing
}
#+end_src
**** Source code for memory-safe functions
#+begin_src c :tangle read_dset_data_safe_front.c
/*
 * Bounds-checked front-end template for reading a dataset in default precision.
 *
 * file     : TREXIO file handle; NULL is rejected with TREXIO_INVALID_ARG_1.
 * dset_out : destination buffer; NULL is rejected with TREXIO_INVALID_ARG_2.
 * dim_out  : size declared by the caller, compared below against the total
 *            number of elements of the dataset (presumably the capacity of
 *            dset_out in elements -- TODO confirm against the API docs).
 *
 * Returns TREXIO_INVALID_NUM when the dimension obtained from the file is 0,
 * TREXIO_UNSAFE_ARRAY_DIM when dim_out is larger than the product of the
 * dataset dimensions, and otherwise the exit code of the default-precision
 * reader that this function forwards to.
 *
 * NOTE(review): rc is assigned but never tested in this template; the marker
 * comment below states that the generator inserts the error handling -- confirm
 * that this still holds for the safe variants.
 */
trexio_exit_code
trexio_read_safe_$group_dset$ (trexio_t* const file, $group_dset_dtype_default$* const dset_out, const uint64_t dim_out)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset_out == NULL) return TREXIO_INVALID_ARG_2;
trexio_exit_code rc;
int64_t $group_dset_dim$ = 0;
/* Error handling for this call is added by the generator */
rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
if ($group_dset_dim$ == 0L) return TREXIO_INVALID_NUM;
/* rank and dims are filled in by the template placeholders, one entry per dataset dimension */
uint32_t rank = $group_dset_rank$;
uint64_t dims[$group_dset_rank$] = {$group_dset_dim_list$};
/* The block below is specific to safe API as it checks the boundaries */
/* dim_size accumulates the total element count as the product of all dimensions */
uint64_t dim_size = 1;
for (uint32_t i=0; i<rank; ++i){
dim_size *= dims[i];
}
/* NOTE(review): only dim_out larger than the dataset is rejected; a dim_out
 * smaller than dim_size passes, and dim_out is not forwarded to the reader
 * below -- confirm this is the intended meaning of "safe". */
if (dim_out > dim_size) return TREXIO_UNSAFE_ARRAY_DIM;
/* */
return trexio_read_$group_dset$_$default_prec$(file, dset_out);
}
#+end_src
#+begin_src c :tangle write_dset_data_safe_front.c
/*
 * Bounds-checked front-end template for writing a dataset in default precision.
 *
 * file    : TREXIO file handle; NULL is rejected with TREXIO_INVALID_ARG_1.
 * dset_in : source buffer; NULL is rejected with TREXIO_INVALID_ARG_2.
 * dim_in  : size declared by the caller, compared below against the total
 *           number of elements of the dataset (presumably the number of
 *           elements available in dset_in -- TODO confirm against the API docs).
 *
 * Returns TREXIO_DSET_ALREADY_EXISTS when the dataset is already present in
 * the file, TREXIO_INVALID_NUM when the dimension obtained from the file is 0,
 * TREXIO_UNSAFE_ARRAY_DIM when dim_in is larger than the product of the
 * dataset dimensions, and otherwise the exit code of the default-precision
 * writer that this function forwards to.
 *
 * NOTE(review): rc is assigned but never tested in this template; the marker
 * comment below states that the generator inserts the error handling -- confirm
 * that this still holds for the safe variants.
 */
trexio_exit_code
trexio_write_safe_$group_dset$ (trexio_t* const file, const $group_dset_dtype_default$* dset_in, const uint64_t dim_in)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset_in == NULL) return TREXIO_INVALID_ARG_2;
/* an existing dataset is never overwritten by this call */
if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS;
trexio_exit_code rc;
int64_t $group_dset_dim$ = 0;
/* Error handling for this call is added by the generator */
rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
if ($group_dset_dim$ == 0L) return TREXIO_INVALID_NUM;
/* rank and dims are filled in by the template placeholders, one entry per dataset dimension */
uint32_t rank = $group_dset_rank$;
uint64_t dims[$group_dset_rank$] = {$group_dset_dim_list$};
/* The block below is specific to safe API as it checks the boundaries */
/* dim_size accumulates the total element count as the product of all dimensions */
uint64_t dim_size = 1;
for (uint32_t i=0; i<rank; ++i){
dim_size *= dims[i];
}
/* NOTE(review): only dim_in larger than the dataset is rejected; a dim_in
 * smaller than dim_size passes, and dim_in is not forwarded to the writer
 * below -- confirm this is the intended meaning of "safe". */
if (dim_in > dim_size) return TREXIO_UNSAFE_ARRAY_DIM;
/* */
return trexio_write_$group_dset$_$default_prec$(file, dset_in);
}
#+end_src
**** Source code for default functions
#+begin_src c :tangle read_dset_data_def_front.c
trexio_exit_code
trexio_read_$group_dset$ (trexio_t* const file, $group_dset_dtype_default$* const $group_dset$)
@ -1405,6 +1498,7 @@ trexio_read_$group_dset$ (trexio_t* const file, $group_dset_dtype_default$* cons
}
#+end_src
#+begin_src c :tangle write_dset_data_def_front.c
trexio_exit_code
trexio_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype_default$* $group_dset$)
@ -1413,6 +1507,7 @@ trexio_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype_default
}
#+end_src
#+begin_src c :tangle has_dset_data_front.c
trexio_exit_code
trexio_has_$group_dset$ (trexio_t* const file)

View File

@ -0,0 +1,165 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_HDF5
#define TREXIO_FILE "test_safe_dset_f.h5"
#define RM_COMMAND "rm -rf " TREXIO_FILE
/* Write the nucleus coordinates through the bounds-checked ("safe") API.
 *
 * The file is opened in 'w' mode, nucleus_num is stored first, then the
 * coordinate write is attempted twice: once with a declared size that is too
 * large (must be refused) and once with the exact size (must succeed).
 * Every step is verified with assert(); the function always returns 0. */
static int test_write_dset (const char* file_name, const back_end_t backend) {

  // data to be stored: 12 nuclei, 3 coordinates each
  int nucleus_count = 12;
  double xyz[36] = {
    0.00000000 , 1.39250319 , 0.00000000 ,
    -1.20594314 , 0.69625160 , 0.00000000 ,
    -1.20594314 , -0.69625160 , 0.00000000 ,
    0.00000000 , -1.39250319 , 0.00000000 ,
    1.20594314 , -0.69625160 , 0.00000000 ,
    1.20594314 , 0.69625160 , 0.00000000 ,
    -2.14171677 , 1.23652075 , 0.00000000 ,
    -2.14171677 , -1.23652075 , 0.00000000 ,
    0.00000000 , -2.47304151 , 0.00000000 ,
    2.14171677 , -1.23652075 , 0.00000000 ,
    2.14171677 , 1.23652075 , 0.00000000 ,
    0.00000000 , 2.47304151 , 0.00000000 ,
  };

  /*================= START OF TEST ==================*/

  // fresh session in 'write' mode
  trexio_t* const handle = trexio_open(file_name, 'w', backend);
  assert (handle != NULL);

  // the dimensioning attribute goes into the (still empty) file first
  trexio_exit_code rc = trexio_write_nucleus_num(handle, nucleus_count);
  assert (rc == TREXIO_SUCCESS);

  /* a declared size larger than the dataset must be rejected with
   * TREXIO_UNSAFE_ARRAY_DIM, since access beyond the allocated
   * memory would be likely to occur */
  const uint64_t oversized_dim = nucleus_count * 12;
  rc = trexio_write_safe_nucleus_coord(handle, xyz, oversized_dim);
  assert (rc == TREXIO_UNSAFE_ARRAY_DIM);

  /* the exact size (3 values per nucleus) must be accepted */
  const uint64_t exact_dim = nucleus_count * 3;
  rc = trexio_write_safe_nucleus_coord(handle, xyz, exact_dim);
  assert (rc == TREXIO_SUCCESS);

  // end of session
  rc = trexio_close(handle);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Verify that the coordinates dataset written earlier is reported as present.
 *
 * Opens the file in 'r' mode, queries trexio_has_nucleus_coord and closes the
 * session again; each step is verified with assert(). Always returns 0. */
static int test_has_dset (const char* file_name, const back_end_t backend) {

  /*================= START OF TEST ==================*/

  // read-only session
  trexio_t* const handle = trexio_open(file_name, 'r', backend);
  assert (handle != NULL);

  // the dataset stored by the write test has to be there
  trexio_exit_code rc = trexio_has_nucleus_coord(handle);
  assert (rc == TREXIO_SUCCESS);

  // end of session
  rc = trexio_close(handle);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Read the nucleus coordinates back through the bounds-checked ("safe") API.
 *
 * Opens the file in 'r' mode, reads nucleus_num (expected to be 12), allocates
 * a buffer of 3*num doubles and attempts the read twice: once with a declared
 * size that is too large (must be refused with TREXIO_UNSAFE_ARRAY_DIM) and
 * once with the exact size (must succeed). One coordinate is then compared
 * with the value stored by the write test.
 * Every step is verified with assert(); the function always returns 0. */
static int test_read_dset (const char* file_name, const back_end_t backend) {

  trexio_t* file = NULL;
  trexio_exit_code rc;

  // parameters to be read
  int num = 0;        // initialised so that a failed read never leaves it indeterminate
  double* coord = NULL;

  /*================= START OF TEST ==================*/

  // open file in 'read' mode
  file = trexio_open(file_name, 'r', backend);
  assert (file != NULL);

  // read numerical attribute from the file
  rc = trexio_read_nucleus_num(file, &num);
  assert (rc == TREXIO_SUCCESS);
  assert (num == 12);

  // buffer for the numerical (floating point) dataset: 3 values per nucleus
  coord = calloc((size_t) (3*num), sizeof *coord);
  // the allocation can fail; do not hand a NULL buffer to the library
  assert (coord != NULL);

  /* read numerical dataset with an unsafe dimension
   * this should return TREXIO_UNSAFE_ARRAY_DIM indicating
   * that access beyond allocated memory is likely to occur */
  uint64_t dim_unsafe = num * 12;
  rc = trexio_read_safe_nucleus_coord(file, coord, dim_unsafe);
  assert (rc == TREXIO_UNSAFE_ARRAY_DIM);

  /* read numerical dataset with a safe dimension
   * this should return TREXIO_SUCCESS */
  uint64_t dim_safe = num * 3;
  rc = trexio_read_safe_nucleus_coord(file, coord, dim_safe);
  assert (rc == TREXIO_SUCCESS);

  // element 30 is the x coordinate of the 11th nucleus written by the write test
  double x = coord[30] - 2.14171677;
  assert( x*x < 1.e-14 );
  free(coord);

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Test launcher: removes any stale test file, runs the write / has / read
 * tests in that order on the same file, then removes the file again.
 * Both clean-up commands are required to exit with status 0. */
int main(void) {

  /*============== Test launcher ================*/

  // start from a clean slate
  int status = system(RM_COMMAND);
  assert (status == 0);

  // the has/read tests rely on the file produced by the write test
  test_write_dset (TREXIO_FILE, TEST_BACKEND);
  test_has_dset   (TREXIO_FILE, TEST_BACKEND);
  test_read_dset  (TREXIO_FILE, TEST_BACKEND);

  // leave nothing behind
  status = system(RM_COMMAND);
  assert (status == 0);

  return 0;
}

View File

@ -0,0 +1,165 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_TEXT
#define TREXIO_FILE "test_safe_dset_f.dir"
#define RM_COMMAND "rm -rf " TREXIO_FILE
/* Write the nucleus coordinates through the bounds-checked ("safe") API.
 *
 * The file is opened in 'w' mode, nucleus_num is stored first, then the
 * coordinate write is attempted twice: once with a declared size that is too
 * large (must be refused) and once with the exact size (must succeed).
 * Every step is verified with assert(); the function always returns 0. */
static int test_write_dset (const char* file_name, const back_end_t backend) {

  // data to be stored: 12 nuclei, 3 coordinates each
  int nucleus_count = 12;
  double xyz[36] = {
    0.00000000 , 1.39250319 , 0.00000000 ,
    -1.20594314 , 0.69625160 , 0.00000000 ,
    -1.20594314 , -0.69625160 , 0.00000000 ,
    0.00000000 , -1.39250319 , 0.00000000 ,
    1.20594314 , -0.69625160 , 0.00000000 ,
    1.20594314 , 0.69625160 , 0.00000000 ,
    -2.14171677 , 1.23652075 , 0.00000000 ,
    -2.14171677 , -1.23652075 , 0.00000000 ,
    0.00000000 , -2.47304151 , 0.00000000 ,
    2.14171677 , -1.23652075 , 0.00000000 ,
    2.14171677 , 1.23652075 , 0.00000000 ,
    0.00000000 , 2.47304151 , 0.00000000 ,
  };

  /*================= START OF TEST ==================*/

  // fresh session in 'write' mode
  trexio_t* const handle = trexio_open(file_name, 'w', backend);
  assert (handle != NULL);

  // the dimensioning attribute goes into the (still empty) file first
  trexio_exit_code rc = trexio_write_nucleus_num(handle, nucleus_count);
  assert (rc == TREXIO_SUCCESS);

  /* a declared size larger than the dataset must be rejected with
   * TREXIO_UNSAFE_ARRAY_DIM, since access beyond the allocated
   * memory would be likely to occur */
  const uint64_t oversized_dim = nucleus_count * 12;
  rc = trexio_write_safe_nucleus_coord(handle, xyz, oversized_dim);
  assert (rc == TREXIO_UNSAFE_ARRAY_DIM);

  /* the exact size (3 values per nucleus) must be accepted */
  const uint64_t exact_dim = nucleus_count * 3;
  rc = trexio_write_safe_nucleus_coord(handle, xyz, exact_dim);
  assert (rc == TREXIO_SUCCESS);

  // end of session
  rc = trexio_close(handle);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Verify that the coordinates dataset written earlier is reported as present.
 *
 * Opens the file in 'r' mode, queries trexio_has_nucleus_coord and closes the
 * session again; each step is verified with assert(). Always returns 0. */
static int test_has_dset (const char* file_name, const back_end_t backend) {

  /*================= START OF TEST ==================*/

  // read-only session
  trexio_t* const handle = trexio_open(file_name, 'r', backend);
  assert (handle != NULL);

  // the dataset stored by the write test has to be there
  trexio_exit_code rc = trexio_has_nucleus_coord(handle);
  assert (rc == TREXIO_SUCCESS);

  // end of session
  rc = trexio_close(handle);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Read the nucleus coordinates back through the bounds-checked ("safe") API.
 *
 * Opens the file in 'r' mode, reads nucleus_num (expected to be 12), allocates
 * a buffer of 3*num doubles and attempts the read twice: once with a declared
 * size that is too large (must be refused with TREXIO_UNSAFE_ARRAY_DIM) and
 * once with the exact size (must succeed). One coordinate is then compared
 * with the value stored by the write test.
 * Every step is verified with assert(); the function always returns 0. */
static int test_read_dset (const char* file_name, const back_end_t backend) {

  trexio_t* file = NULL;
  trexio_exit_code rc;

  // parameters to be read
  int num = 0;        // initialised so that a failed read never leaves it indeterminate
  double* coord = NULL;

  /*================= START OF TEST ==================*/

  // open file in 'read' mode
  file = trexio_open(file_name, 'r', backend);
  assert (file != NULL);

  // read numerical attribute from the file
  rc = trexio_read_nucleus_num(file, &num);
  assert (rc == TREXIO_SUCCESS);
  assert (num == 12);

  // buffer for the numerical (floating point) dataset: 3 values per nucleus
  coord = calloc((size_t) (3*num), sizeof *coord);
  // the allocation can fail; do not hand a NULL buffer to the library
  assert (coord != NULL);

  /* read numerical dataset with an unsafe dimension
   * this should return TREXIO_UNSAFE_ARRAY_DIM indicating
   * that access beyond allocated memory is likely to occur */
  uint64_t dim_unsafe = num * 12;
  rc = trexio_read_safe_nucleus_coord(file, coord, dim_unsafe);
  assert (rc == TREXIO_UNSAFE_ARRAY_DIM);

  /* read numerical dataset with a safe dimension
   * this should return TREXIO_SUCCESS */
  uint64_t dim_safe = num * 3;
  rc = trexio_read_safe_nucleus_coord(file, coord, dim_safe);
  assert (rc == TREXIO_SUCCESS);

  // element 30 is the x coordinate of the 11th nucleus written by the write test
  double x = coord[30] - 2.14171677;
  assert( x*x < 1.e-14 );
  free(coord);

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
/* Test launcher: removes any stale test directory, runs the write / has / read
 * tests in that order on the same file, then removes it again.
 * Both clean-up commands are required to exit with status 0. */
int main(void) {

  /*============== Test launcher ================*/

  // start from a clean slate
  int status = system(RM_COMMAND);
  assert (status == 0);

  // the has/read tests rely on the file produced by the write test
  test_write_dset (TREXIO_FILE, TEST_BACKEND);
  test_has_dset   (TREXIO_FILE, TEST_BACKEND);
  test_read_dset  (TREXIO_FILE, TEST_BACKEND);

  // leave nothing behind
  status = system(RM_COMMAND);
  assert (status == 0);

  return 0;
}