1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-12-22 20:35:44 +01:00

add functions to read the number of stored sparse elements

This commit is contained in:
q-posev 2021-12-02 17:28:37 +01:00
parent acdf982a94
commit d2c95cd306
2 changed files with 156 additions and 16 deletions

View File

@ -188,10 +188,16 @@ __trexio_path__ = None
| ~TREXIO_DSET_MISSING~ | 25 | 'Dataset does not exist in the file' |
| ~TREXIO_BACK_END_MISSING~ | 26 | 'Requested back end is disabled' |
| ~TREXIO_INVALID_STR_LEN~ | 30 | 'Invalid max_str_len' |
| ~TREXIO_INT_SIZE_OVERFLOW~ | 31 | 'Possible integer overflow' |
# We need to force Emacs not to indent the Python code:
# -*- org-src-preserve-indentation: t
*IMPORTANT!*
The code below has to be executed within Emacs each time
a new error code is added to the table above. Otherwise, the new code
and its corresponding message are not propagated to the source code.
#+begin_src python :var table=table-exit-codes :results drawer
""" This script generates the C and Fortran constants for the error
codes from the org-mode table.
@ -257,6 +263,7 @@ return '\n'.join(result)
#define TREXIO_DSET_MISSING ((trexio_exit_code) 25)
#define TREXIO_BACK_END_MISSING ((trexio_exit_code) 26)
#define TREXIO_INVALID_STR_LEN ((trexio_exit_code) 30)
#define TREXIO_INT_SIZE_OVERFLOW ((trexio_exit_code) 31)
#+end_src
#+begin_src f90 :tangle prefix_fortran.f90 :exports none
@ -289,6 +296,7 @@ return '\n'.join(result)
integer(trexio_exit_code), parameter :: TREXIO_DSET_MISSING = 25
integer(trexio_exit_code), parameter :: TREXIO_BACK_END_MISSING = 26
integer(trexio_exit_code), parameter :: TREXIO_INVALID_STR_LEN = 30
integer(trexio_exit_code), parameter :: TREXIO_INT_SIZE_OVERFLOW = 31
#+end_src
#+begin_src python :tangle prefix_python.py :exports none
@ -322,6 +330,7 @@ return '\n'.join(result)
TREXIO_DSET_MISSING = 25
TREXIO_BACK_END_MISSING = 26
TREXIO_INVALID_STR_LEN = 30
TREXIO_INT_SIZE_OVERFLOW = 31
#+end_src
:END:
@ -342,7 +351,10 @@ const char* trexio_string_of_error(const trexio_exit_code error);
void trexio_string_of_error_f(const trexio_exit_code error, char result[<<MAX_STRING_LENGTH()>>]);
#+end_src
The text strings are extracted from the previous table.
*IMPORTANT!*
The code below has to be executed within Emacs each time
a new error code is added to the table above. Otherwise, the new code
and its corresponding message are not propagated to the source code.
#+NAME:cases
#+begin_src python :var table=table-exit-codes :exports none :noweb yes
@ -442,9 +454,15 @@ return '\n'.join(result)
case TREXIO_DSET_MISSING:
return "Dataset does not exist in the file";
break;
case TREXIO_BACK_END_MISSING:
return "Requested back end is disabled";
break;
case TREXIO_INVALID_STR_LEN:
return "Invalid max_str_len";
break;
case TREXIO_INT_SIZE_OVERFLOW:
return "Possible integer overflow";
break;
#+end_example
**** C source code
@ -2397,6 +2415,7 @@ def has_$group_dset$(trexio_file) -> bool:
#+begin_src c :tangle hrw_dset_sparse_front.h :exports none
/* Front-end (back-end independent) API for the sparse dataset $group_sparse_dset$. */
/* has: callers in this file treat any return value other than TREXIO_SUCCESS as "dataset missing". */
trexio_exit_code trexio_has_$group_sparse_dset$(trexio_t* const file);
/* read: buffer_size must be > 0 and both output arrays non-NULL; presumably fills them starting at offset_file (confirm against the implementation). */
trexio_exit_code trexio_read_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, int32_t* const index_sparse, double* const value_sparse);
/* read_size: stores in *size_max the number of sparse elements already stored in the file. */
trexio_exit_code trexio_read_$group_sparse_dset$_size(trexio_t* const file, int64_t* const size_max);
/* write: presumably writes buffer_size entries from index_sparse / value_sparse (confirm against the implementation). */
trexio_exit_code trexio_write_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int32_t* index_sparse, const double* value_sparse);
#+end_src
@ -2416,18 +2435,16 @@ trexio_read_$group_sparse_dset$(trexio_t* const file,
if (buffer_size <= 0L) return TREXIO_INVALID_ARG_3;
if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
if (trexio_has_$group_sparse_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices
int64_t size_max; // Max number of integrals (already in the file)
trexio_exit_code rc;
// temporary
size_max = buffer_size;
/* TODO
rc = trexio_read_$group_sparse_dset$_num(file, &size_max);
/* Read the max number of integrals stored in the file */
rc = trexio_read_$group_sparse_dset$_size(file, &size_max);
if (rc != TREXIO_SUCCESS) return rc;
*/
switch (file->back_end) {
@ -2446,13 +2463,46 @@ trexio_read_$group_sparse_dset$(trexio_t* const file,
case TREXIO_JSON:
return trexio_json_read_$group_sparse_dset$(...);
break;
*/
,*/
default:
return TREXIO_FAILURE; /* Impossible case */
}
}
#+end_src
#+begin_src c :tangle read_dset_sparse_size_front.c
/* Front-end reader for the number of $group_sparse_dset$ elements stored in the file.

   file     : TREXIO file handle; must not be NULL.
   size_max : output; must not be NULL. Filled in by the selected back end.

   Returns TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for NULL arguments,
   TREXIO_DSET_MISSING when the dataset is not present in the file,
   TREXIO_BACK_END_MISSING when the HDF5 back end is requested but not compiled in,
   TREXIO_FAILURE for an unknown back end, and otherwise whatever the back end returns. */
trexio_exit_code
trexio_read_$group_sparse_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  /* Reject a NULL output pointer here so that no back end ever dereferences it */
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;
  if (trexio_has_$group_sparse_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;

  /* Every case returns directly, hence no break statements are needed */
  switch (file->back_end) {

  case TREXIO_TEXT:
    return trexio_text_read_$group_sparse_dset$_size(file, size_max);

  case TREXIO_HDF5:
#ifdef HAVE_HDF5
    return trexio_hdf5_read_$group_sparse_dset$_size(file, size_max);
#else
    return TREXIO_BACK_END_MISSING;
#endif
/*
  case TREXIO_JSON:
    return trexio_json_read_$group_sparse_dset$_size(...);
    break;
,*/
  default:
    return TREXIO_FAILURE;  /* Impossible case */
  }
}
#+end_src
#+begin_src c :tangle write_dset_sparse_front.c
trexio_exit_code
trexio_write_$group_sparse_dset$(trexio_t* const file,
@ -2470,15 +2520,17 @@ trexio_write_$group_sparse_dset$(trexio_t* const file,
const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices
int64_t size_max; // Max number of integrals (already in the file)
int64_t size_max=0L; // Max number of integrals (already in the file)
trexio_exit_code rc;
// temporary
size_max = buffer_size;
/* TODO
rc = trexio_read_$group_sparse_dset$_num(file, &size_max);
if (rc != TREXIO_SUCCESS) return rc;
*/
/* Read the max number of integrals stored in the file */
rc = trexio_has_$group_sparse_dset$(file);
if (rc == TREXIO_SUCCESS) {
rc = trexio_read_$group_sparse_dset$_size(file, &size_max);
if (rc != TREXIO_SUCCESS) return rc;
} else {
size_max = 0L;
}
switch (file->back_end) {
@ -2573,6 +2625,17 @@ interface
end interface
#+end_src
#+begin_src f90 :tangle read_dset_sparse_size_front_fortran.f90
! Fortran binding to the C function trexio_read_$group_sparse_dset$_size.
!   trex_file : 64-bit integer passed by value (presumably the C file handle; confirm against the C API)
!   size_max  : output, passed by reference; receives the number of stored sparse elements
! The function result is the integer exit code returned by the C routine.
interface
integer function trexio_read_$group_sparse_dset$_size (trex_file, &
size_max) bind(C)
use, intrinsic :: iso_c_binding
integer(8), intent(in), value :: trex_file
integer(8), intent(out) :: size_max
end function trexio_read_$group_sparse_dset$_size
end interface
#+end_src
#+begin_src f90 :tangle has_dset_sparse_front_fortran.f90
interface
integer function trexio_has_$group_sparse_dset$ (trex_file) bind(C)

View File

@ -1006,9 +1006,11 @@ trexio_text_has_$group_str$ (trexio_t* const file)
#+begin_src c :tangle hrw_dset_sparse_text.h :exports none
/* Text back-end API for the sparse dataset $group_sparse_dset$. */
/* has: existence check for the dataset (implementation not visible here). */
trexio_exit_code trexio_text_has_$group_sparse_dset$(trexio_t* const file);
/* read: reads sparse entries from the $group_sparse_dset$.txt file into index_sparse / value_sparse; exact use of size and size_max to be confirmed against the implementation. */
trexio_exit_code trexio_text_read_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
/* read_size: sums the per-write sizes recorded in the $group_sparse_dset$.txt.size file and stores the total in *size_max. */
trexio_exit_code trexio_text_read_$group_sparse_dset$_size(trexio_t* const file, int64_t* const size_max);
/* write: appends entries to the data file, then appends a "size start_position" record to the companion .size file. */
trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
#+end_src
#+begin_src c :tangle write_dset_sparse_text.c
trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file,
const int64_t offset_file,
@ -1035,10 +1037,13 @@ trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file,
FILE* f = fopen(file_full_path, "a");
if(f == NULL) return TREXIO_FILE_ERROR;
/* Get the starting position of the IO stream to be written in the .size file */
int64_t io_start_pos = (int64_t) ftell(f);
/* Specify the line length in order to offset properly. For example, for 4-index quantities
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
CURRENTLY NO OFFSET IS USED WHEN WRITING !
*/
,*/
const uint64_t line_length = $group_sparse_dset_line_length$L;
//fseek(f, (long) offset_file * line_length, SEEK_SET);
@ -1061,6 +1066,27 @@ trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file,
rc = fclose(f);
if(rc != 0) return TREXIO_FILE_ERROR;
/* Append .size to the file_full_path in order to write additional info about the written buffer of data */
strncat(file_full_path, ".size", 6);
/* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
FILE *f_wSize = fopen(file_full_path, "a");
if (f_wSize == NULL) return TREXIO_FILE_ERROR;
/* Write the buffer_size */
rc = fprintf(f_wSize, "%ld %ld\n", size, io_start_pos);
if (rc <= 0) {
fclose(f_wSize);
return TREXIO_FAILURE;
}
/* Close the TXT file */
rc = fclose(f_wSize);
if(rc != 0) return TREXIO_FILE_ERROR;
/* Exit upon success */
return TREXIO_SUCCESS;
}
#+end_src
@ -1078,7 +1104,7 @@ trexio_exit_code trexio_text_read_$group_sparse_dset$(trexio_t* const file,
/* Build the name of the file with sparse data.
The $group_sparse_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
,*/
*/
const char $group_sparse_dset$_file_name[256] = "/$group_sparse_dset$.txt";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
@ -1130,6 +1156,57 @@ trexio_exit_code trexio_text_read_$group_sparse_dset$(trexio_t* const file,
return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_text.c
/* Text back end: compute the total number of sparse elements stored for $group_sparse_dset$.

   Each record of the companion "$group_sparse_dset$.txt.size" file holds two integers,
   "<size> <start position>", one record per write. This function adds up all
   the <size> fields and stores the total in *size_max.

   Returns TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for NULL arguments,
   TREXIO_FILE_ERROR if the path does not fit in the buffer or the file cannot be
   opened or closed, TREXIO_FAILURE if the file content is malformed,
   TREXIO_INT_SIZE_OVERFLOW (with *size_max set to -1) if the sum does not fit in
   int64_t, and TREXIO_SUCCESS otherwise. */
trexio_exit_code trexio_text_read_$group_sparse_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  /* Name of the file with the sizes of the written buffers (limited to 256 symbols for the moment). */
  const char $group_sparse_dset$_file_name[256] = "/$group_sparse_dset$.txt.size";
  /* The full path to the .size file. This will include the TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy the directory name; strncpy does not terminate on truncation, so terminate explicitly */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH-1);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';

  /* Append the file name only if it fits together with the terminating NUL.
     A truncated directory name fills the buffer, so it is rejected here too. */
  const size_t dir_length  = strlen(file_full_path);
  const size_t name_length = strlen($group_sparse_dset$_file_name);
  if (name_length >= TREXIO_MAX_FILENAME_LENGTH - dir_length) return TREXIO_FILE_ERROR;
  strncat (file_full_path, $group_sparse_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH - dir_length - 1);

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* long long matches the %lld conversion on every platform and is at least 64 bits wide */
  long long size_item = 0LL;
  long long offset_item = 0LL;
  int64_t size_accum = 0L;
  int n_read;

  /* Keep reading while both fields of a record are converted. Testing only against EOF
     would loop forever on malformed input, since a matching failure consumes nothing. */
  while ((n_read = fscanf(f, "%lld %lld", &size_item, &offset_item)) == 2) {

    /* A negative size can only come from a corrupted file */
    if (size_item < 0LL) {
      fclose(f);
      return TREXIO_FAILURE;
    }

    /* Check that the summation will not overflow the int64_t value */
    if (size_item > INT64_MAX - size_accum) {
      fclose(f);
      *size_max = -1L;
      return TREXIO_INT_SIZE_OVERFLOW;
    }

    size_accum += size_item;
  }

  /* The loop must have stopped at a clean end of file, not on an incomplete record or a read error */
  if (n_read != EOF || ferror(f)) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  const int rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
  *size_max = size_accum;
  return TREXIO_SUCCESS;
}
#+end_src