mirror of
https://github.com/TREX-CoE/trexio.git
synced 2024-11-03 20:54:07 +01:00
only shift indices that have been read to the buffer (EOF case)
This commit is contained in:
parent
7afb53be49
commit
9f5ee463e1
@ -2445,19 +2445,19 @@ trexio_read_$group_dset$(trexio_t* const file,
|
||||
/* Read the max number of integrals stored in the file */
|
||||
rc = trexio_read_$group_dset$_size(file, &size_max);
|
||||
if (rc != TREXIO_SUCCESS) return rc;
|
||||
/* Cannot read more data points than there is already in the file */
|
||||
// TODO: YOU CAN AND WILL REACH EOF SO NO PROBLEM
|
||||
if (buffer_size > size_max) return TREXIO_INVALID_ARG_3;
|
||||
|
||||
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
|
||||
uint64_t eof_read_size = 0UL;
|
||||
|
||||
switch (file->back_end) {
|
||||
|
||||
case TREXIO_TEXT:
|
||||
rc = trexio_text_read_$group_dset$(file, offset_file, buffer_size, size_max, index_sparse, value_sparse);
|
||||
rc = trexio_text_read_$group_dset$(file, offset_file, buffer_size, size_max, &eof_read_size, index_sparse, value_sparse);
|
||||
break;
|
||||
|
||||
case TREXIO_HDF5:
|
||||
#ifdef HAVE_HDF5
|
||||
rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, size_max, index_sparse, value_sparse);
|
||||
rc = trexio_hdf5_read_$group_dset$(file, offset_file, buffer_size, size_max, &eof_read_size, index_sparse, value_sparse);
|
||||
break;
|
||||
#else
|
||||
rc = TREXIO_BACK_END_MISSING;
|
||||
@ -2471,21 +2471,18 @@ trexio_read_$group_dset$(trexio_t* const file,
|
||||
rc = TREXIO_FAILURE; /* Impossible case */
|
||||
}
|
||||
|
||||
if (rc != TREXIO_SUCCESS) return rc;
|
||||
if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc;
|
||||
|
||||
// shift indices to be one-based if Fortran API is used
|
||||
// TODO :
|
||||
// THIS WILL SHIFT ALL INDICES, HOWEVER IF EOF IS ENCOUNTERED THIS IS NOT DESIRABLE
|
||||
// WE CAN MODIFY BY ADDRESS AND INT VALUE INDICATING THE NUMBER OF INTEGRALS READ BEFORE EOF
|
||||
// AND ONLY SHIFT THEM !
|
||||
if (file->one_based) {
|
||||
uint64_t index_size = 4L*buffer_size;
|
||||
// if EOF is reached - shift only indices that have been read, not an entire buffer
|
||||
uint64_t index_size = (rc == TREXIO_END) ? (4UL*eof_read_size) : (4UL*buffer_size) ;
|
||||
for (uint64_t i=0; i<index_size; ++i){
|
||||
index_sparse[i] += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
@ -2551,7 +2548,7 @@ trexio_write_$group_dset$(trexio_t* const file,
|
||||
// shift indices to be zero-based if Fortran API is used
|
||||
if (file->one_based) {
|
||||
|
||||
uint64_t index_size = 4L*buffer_size;
|
||||
uint64_t index_size = 4UL*buffer_size;
|
||||
index_sparse_p = CALLOC(index_size, int32_t);
|
||||
if (index_sparse_p == NULL) return TREXIO_ALLOCATION_FAILED;
|
||||
|
||||
|
@ -379,7 +379,7 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
|
||||
|
||||
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
|
||||
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
|
||||
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, uint64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
|
||||
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
|
||||
#+end_src
|
||||
@ -483,11 +483,13 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
uint64_t* const eof_read_size,
|
||||
int32_t* const index_read,
|
||||
double* const value_read)
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
|
||||
|
||||
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
|
||||
|
||||
@ -515,6 +517,8 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file,
|
||||
eof_reachable = 1;
|
||||
// lower the value of count to reduce the number of elements which will be read
|
||||
count[0] -= max_offset - ddims[0];
|
||||
// modify the eof_read_size accordingly
|
||||
*eof_read_size = (uint64_t) (count[0] / 4UL);
|
||||
}
|
||||
|
||||
herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL);
|
||||
|
@ -520,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file)
|
||||
}
|
||||
|
||||
/* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
|
||||
, BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
|
||||
,*/
|
||||
BUT its size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
|
||||
*/
|
||||
char* tmp_$group_dset$;
|
||||
tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
|
||||
|
||||
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
|
||||
$group$->$group_dset$[i] = tmp_$group_dset$;
|
||||
/* conventional fscanf with "%s" only returns the string before the first whitespace character
|
||||
,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
|
||||
,* Q: depending on what ? */
|
||||
* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
|
||||
* Q: depending on what ? */
|
||||
rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$);
|
||||
assert(!(rc != 1));
|
||||
if (rc != 1) {
|
||||
@ -1003,21 +1003,29 @@ trexio_text_has_$group_str$ (trexio_t* const file)
|
||||
#+end_src
|
||||
** Template for has/read/write the dataset of sparse data
|
||||
|
||||
Each sparse array is stored in a separate =.txt= file due to the fact that sparse I/O has to be decoupled
|
||||
from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data
|
||||
to prevent memory overflow. Chunks have a given ~int64_t size~
|
||||
(size specifies the number of sparse data items, e.g. integrals).
|
||||
|
||||
User provides indices and values of the sparse array as two separate variables.
|
||||
|
||||
|
||||
#+begin_src c :tangle hrw_dset_sparse_text.h :exports none
|
||||
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file);
|
||||
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
|
||||
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, uint64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
|
||||
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle write_dset_sparse_text.c
|
||||
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
const int32_t* index_sparse,
|
||||
const double* value_sparse)
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
const int32_t* index_sparse,
|
||||
const double* value_sparse)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
|
||||
@ -1040,14 +1048,14 @@ trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
|
||||
/* Specify the line length in order to offset properly. For example, for 4-index quantities
|
||||
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
|
||||
CURRENTLY NO OFFSET IS USED WHEN WRITING !
|
||||
*/
|
||||
*/
|
||||
const int64_t line_length = $group_dset_sparse_line_length$L;
|
||||
|
||||
/* Get the starting position of the IO stream to be written in the .size file.
|
||||
This is error-prone because on 32-bit systems ftell will fail for large files (>2 GB).
|
||||
One can use ftello function which is adapted for large files.
|
||||
For now, we can use front-end-provided size_max, which has been checked for INT64_MAX overflow.
|
||||
*/
|
||||
*/
|
||||
//int64_t io_start_pos = (int64_t) ftell(f);
|
||||
int64_t io_start_pos = size_max * line_length;
|
||||
|
||||
@ -1055,7 +1063,7 @@ trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
|
||||
|
||||
/* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
|
||||
int rc;
|
||||
for (uint64_t i=0L; i<size; ++i) {
|
||||
for (uint64_t i=0UL; i<size; ++i) {
|
||||
|
||||
rc = fprintf(f, "$group_dset_format_printf$\n",
|
||||
$group_dset_sparse_indices_printf$,
|
||||
@ -1100,17 +1108,19 @@ trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
|
||||
|
||||
#+begin_src c :tangle read_dset_sparse_text.c
|
||||
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
int32_t* const index_sparse,
|
||||
double* const value_sparse)
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
uint64_t* const eof_read_size,
|
||||
int32_t* const index_sparse,
|
||||
double* const value_sparse)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
|
||||
|
||||
/* Build the name of the file with sparse data.
|
||||
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
|
||||
,*/
|
||||
*/
|
||||
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
|
||||
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
|
||||
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
|
||||
@ -1127,7 +1137,7 @@ trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
|
||||
|
||||
/* Specify the line length in order to offset properly. For example, for 4-index quantities
|
||||
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char
|
||||
,*/
|
||||
*/
|
||||
const uint64_t line_length = $group_dset_sparse_line_length$L;
|
||||
|
||||
fseek(f, (long) offset_file * line_length, SEEK_SET);
|
||||
@ -1135,13 +1145,15 @@ trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
|
||||
/* Read the data from the file line by line and check the return codes of fgets and sscanf to verify that data has been read or that EOF has been reached */
|
||||
int rc;
|
||||
char buffer[1024];
|
||||
for (uint64_t i=0L; i<size; ++i) {
|
||||
uint64_t count = 0UL;
|
||||
for (uint64_t i=0UL; i<size; ++i) {
|
||||
|
||||
memset(buffer,0,sizeof(buffer));
|
||||
|
||||
if(fgets(buffer, 1023, f) == NULL){
|
||||
|
||||
fclose(f);
|
||||
*eof_read_size = count;
|
||||
return TREXIO_END;
|
||||
|
||||
} else {
|
||||
@ -1149,11 +1161,11 @@ trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
|
||||
rc = sscanf(buffer, "$group_dset_format_scanf$",
|
||||
$group_dset_sparse_indices_scanf$,
|
||||
value_sparse + i);
|
||||
|
||||
if(rc <= 0) {
|
||||
fclose(f);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
count += 1UL;
|
||||
|
||||
}
|
||||
}
|
||||
@ -1163,7 +1175,6 @@ trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
|
||||
if(rc != 0) return TREXIO_FILE_ERROR;
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
|
||||
}
|
||||
#+end_src
|
||||
|
||||
@ -1175,7 +1186,7 @@ trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_
|
||||
|
||||
/* Build the name of the file with sparse data.
|
||||
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
|
||||
,*/
|
||||
*/
|
||||
const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
|
||||
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
|
||||
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
|
||||
@ -1225,7 +1236,7 @@ trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
|
||||
|
||||
/* Build the name of the file with sparse data.
|
||||
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
|
||||
,*/
|
||||
*/
|
||||
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
|
||||
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
|
||||
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
|
||||
|
@ -195,6 +195,9 @@ subroutine test_read(file_name, back_end)
|
||||
double precision :: value_sparse_mo_2e_int_eri(20)
|
||||
integer(8) :: read_buf_size = 10
|
||||
integer(8) :: offset_read = 40
|
||||
integer(8) :: offset_data_read = 5
|
||||
integer(8) :: offset_eof = 97
|
||||
integer(8) :: offset_data_eof = 1
|
||||
integer(8) :: size_toread = 0
|
||||
|
||||
character*(128) :: str
|
||||
@ -271,10 +274,11 @@ subroutine test_read(file_name, back_end)
|
||||
|
||||
|
||||
rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, &
|
||||
index_sparse_mo_2e_int_eri(1,5+1), &
|
||||
value_sparse_mo_2e_int_eri(5+1))
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1), &
|
||||
value_sparse_mo_2e_int_eri(offset_data_read + 1))
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
if (index_sparse_mo_2e_int_eri(1,1) == 0 .and. index_sparse_mo_2e_int_eri(1,5+1) == offset_read*4+1) then
|
||||
if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then
|
||||
write(*,*) 'SUCCESS READ SPARSE DATA'
|
||||
else
|
||||
print *, 'FAILURE SPARSE DATA CHECK'
|
||||
@ -282,6 +286,24 @@ subroutine test_read(file_name, back_end)
|
||||
endif
|
||||
|
||||
|
||||
! attempt to read reaching EOF: should return TREXIO_END and
|
||||
! NOT increment the existing values in the buffer (only update it with what has been read)
|
||||
rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), &
|
||||
value_sparse_mo_2e_int_eri(offset_data_eof + 1))
|
||||
call trexio_assert(rc, TREXIO_END)
|
||||
!do i = 1,20
|
||||
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
|
||||
!enddo
|
||||
if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then
|
||||
write(*,*) 'SUCCESS READ SPARSE DATA EOF'
|
||||
else
|
||||
print *, 'FAILURE SPARSE DATA EOF CHECK'
|
||||
call exit(-1)
|
||||
endif
|
||||
|
||||
rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
if (size_toread == 100) then
|
||||
|
Loading…
Reference in New Issue
Block a user