1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2025-01-03 10:06:01 +01:00

working HDF5 I/O for sparse indices

This commit is contained in:
q-posev 2021-12-09 16:13:28 +01:00
parent e774cb6852
commit e00f3b106e
3 changed files with 24 additions and 14 deletions

View File

@ -498,12 +498,26 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file,
// get the dataspace of the dataset // get the dataspace of the dataset
hid_t fspace_id = H5Dget_space(dset_id); hid_t fspace_id = H5Dget_space(dset_id);
// possible overflow HERE ? // TODO: check for possible overflow HERE ?
hsize_t offset[1] = {(hsize_t) offset_file*4}; hsize_t offset[1] = {(hsize_t) offset_file*4};
hsize_t count[1] = {(hsize_t) size*4}; hsize_t count[1] = {(hsize_t) size*4};
herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL); /* get dimensions of the dataset in the file to check whether reading with user-provided chunk size
will reach end of the dataset (i.e. EOF in TEXT back end)
,*/
hsize_t ddims[1] = {0};
int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
hsize_t max_offset = offset[0] + count[0];
int eof_reachable = 0;
if (max_offset > ddims[0]) {
eof_reachable = 1;
// lower the value of count to reduce the number of elements which will be read
count[0] -= max_offset - ddims[0];
}
herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset, NULL, count, NULL);
hid_t memspace_id = H5Screate_simple(1, count, NULL); hid_t memspace_id = H5Screate_simple(1, count, NULL);
status = H5Dread(dset_id, H5T_NATIVE_INT32, memspace_id, fspace_id, H5P_DEFAULT, index_read); status = H5Dread(dset_id, H5T_NATIVE_INT32, memspace_id, fspace_id, H5P_DEFAULT, index_read);
@ -514,6 +528,8 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file,
assert (status >= 0); assert (status >= 0);
if (eof_reachable == 1) return TREXIO_END;
return TREXIO_SUCCESS; return TREXIO_SUCCESS;
} }
#+end_src #+end_src

View File

@ -122,7 +122,7 @@ static int test_read_dset_sparse (const char* file_name, const back_end_t backen
// specify the read parameters, here: // specify the read parameters, here:
// 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5 // 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5
int64_t chunk_read = 30L; int64_t chunk_read = 10L;
int64_t offset_file_read = 40L; int64_t offset_file_read = 40L;
int offset_data_read = 5; int offset_data_read = 5;
@ -133,7 +133,6 @@ static int test_read_dset_sparse (const char* file_name, const back_end_t backen
assert(index_read[4*offset_data_read] == offset_file_read*4); assert(index_read[4*offset_data_read] == offset_file_read*4);
// now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max) // now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
/*
offset_file_read = 97L; offset_file_read = 97L;
offset_data_read = 1; offset_data_read = 1;
@ -142,11 +141,6 @@ static int test_read_dset_sparse (const char* file_name, const back_end_t backen
assert(rc == TREXIO_END); assert(rc == TREXIO_END);
assert(index_read[4*size_r-1] == 0); assert(index_read[4*size_r-1] == 0);
assert(index_read[4*offset_data_read] == 4 * (int32_t) offset_file_read); assert(index_read[4*offset_data_read] == 4 * (int32_t) offset_file_read);
*/
for (int i=0; i<size_r; i++){
printf("%d %d \n", index_read[4*i], index_read[4*i+1]);
}
// close current session // close current session
rc = trexio_close(file); rc = trexio_close(file);
@ -181,7 +175,6 @@ static int test_read_dset_sparse_size (const char* file_name, const back_end_t b
// read one chunk using the aforementioned parameters // read one chunk using the aforementioned parameters
rc = trexio_read_mo_2e_int_eri_size(file, &size_written); rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
assert(rc == TREXIO_SUCCESS); assert(rc == TREXIO_SUCCESS);
printf("%ld \n", size_written);
assert(size_written == size_check); assert(size_written == size_check);
// close current session // close current session
@ -208,10 +201,10 @@ int main(){
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, (int64_t) SIZE); test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, (int64_t) SIZE);
// check the second write attempt (SIZE elements written in N_CHUNKS chunks) // check the second write attempt (SIZE elements written in N_CHUNKS chunks)
//test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND); test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND);
//test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, (int64_t) SIZE*2); test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, (int64_t) SIZE*2);
//rc = system(RM_COMMAND); rc = system(RM_COMMAND);
assert (rc == 0); assert (rc == 0);
return 0; return 0;

View File

@ -66,7 +66,7 @@ subroutine test_write(file_name, back_end)
double precision :: value_sparse_mo_2e_int_eri(100) double precision :: value_sparse_mo_2e_int_eri(100)
integer :: i, n_buffers = 5 integer :: i, n_buffers = 5
integer(8) :: buf_size, offset = 0 integer(8) :: buf_size, offset
buf_size = 100/n_buffers buf_size = 100/n_buffers
do i = 1, 100 do i = 1, 100
@ -138,6 +138,7 @@ subroutine test_write(file_name, back_end)
rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index) rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX') call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX')
offset = 0
do i = 1, n_buffers do i = 1, n_buffers
rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, & rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, &
index_sparse_mo_2e_int_eri(1,offset+1), & index_sparse_mo_2e_int_eri(1,offset+1), &