1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-11-03 20:54:07 +01:00

[WIP] text back end

This commit is contained in:
q-posev 2022-04-11 09:26:54 +02:00
parent e5d6104a63
commit a2fee3164b
6 changed files with 229 additions and 112 deletions

View File

@ -112,6 +112,7 @@ TESTS_C = \
tests/io_dset_float_text \
tests/io_dset_int_text \
tests/io_dset_sparse_text \
tests/io_determinant_text \
tests/io_safe_dset_float_text \
tests/io_str_text \
tests/io_dset_str_text \

View File

@ -39,6 +39,9 @@ echo "" >> trexio_f.f90
cat populated/pop_*.c >> trexio.c
cat populated/pop_*.h >> trexio.h
cat hrw_determinant_front.h >> trexio.h
cat *_determinant_front.c >> trexio.c
# fortran front end
cat populated/pop_*.f90 >> trexio_f.f90
# add helper functions

View File

@ -2749,7 +2749,7 @@ trexio_read_$group_dset$(trexio_t* const file,
if (rc != TREXIO_SUCCESS) return rc;
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
int64_t eof_read_size = 0UL;
int64_t eof_read_size = 0L;
switch (file->back_end) {
@ -4128,18 +4128,16 @@ def delete_$group$(trexio_file) -> None:
#+begin_src c :tangle hrw_determinant_front.h :exports none
trexio_exit_code trexio_has_determinant_list(trexio_t* const file);
trexio_exit_code trexio_read_determinant_list(trexio_t* const file, int64_t* const dset);
trexio_exit_code trexio_write_determinant_list(trexio_t* const file, const int64_t* dset);
trexio_exit_code trexio_has_determinant_coefficient(trexio_t* const file);
trexio_exit_code trexio_read_determinant_coefficient(trexio_t* const file, double* const dset);
trexio_exit_code trexio_write_determinant_coefficient(trexio_t* const file, const double* dset);
trexio_exit_code trexio_read_determinant_list(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int64_t* const dset);
trexio_exit_code trexio_write_determinant_list(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int64_t* dset);
#+end_src
**** Source code for default functions
#+begin_src c :tangle read_determinant_front.c
trexio_exit_code
trexio_read_determinant_list (trexio_t* const file, int64_t* const dset)
trexio_read_determinant_list (trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int64_t* const dset)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
@ -4147,116 +4145,92 @@ trexio_read_determinant_list (trexio_t* const file, int64_t* const dset)
if (trexio_has_determinant_list(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
trexio_exit_code rc;
int64_t det_dim = 0L;
rc = trexio_read_determinant_num_64(file, &det_dim);
/* Read the number of mos */
int64_t mo_num = 0L;
rc = trexio_read_mo_num_64(file, &mo_num);
if (rc != TREXIO_SUCCESS) return rc;
if (det_dim == 0L) return TREXIO_INVALID_NUM;
if (mo_num == 0L) return TREXIO_INVALID_NUM;
/* Compute how many 64-bit integers are needed to represent a determinant */
uint32_t int_num = 0;
int_num = ((mo_num - 1)/64) + 1;
uint32_t rank = 2;
uint64_t dims[2] = {det_dim, 2};
uint64_t det_size = (uint64_t) (*buffer_size);
uint64_t dims[2] = {det_size, int_num*2UL};
assert(file->back_end < TREXIO_INVALID_BACK_END);
// introduce a new variable which will be set to the number of determinants actually read if EOF is encountered
int64_t eof_read_size = 0L;
rc = TREXIO_FAILURE;
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_read_determinant_list(file, dset, rank, dims);
rc = trexio_text_read_determinant_list(file, offset_file, rank, dims, &eof_read_size, dset);
break;
case TREXIO_HDF5:
#ifdef HAVE_HDF5
return trexio_hdf5_read_determinant_list(file, dset, rank, dims);
rc = -1; //trexio_hdf5_read_determinant_list(file, offset_file, rank, dims, &eof_read_size, dset);
break;
#else
return TREXIO_BACK_END_MISSING;
rc = TREXIO_BACK_END_MISSING;
break;
#endif
/*
case TREXIO_JSON:
return trexio_json_read_
return trexio_json_read_$group_dset$(...);
break;
,*/
default:
rc = TREXIO_FAILURE; /* Impossible case */
break;
}
if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc;
if (rc == TREXIO_END) *buffer_size = eof_read_size;
return rc;
}
/* Read the determinant coefficients of a TREXIO file into dset.

   file : TREXIO file handle
   dset : output buffer; the dimension passed to the back end is the value of determinant_num

   Returns TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for a NULL file / dset,
   TREXIO_DSET_MISSING when the coefficients are not present in the file,
   the error of trexio_read_determinant_num_64 when the number of determinants cannot be read,
   TREXIO_INVALID_NUM when that number is not positive,
   TREXIO_BACK_END_MISSING when HDF5 is requested but not compiled in,
   and otherwise the exit code of the selected back end. */
trexio_exit_code
trexio_read_determinant_coefficient (trexio_t* const file, double* const dset)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (dset == NULL) return TREXIO_INVALID_ARG_2;
  if (trexio_has_determinant_coefficient(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;

  int64_t det_dim = 0L;
  trexio_exit_code rc = trexio_read_determinant_num_64(file, &det_dim);
  if (rc != TREXIO_SUCCESS) return rc;
  /* A negative value would wrap around to a huge dimension in the unsigned conversion below */
  if (det_dim <= 0L) return TREXIO_INVALID_NUM;

  uint32_t rank = 1;
  uint64_t dims[1] = {(uint64_t) det_dim};

  assert(file->back_end < TREXIO_INVALID_BACK_END);

  /* Every path assigns rc and leaves through the single return below,
     so the function can never fall off its end without a value. */
  switch (file->back_end) {

  case TREXIO_TEXT:
    rc = trexio_text_read_determinant_coefficient(file, dset, rank, dims);
    break;

  case TREXIO_HDF5:
#ifdef HAVE_HDF5
    rc = trexio_hdf5_read_determinant_coefficient(file, dset, rank, dims);
#else
    rc = TREXIO_BACK_END_MISSING;
#endif
    break;

  /* A case for the JSON back end would be added here. */

  default:
    rc = TREXIO_FAILURE;  /* Impossible case */
    break;
  }

  return rc;
}
#+end_src
#+begin_src c :tangle write_determinant_front.c
trexio_exit_code
trexio_write_determinant_list (trexio_t* const file, const int64_t* dset)
trexio_write_determinant_list (trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int64_t* dset)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset == NULL) return TREXIO_INVALID_ARG_2;
trexio_exit_code rc;
int64_t det_dim = 0L;
rc = trexio_read_determinant_num_64(file, &det_dim);
/* Read the number of mos */
int64_t mo_num = 0L;
rc = trexio_read_mo_num_64(file, &mo_num);
if (rc != TREXIO_SUCCESS) return rc;
if (det_dim == 0L) return TREXIO_INVALID_NUM;
if (mo_num == 0L) return TREXIO_INVALID_NUM;
/* Compute how many 64-bit integers are needed to represent a determinant */
uint32_t int_num = 0;
int_num = ((mo_num - 1)/64) + 1;
uint32_t rank = 2;
uint64_t dims[2] = {det_dim, 2};
uint64_t dims[2] = {buffer_size, int_num*2UL};
assert(file->back_end < TREXIO_INVALID_BACK_END);
rc = TREXIO_FAILURE;
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_write_determinant_list(file, dset, rank, dims);
return trexio_text_write_determinant_list(file, offset_file, rank, dims, dset);
break;
case TREXIO_HDF5:
#ifdef HAVE_HDF5
return trexio_hdf5_write_determinant_list(file, dset, rank, dims);
return -1; //trexio_hdf5_write_determinant_list(file, dset, rank, dims);
break;
#else
return TREXIO_BACK_END_MISSING;
@ -4271,47 +4245,6 @@ trexio_write_determinant_list (trexio_t* const file, const int64_t* dset)
}
/* Write the determinant coefficients stored in dset to a TREXIO file.

   file : TREXIO file handle
   dset : input buffer; the dimension passed to the back end is the value of determinant_num

   Returns TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for a NULL file / dset,
   the error of trexio_read_determinant_num_64 when the number of determinants cannot be read,
   TREXIO_INVALID_NUM when that number is not positive,
   TREXIO_BACK_END_MISSING when HDF5 is requested but not compiled in,
   and otherwise the exit code of the selected back end. */
trexio_exit_code
trexio_write_determinant_coefficient (trexio_t* const file, const double* dset)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (dset == NULL) return TREXIO_INVALID_ARG_2;

  int64_t det_dim = 0L;
  trexio_exit_code rc = trexio_read_determinant_num_64(file, &det_dim);
  if (rc != TREXIO_SUCCESS) return rc;
  /* A negative value would wrap around to a huge dimension in the unsigned conversion below */
  if (det_dim <= 0L) return TREXIO_INVALID_NUM;

  uint32_t rank = 1;
  uint64_t dims[1] = {(uint64_t) det_dim};

  assert(file->back_end < TREXIO_INVALID_BACK_END);

  /* Every path assigns rc and leaves through the single return below,
     so the function can never fall off its end without a value. */
  switch (file->back_end) {

  case TREXIO_TEXT:
    rc = trexio_text_write_determinant_coefficient(file, dset, rank, dims);
    break;

  case TREXIO_HDF5:
#ifdef HAVE_HDF5
    rc = trexio_hdf5_write_determinant_coefficient(file, dset, rank, dims);
#else
    rc = TREXIO_BACK_END_MISSING;
#endif
    break;

  /* A case for the JSON back end would be added here. */

  default:
    rc = TREXIO_FAILURE;  /* Impossible case */
    break;
  }

  return rc;
}
#+end_src
#+begin_src c :tangle has_determinant_front.c
@ -4326,11 +4259,11 @@ trexio_has_determinant_list (trexio_t* const file)
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_has_determinant_list(file)
return trexio_text_has_determinant_list(file);
case TREXIO_HDF5:
#ifdef HAVE_HDF5
return trexio_hdf5_has_determinant_list(file);
return -1; //trexio_hdf5_has_determinant_list(file);
#else
return TREXIO_BACK_END_MISSING;
#endif
@ -4355,11 +4288,11 @@ trexio_has_determinant_coefficient (trexio_t* const file)
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_has_determinant_coefficient(file)
return -1; //trexio_text_has_determinant_coefficient(file);
case TREXIO_HDF5:
#ifdef HAVE_HDF5
return trexio_hdf5_has_determinant_coefficient(file);
return -1; //trexio_hdf5_has_determinant_coefficient(file);
#else
return TREXIO_BACK_END_MISSING;
#endif

View File

@ -10,6 +10,9 @@ cat populated/pop_struct_text_group_dset.h >> trexio_text.h
cat populated/pop_struct_text_group.h >> trexio_text.h
cat basic_text.h >> trexio_text.h
cat hrw_determinant_text.h >> trexio_text.h
cat *_determinant_text.c >> trexio_text.c
cat populated/pop_free_group_text.c >> trexio_text.c
cat populated/pop_read_group_text.c >> trexio_text.c
cat populated/pop_flush_group_text.c >> trexio_text.c

View File

@ -1313,6 +1313,182 @@ trexio_text_delete_$group$ (trexio_t* const file)
}
#+end_src
** Source code for the determinant part
Each array is stored in its own =.txt= file because determinant I/O has to be decoupled
from the conventional write/read/flush behaviour of the TEXT back end. The data is read and written in chunks
to avoid exhausting the memory. Each chunk has a size of type ~int64_t~,
which specifies the number of data items (e.g. determinants) in the chunk.
#+begin_src c :tangle hrw_determinant_text.h :exports none
trexio_exit_code trexio_text_has_determinant_list(trexio_t* const file);
trexio_exit_code trexio_text_read_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, int64_t* const eof_read_size, int64_t* const list);
trexio_exit_code trexio_text_write_determinant_list(trexio_t* const file, const int64_t offset_file, const uint32_t rank, const uint64_t* dims, const int64_t* list);
#+end_src
#+begin_src c :tangle read_determinant_text.c
/* Read a chunk of determinants from the determinant_list.txt file of a TEXT back end directory.

   file          : TREXIO file handle; file->file_name is the directory that contains the data file
   offset_file   : number of determinants (i.e. lines of the data file) to skip before reading
   rank          : not used by this function
   dims          : dims[0] is the number of determinants to read,
                   dims[1] is the number of integers stored per determinant
   eof_read_size : set to the number of determinants read, only when the end of the file is reached first
   list          : output buffer, filled determinant by determinant (dims[1] integers each),
                   so it must have room for dims[0]*dims[1] integers

   Returns TREXIO_SUCCESS when dims[0] determinants have been read and TREXIO_END when the file ended earlier.
   Errors: TREXIO_INVALID_ARG_1 (NULL file, or directory name too long for the path buffer),
   TREXIO_INVALID_ARG_5 (NULL eof_read_size), TREXIO_FILE_ERROR (file cannot be opened, positioned or closed),
   TREXIO_FAILURE (NULL dims or list, failed allocation, or a line that cannot be parsed). */
trexio_exit_code trexio_text_read_determinant_list(
    trexio_t* const file,
    const int64_t offset_file,
    const uint32_t rank,
    const uint64_t* dims,
    int64_t* const eof_read_size,
    int64_t* const list)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
  if (dims == NULL || list == NULL) return TREXIO_FAILURE;

  const char determinant_list_file_name[] = "/determinant_list.txt";

  /* Full path to the TXT file with the determinant list: TREXIO directory name + file name.
     snprintf always terminates the string and reports truncation, unlike strncpy/strncat. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
  const int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                                file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_INVALID_ARG_1;

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Length of one line of the file, needed to offset properly:
     each integer is written in a field of 10 characters followed by one space,
     there are dims[1] integers per determinant and one newline character at the end.
     NOTE(review): an integer that needs more than 10 characters does not fit this layout. */
  const uint64_t int_per_det = dims[1];
  const uint64_t line_length = int_per_det * 11UL + 1UL;

  /* Skip offset_file determinants; a negative offset makes fseek fail */
  if (fseek(f, (long) offset_file * (long) line_length, SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* One full line (including its newline) plus the terminating null character */
  char* buffer = CALLOC(line_length + 1UL, char);
  if (buffer == NULL) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  trexio_exit_code status = TREXIO_SUCCESS;
  uint64_t count = 0UL;

  for (uint64_t i = 0UL; i < dims[0]; ++i) {

    if (fgets(buffer, (int) (line_length + 1UL), f) == NULL) {
      /* Nothing left to read: report how many determinants were obtained */
      *eof_read_size = (int64_t) count;
      status = TREXIO_END;
      break;
    }

    /* The number of integers per line depends on mo_num, so the line is parsed field by field.
       A separate cursor is advanced so that buffer keeps pointing to the allocated memory. */
    const char* cursor = buffer;
    for (uint64_t j = 0UL; j < int_per_det; ++j) {
      int shift = 0;
      const int rc = sscanf(cursor, "%10" SCNd64 "%n", list + i * int_per_det + j, &shift);
      if (rc <= 0) {
        status = TREXIO_FAILURE;
        break;
      }
      cursor += shift;
    }
    if (status != TREXIO_SUCCESS) break;

    count += 1UL;
  }

  FREE(buffer);

  /* Close the TXT file; a close error is only reported when nothing else went wrong */
  if (fclose(f) != 0 && status == TREXIO_SUCCESS) status = TREXIO_FILE_ERROR;

  return status;
}
#+end_src
#+begin_src c :tangle write_determinant_text.c
/* Append a chunk of determinants to the determinant_list.txt file of a TEXT back end directory.

   file        : TREXIO file handle; file->file_name is the directory that contains the data file
   offset_file : not used by this function (the data is always appended at the end of the file)
   rank        : not used by this function
   dims        : dims[0] is the number of determinants to write,
                 dims[1] is the number of integers stored per determinant
   list        : input buffer with dims[0]*dims[1] integers, stored determinant by determinant

   One line is written per determinant: dims[1] integers, each in a field of at least 10 characters
   followed by one space, and a newline character at the end.
   NOTE(review): an integer that needs more than 10 characters makes the line longer than
   the 11 characters per integer that the comment in the matching read function describes.

   Returns TREXIO_SUCCESS on success, TREXIO_INVALID_ARG_1 (NULL file, or directory name too long
   for the path buffer), TREXIO_FILE_ERROR (file cannot be opened or closed),
   TREXIO_FAILURE (NULL dims or list, or a failed write). */
trexio_exit_code trexio_text_write_determinant_list(trexio_t* const file,
                                                    const int64_t offset_file,
                                                    const uint32_t rank,
                                                    const uint64_t* dims,
                                                    const int64_t* list)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (dims == NULL || list == NULL) return TREXIO_FAILURE;

  const char determinant_list_file_name[] = "/determinant_list.txt";

  /* Full path to the TXT file with the determinant list: TREXIO directory name + file name.
     snprintf always terminates the string and reports truncation, unlike strncpy/strncat. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
  const int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                                file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_INVALID_ARG_1;

  /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
  FILE* f = fopen(file_full_path, "a");
  if (f == NULL) return TREXIO_FILE_ERROR;

  const uint64_t int_per_det = dims[1];
  int rc;

  for (uint64_t i = 0UL; i < dims[0]; ++i) {

    /* Determinant i occupies int_per_det consecutive integers of the input buffer */
    const int64_t* det = list + i * int_per_det;

    /* Write one line with the integer bit fields for alpha and beta electrons */
    for (uint64_t j = 0UL; j < int_per_det; ++j) {
      rc = fprintf(f, "%10" PRId64 " ", det[j]);
      if (rc <= 0) {
        fclose(f);
        return TREXIO_FAILURE;
      }
    }

    rc = fprintf(f, "%s", "\n");
    if (rc <= 0) {
      fclose(f);
      return TREXIO_FAILURE;
    }
  }

  /* Close the TXT file; buffered data is flushed here, so the result has to be checked */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Exit upon success */
  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle has_determinant_text.c
/* Check whether the determinant_list.txt file exists in a TEXT back end directory.

   file : TREXIO file handle; file->file_name is the directory that would contain the data file

   Returns TREXIO_SUCCESS when the file exists, TREXIO_HAS_NOT when it does not,
   and TREXIO_INVALID_ARG_1 for a NULL file or a directory name too long for the path buffer. */
trexio_exit_code trexio_text_has_determinant_list(trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  const char determinant_list_file_name[] = "/determinant_list.txt";

  /* Full path to the TXT file with the determinant list: TREXIO directory name + file name.
     snprintf always terminates the string and reports truncation, unlike strncpy/strncat. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
  const int path_len = snprintf(file_full_path, sizeof(file_full_path), "%s%s",
                                file->file_name, determinant_list_file_name);
  if (path_len < 0 || (size_t) path_len >= sizeof(file_full_path)) return TREXIO_INVALID_ARG_1;

  /* The return code of access tells whether the file with data exists or not */
  return (access(file_full_path, F_OK) == 0) ? TREXIO_SUCCESS : TREXIO_HAS_NOT;
}
#+end_src
* Constant file suffixes (not used by the generator) :noexport:
#+begin_src c :tangle suffix_text.h

View File

@ -4,6 +4,7 @@ from generator_tools import *
# --------------------- GET CONFIGURATION FROM THE TREX.JSON ---------------- #
config_file = 'trex.json'
trex_config = read_json(config_file)
trex_config.pop('determinant')
# --------------------------------------------------------------------------- #
# -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- #