From c1e11764007b3e8ce311339e7174bca74e34b1c8 Mon Sep 17 00:00:00 2001 From: q-posev Date: Wed, 24 Nov 2021 17:44:54 +0100 Subject: [PATCH] [WIP] text back end --- src/templates_front/templator_front.org | 107 +++--- src/templates_text/templator_text.org | 423 +++++++----------------- 2 files changed, 189 insertions(+), 341 deletions(-) diff --git a/src/templates_front/templator_front.org b/src/templates_front/templator_front.org index 3a81741..d6932bf 100644 --- a/src/templates_front/templator_front.org +++ b/src/templates_front/templator_front.org @@ -1628,7 +1628,6 @@ def has_$group_num$(trexio_file) -> bool: **** Function declarations - #+begin_src c :tangle hrw_dset_data_front.h :exports none trexio_exit_code trexio_has_$group_dset$(trexio_t* const file); trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, $group_dset_dtype_default$* const $group_dset$); @@ -1647,7 +1646,6 @@ trexio_exit_code trexio_write_safe_$group_dset$_64(trexio_t* const file, const $ **** Source code for double precision functions - #+begin_src c :tangle read_dset_data_64_front.c trexio_exit_code trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* const $group_dset$) @@ -2350,7 +2348,8 @@ def has_$group_dset$(trexio_file) -> bool: return False #+end_src -** Sparse data structures +** Templates for front end has/read/write a dataset of sparse data +*** Introduction Sparse data structures are used typically for large tensors such as two-electron integrals. For example, in the =trex.json= file sparse @@ -2358,12 +2357,11 @@ def has_$group_dset$(trexio_file) -> bool: #+begin_src python "ao_2e_int" : { - "eri_num" : [ "int", [ ] ] "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ] } #+end_src - The electron repulsion integral $\langle ij | kl \rangle$ is + The electron repulsion integral (eri) $\langle ij | kl \rangle$ is represented as a quartet of integers $(i,j,k,l)$ and a floating point value. @@ -2384,85 +2382,118 @@ def has_$group_dset$(trexio_file) -> bool: As the number of integrals to store can be prohibitively large, we provide the possibility to read/write the integrals in chunks. So the functions take two extra parameters: - - ~offset~ : the index of the 1st integral we want to read. An - offset of zero implies to read the first integral. - - ~num~ : the number of integrals to read. + + - ~offset~ : how many integrals in the file should be skipped when reading. + An offset of zero implies to read the first integral. + - ~size~ : the number of integrals to read. We provide a function to read a chunk of indices, and a function to read a chunk of values, because some users might want to read only the values of the integrals, or only the indices. - Here is an example for the indices: +*** C templates for front end +**** Function declarations - #+BEGIN_SRC c + #+begin_src c :tangle hrw_sparse_front.h :exports none +trexio_exit_code trexio_has_$group_sparse_dset$(trexio_t* const file); +trexio_exit_code trexio_read_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t offset_dset, const int64_t size, int32_t* const index_sparse, double* const value_sparse); +trexio_exit_code trexio_write_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t offset_dset, const int64_t size, const int32_t* index_sparse, const double* value_sparse); +//trexio_exit_code trexio_read_$group_sparse_dset$_value(trexio_t* const file, const uint64_t offset, const uint_64_t size, int32_t* const value_sparse); +//trexio_exit_code trexio_write_$group_sparse_dset$_value(trexio_t* const file, const uint64_t offset, const uint_64_t size, double* const value_sparse); + #+end_src + +**** Source code for default functions + + #+begin_src c :tangle read_sparse_front.c trexio_exit_code -trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file, - const int64_t offset, - const int64_t num, - int32_t* buffer) +trexio_read_$group_sparse_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t offset_dset, + const int64_t size, + int32_t* const index_sparse, + double* const value_sparse + ) { if (file == NULL) return TREXIO_INVALID_ARG_1; - if (offset < 0L) return TREXIO_INVALID_ARG_2; - if (num < 0L) return TREXIO_INVALID_ARG_3; + if (offset_file < 0L) return TREXIO_INVALID_ARG_2; + if (offset_dset < 0L) return TREXIO_INVALID_ARG_3; + if (size <= 0L) return TREXIO_INVALID_ARG_4; + if (index_sparse == NULL) return TREXIO_INVALID_ARG_5; + if (value_sparse == NULL) return TREXIO_INVALID_ARG_6; - const uint32_t rank = 4; // To be set by generator : number of indices + const uint32_t rank = $group_sparse_dset_rank$; // To be set by generator : number of indices - int64_t nmax; // Max number of integrals + int64_t size_max; // Max number of integrals (already in the file) trexio_exit_code rc; - rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); + rc = trexio_read_$group_sparse_dset$_num(file, &size_max); if (rc != TREXIO_SUCCESS) return rc; switch (file->back_end) { case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); + return trexio_text_read_$group_sparse_dset$(file, offset_file, offset_dset, size, size_max, rank, index_sparse, value_sparse); break; case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax); + return trexio_hdf5_read_$group_sparse_dset$(file, offset_file, offset_dset, size, size_max, rank, index_sparse, value_sparse); break; - +/* + case TREXIO_JSON: + return trexio_json_read_$group_sparse_dset$(...); + break; +*/ default: return TREXIO_FAILURE; /* Impossible case */ } } - #+END_SRC + #+end_src - For the values, - - #+BEGIN_SRC c + #+begin_src c :tangle write_sparse_front.c trexio_exit_code -trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file, - const int64_t offset, - const int64_t num, - double* buffer) +trexio_write_$group_sparse_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t offset_dset, + const int64_t size, + const int32_t* index_sparse, + const double* value_sparse + ) { if (file == NULL) return TREXIO_INVALID_ARG_1; - if (offset < 0L) return TREXIO_INVALID_ARG_2; - if (num < 0L) return TREXIO_INVALID_ARG_3; + if (offset_file < 0L) return TREXIO_INVALID_ARG_2; + if (offset_dset < 0L) return TREXIO_INVALID_ARG_3; + if (size <= 0L) return TREXIO_INVALID_ARG_4; + if (index_sparse == NULL) return TREXIO_INVALID_ARG_5; + if (value_sparse == NULL) return TREXIO_INVALID_ARG_6; - int64_t nmax; // Max number of integrals + const uint32_t rank = $group_sparse_dset_rank$; // To be set by generator : number of indices + + int64_t size_max; // Max number of integrals (already in the file) trexio_exit_code rc; - rc = trexio_read_ao_2e_int_eri_num(const file, &nmax); + rc = trexio_read_$group_sparse_dset$_num(file, &size_max); if (rc != TREXIO_SUCCESS) return rc; switch (file->back_end) { case TREXIO_TEXT: - return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax); + return trexio_text_write_$group_sparse_dset$(file, offset_file, offset_dset, size, size_max, rank, index_sparse, value_sparse); break; case TREXIO_HDF5: - return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax); + return trexio_hdf5_write_$group_sparse_dset$(file, offset_file, offset_dset, size, size_max, rank, index_sparse, value_sparse); break; - +/* + case TREXIO_JSON: + return trexio_json_write_$group_sparse_dset$(...); + break; +*/ default: return TREXIO_FAILURE; /* Impossible case */ } } - #+END_SRC + #+end_src + ** Templates for front end has/read/write a dataset of strings *** Introduction diff --git a/src/templates_text/templator_text.org b/src/templates_text/templator_text.org index f82c3be..c9d8204 100644 --- a/src/templates_text/templator_text.org +++ b/src/templates_text/templator_text.org @@ -93,22 +93,10 @@ typedef struct $group$_s { ** Template for general structure in text back end - #+begin_src c :tangle struct_text_group.h -typedef struct rdm_s { - uint64_t dim_one_e; - uint32_t to_flush; - uint32_t padding; - double* one_e; - char file_name[TREXIO_MAX_FILENAME_LENGTH]; - char two_e_file_name[TREXIO_MAX_FILENAME_LENGTH]; -} rdm_t; - #+end_src - #+begin_src c :tangle struct_text_group.h typedef struct trexio_text_s { trexio_t parent ; $group$_t* $group$; - rdm_t* rdm; int lock_file; } trexio_text_t; #+end_src @@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file) /* Error handling for this call is added by the generator */ rc = trexio_text_free_$group$( (trexio_text_t*) file); - rc = trexio_text_free_rdm( (trexio_text_t*) file); - if (rc != TREXIO_SUCCESS) return rc; - return TREXIO_SUCCESS; } @@ -1016,324 +1001,156 @@ trexio_text_has_$group_str$ (trexio_t* const file) } #+end_src -** RDM struct (hard-coded) -*** Read the complete struct +** Template for has/read/write the dataset of sparse data - #+begin_src c :tangle rdm_text.h -rdm_t* trexio_text_read_rdm(trexio_text_t* const file); - #+end_src - - #+begin_src c :tangle rdm_text.c -rdm_t* trexio_text_read_rdm(trexio_text_t* const file) { - if (file == NULL) return NULL; - - if (file->rdm != NULL) return file->rdm; - - /* Allocate the data structure */ - rdm_t* rdm = MALLOC(rdm_t); - assert (rdm != NULL); - - rdm->one_e = NULL; - rdm->two_e_file_name[0] = '\0'; - rdm->to_flush = 0; - - /* Try to open the file. If the file does not exist, return */ - const char* rdm_file_name = "/rdm.txt"; - - strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH); - - strncat (rdm->file_name, rdm_file_name, - TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name)); - - if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') { - FREE(rdm); - return NULL; - } - /* If the file exists, read it */ - FILE* f = fopen(rdm->file_name,"r"); - if (f != NULL) { - - /* Find size of file to allocate the max size of the string buffer */ - fseek(f, 0L, SEEK_END); - size_t sz = ftell(f); - fseek(f, 0L, SEEK_SET); - sz = (sz < 1024) ? (1024) : (sz); - char* buffer = CALLOC(sz, char); - - /* Read the dimensioning variables */ - int rc; - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "dim_one_e") == 0); - - rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e)); - assert (rc == 1); - - /* Allocate arrays */ - rdm->one_e = CALLOC(rdm->dim_one_e, double); - assert (rdm->one_e != NULL); - - /* Read one_e */ - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "one_e") == 0); - - for (uint64_t i=0 ; idim_one_e; ++i) { - rc = fscanf(f, "%lf", &(rdm->one_e[i])); - assert (rc == 1); - } - - /* Read two_e */ - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - assert (strcmp(buffer, "two_e_file_name") == 0); - - rc = fscanf(f, "%1023s", buffer); - assert (rc == 1); - strncpy(rdm->two_e_file_name, buffer, 1024); - if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') { - FREE(buffer); - FREE(rdm->one_e); - FREE(rdm); - fclose(f); - return NULL; - } - - FREE(buffer); - fclose(f); - f = NULL; - } - file->rdm = rdm ; - return rdm; -} + #+begin_src c :tangle hrw_sparse_text.h :exports none +trexio_exit_code trexio_text_has_$group_sparse_dset$(trexio_t* const file); +trexio_exit_code trexio_text_read_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t offset_dset, const int_64_t size, const int64_t size_max, int32_t* const index_sparse, double* const value_sparse); +trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file, const int64_t offset_file, const int64_t offset_dset, const int_64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse); #+end_src -*** Flush the complete struct + #+begin_src c :tangle write_sparse_text.c +trexio_exit_code trexio_text_write_$group_sparse_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t offset_dset, + const int64_t size, + const int64_t size_max, + const int32_t* index_sparse, + const double* value_sparse) +{ + if (file == NULL) return TREXIO_FILE_ERROR; - #+begin_src c :tangle rdm_text.h -trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file); - #+end_src + /* Build the name of the file with sparse data*/ + const char* $group_sparse_dset$_file_name = "/$group_sparse_dset$.txt"; + const char file_abs_path[TREXIO_MAX_FILENAME_LENGTH]; - #+begin_src c :tangle rdm_text.c -trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) { - if (file == NULL) return TREXIO_INVALID_ARG_1; + strncpy (file_abs_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + strncat (file_abs_path, $group_sparse_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_sparse_dset$_file_name)); - if (file->parent.mode == 'r') return TREXIO_READONLY; - rdm_t* const rdm = file->rdm; - if (rdm == NULL) return TREXIO_SUCCESS; - - if (rdm->to_flush == 0) return TREXIO_SUCCESS; - - FILE* f = fopen(rdm->file_name,"w"); - assert (f != NULL); - - /* Write the dimensioning variables */ - fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e); - - /* Write arrays */ - fprintf(f, "one_e\n"); - for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) { - fprintf(f, "%lf\n", rdm->one_e[i]); + FILE* f = fopen(file_abs_path, "a"); + //TODO ERROR HANDLING + assert(f != NULL); + + // read the currently written number of elements + // line_length is 69 because + // 10 per index (40 in total) + 4 spaces + 24 for floating point value + 1 for newline char + // in general: 10*n_indices + n_indices + 24 + 1 + const uint64_t line_length = $group_sparse_dset_line_length$L; + + fseek(f, (long) offset_file * line_length, SEEK_SET); + + for (uint64_t i=0L+offset_data ; i 0); } - - fprintf(f, "two_e_file_name\n"); - fprintf(f, "%s\n", rdm->two_e_file_name); - +/* + int rc = fprintf(f, "%10d %10d %10d %10d %24.16e\n", + index[4*i], + index[4*i+1], + index[4*i+2], + index[4*i+3], + value[i]); +*/ + fclose(f); - rdm->to_flush = 0; return TREXIO_SUCCESS; } #+end_src -*** Free memory + + #+begin_src c :tangle read_sparse_text.c +trexio_exit_code trexio_text_read_$group_sparse_dset$(trexio_t* const file, + const int64_t offset_file, + const int64_t offset_dset, + const int64_t size, + const int64_t size_max, + int32_t* const index_sparse, + double* const value_sparse) +{ + if (file == NULL) return TREXIO_FILE_ERROR; + + /* Build the name of the file with sparse data*/ + const char* $group_sparse_dset$_file_name = "/$group_sparse_dset$.txt"; + const char file_abs_path[TREXIO_MAX_FILENAME_LENGTH]; - Memory is allocated when reading. The followig function frees memory. + strncpy (file_abs_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + strncat (file_abs_path, $group_sparse_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_sparse_dset$_file_name)); - #+begin_src c :tangle rdm_text.h -trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file); - #+end_src - #+begin_src c :tangle rdm_text.c -trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) { - if (file == NULL) return TREXIO_INVALID_ARG_1; + FILE* f = fopen(file_abs_path, "a"); + //TODO ERROR HANDLING + assert(f != NULL); + + // read the currently written number of elements + // line_length is 69 because + // 10 per index (40 in total) + 4 spaces + 24 for floating point value + 1 for newline char + // in general: 10*n_indices + n_indices + 24 + 1 + const uint64_t line_length = $group_sparse_dset_line_length$L; + + fseek(f, (long) offset_file * line_length, SEEK_SET); + + for (uint64_t i=0L+offset_data ; iparent.mode != 'r') { - trexio_exit_code rc = trexio_text_flush_rdm(file); - if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE; +// TODO: find a way to indicate the number of elements being read (useful?) + if (rc == EOF){ + return TREXIO_END; + } else { + assert(rc > 0); + } } - - rdm_t* const rdm = file->rdm; - if (rdm == NULL) return TREXIO_SUCCESS; - - if (rdm->one_e != NULL) { - FREE (rdm->one_e); - } - - free (rdm); - file->rdm = NULL; +/* + int rc = fscanf(f, "%d %d %d %d %lf", + &index[4*i], + &index[4*i+1], + &index[4*i+2],N OPEN + &index[4*i+3], + &value[i]); +,*/ + + fclose(f); return TREXIO_SUCCESS; + } - #+end_src - -*** Read/Write the one_e attribute - - The ~one_e~ array is assumed allocated with the appropriate size. - - #+begin_src c :tangle rdm_text.h -trexio_exit_code -trexio_text_read_rdm_one_e(trexio_t* const file, - double* const one_e, - const uint64_t dim_one_e); - -trexio_exit_code -trexio_text_write_rdm_one_e(trexio_t* const file, - const double* one_e, - const uint64_t dim_one_e); #+end_src - #+begin_src c :tangle rdm_text.c -trexio_exit_code -trexio_text_read_rdm_one_e(trexio_t* const file, - double* const one_e, - const uint64_t dim_one_e) + #+begin_src c :tangle has_sparse_text.c +trexio_exit_code trexio_text_has_$group_sparse_dset$(trexio_t* const file) { - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (one_e == NULL) return TREXIO_INVALID_ARG_2; + if (file == NULL) return TREXIO_FILE_ERROR; + + /* Build the name of the file with sparse data*/ + const char* $group_sparse_dset$_file_name = "/$group_sparse_dset$.txt"; + const char file_abs_path[TREXIO_MAX_FILENAME_LENGTH]; - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; + strncpy (file_abs_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH); + strncat (file_abs_path, $group_sparse_dset$_file_name, + TREXIO_MAX_FILENAME_LENGTH-strlen($group_sparse_dset$_file_name)); - if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3; + /*struct stat buffer; + int rc = stat(file_abs_path, &buffer) + if (rc == 0) { + return TREXIO_SUCCESS; + } else { + return TREXIO_HAS_NOT; + }*/ - for (uint64_t i=0 ; ione_e[i]; + int fd = open(file_abs_path, O_CREAT | O_EXCL | O_RDONLY); + if (fd < 0) { + if(errno == EEXIST) return TREXIO_SUCCESS; + } else { + return TREXIO_HAS_NOT; } - - return TREXIO_SUCCESS; } - - -trexio_exit_code -trexio_text_write_rdm_one_e(trexio_t* const file, - const double* one_e, - const uint64_t dim_one_e) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (one_e == NULL) return TREXIO_INVALID_ARG_2; - if (file->mode != 'r') return TREXIO_READONLY; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - rdm->dim_one_e = dim_one_e; - for (uint64_t i=0 ; ione_e[i] = one_e[i]; - } - - rdm->to_flush = 1; - return TREXIO_SUCCESS; -} - #+end_src - -*** Read/Write the two_e attribute - - ~two_e~ is a sparse data structure, which can be too large to fit - in memory. So we provide functions to read and write it by - chunks. - In the text back end, the easiest way to do it is to create a - file for each sparse float structure. - - #+begin_src c :tangle rdm_text.h -trexio_exit_code -trexio_text_buffered_read_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - int64_t* const index, - double* const value); - -trexio_exit_code -trexio_text_buffered_write_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - const int64_t* index, - const double* value); #+end_src - #+begin_src c :tangle rdm_text.c -trexio_exit_code -trexio_text_buffered_read_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - int64_t* const index, - double* const value) -{ - if (file == NULL) return TREXIO_INVALID_ARG_1; - if (index == NULL) return TREXIO_INVALID_ARG_4; - if (value == NULL) return TREXIO_INVALID_ARG_5; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - FILE* f = fopen(rdm->two_e_file_name, "r"); - if (f == NULL) return TREXIO_END; - - const uint64_t line_length = 64L; - fseek(f, (long) offset * line_length, SEEK_SET); - - for (uint64_t i=0 ; imode != 'r') return TREXIO_READONLY; - - rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file); - if (rdm == NULL) return TREXIO_FAILURE; - - FILE* f = fopen(rdm->two_e_file_name, "w"); - if (f == NULL) return TREXIO_FAILURE; - - const uint64_t line_length = 64L; - fseek(f, (long) offset * line_length, SEEK_SET); - - for (uint64_t i=0 ; i