1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-11-04 21:24:08 +01:00

Merge pull request #70 from TREX-CoE/add-sparse-datasets

- Add sparse datasets
- Modularize generator_tools.py
This commit is contained in:
Evgeny Posenitskiy 2021-12-17 17:13:22 +01:00 committed by GitHub
commit 8ca74ffef1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 2349 additions and 783 deletions

4
.gitignore vendored
View File

@ -11,6 +11,8 @@ m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
autom4te.cache/
build-config/
ar-lib
compile
config.guess
config.log
@ -38,5 +40,3 @@ test-suite.log
*.h5
trexio-*.tar.gz
trex.json

View File

@ -90,6 +90,7 @@ TESTS_C = \
tests/io_num_text \
tests/io_dset_float_text \
tests/io_dset_int_text \
tests/io_dset_sparse_text \
tests/io_safe_dset_float_text \
tests/io_str_text \
tests/io_dset_str_text \
@ -102,6 +103,7 @@ TESTS_C += \
tests/io_num_hdf5 \
tests/io_dset_float_hdf5 \
tests/io_dset_int_hdf5 \
tests/io_dset_sparse_hdf5 \
tests/io_safe_dset_float_hdf5 \
tests/io_str_hdf5 \
tests/io_dset_str_hdf5 \
@ -117,8 +119,8 @@ check_PROGRAMS = $(TESTS)
# specify common LDADD options for all tests
LDADD = src/libtrexio.la
test_trexio_f = $(srcdir)/tests/trexio_f.f90
CLEANFILES += $(test_trexio_f)
$(test_trexio_f): $(trexio_f)
cp $(trexio_f) $(test_trexio_f)
@ -126,7 +128,6 @@ $(test_trexio_f): $(trexio_f)
trexio.mod: tests/trexio_f.o
tests_test_f_SOURCES = $(test_trexio_f) tests/test_f.f90
tests_test_f_LDFLAGS = -no-install
clean-local:
-rm -rf -- *.dir/ *.h5 __pycache__/
@ -134,7 +135,7 @@ clean-local:
# =============== DOCUMENTATION =============== #
HTML_TANGLED = docs/index.html \
docs/Sparse.html \
docs/examples.html \
docs/templator_hdf5.html \
docs/trex.html \
docs/README.html \
@ -179,11 +180,13 @@ BUILT_SOURCES += $(SOURCES) $(trexio_f) $(test_trexio_f)
all: .git_hash
GENERATOR_FILES = $(srcdir)/tools/generator.py \
$(srcdir)/tools/generator_tools.py
$(SOURCES): $(trexio_f)
src/trexio.c: $(trexio_h)
$(trexio_f): $(ORG_FILES)
$(trexio_f): $(ORG_FILES) $(GENERATOR_FILES)
cd $(srcdir)/tools && ./build_trexio.sh
$(htmlizer): $(ORG_FILES) $(srcdir)/src/README.org
@ -227,7 +230,7 @@ $(pytrexio_py): $(pytrexio_c)
# Build Python module and C wrapper code for TREXIO using SWIG
# [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface
$(pytrexio_c): $(ORG_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
cp $(trexio_h) src/
cd src/ && \
$(SWIG) -python -py3 -o pytrexio_wrap.c pytrexio.i
@ -248,4 +251,3 @@ CLEANFILES += $(pytrexio_c) \
.PHONY: cppcheck python-test python-install python-sdist check-numpy FORCE
endif

View File

@ -1,22 +0,0 @@
See templator_front.org
* Text back end
As the size of the dataset should be extensible, the simplest
solution is to use one file for each sparse data set, and store a
the name of this file in the group.
Each integral can be a line in the file:
i j k l x
which can be read with "%10ld %10ld %10ld %10ld %24.16e".
The offset can be used with ~fseek(69L*offset, SEEK_SET)~
* HDF5 Back end
We need to declare the number of rows of the dataset as
~UNLIMITED~. This requires to use the ~Chunked~ storage, and the
chunks should absolutely not be larger than 1MB.
To extend the storage, see :
https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
(figure 17)
If the offset+num > nmax, we need to extend the dataset.

262
examples.org Normal file
View File

@ -0,0 +1,262 @@
#+TITLE: Examples
#+STARTUP: latexpreview
#+SETUPFILE: docs/theme.setup
* Accessing sparse quantities
** Fortran
:PROPERTIES:
:header-args: :tangle print_energy.f90
:END:
#+begin_src f90
program print_energy
use trexio
implicit none
character*(128) :: filename ! Name of the input file
integer :: rc ! Return code for error checking
integer(8) :: f ! TREXIO file handle
character*(128) :: err_msg ! Error message
#+end_src
This program computes the energy as:
\[
E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\,
+\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l
\rangle\; \textrm{ with } \; 0 < i,j,k,l \le n
\]
One needs to read from the TREXIO file:
- $n$ :: The number of molecular orbitals
- $E_{\text{NN}}$ :: The nuclear repulsion energy
- $D_{ij}$ :: The one-body reduced density matrix
- $\langle i |h| j \rangle$ :: The one-electron Hamiltonian integrals
- $\Gamma_{ijkl}$ :: The two-body reduced density matrix
- $\langle i j | k l \rangle$ :: The electron repulsion integrals
#+begin_src f90
integer :: n
double precision :: E, E_nn
double precision, allocatable :: D(:,:), h0(:,:)
double precision, allocatable :: G(:,:,:,:), W(:,:,:,:)
#+end_src
*** Declare Temporary variables
#+begin_src f90
integer :: i, j, k, l, m
integer(8), parameter :: BUFSIZE = 100000_8
integer(8) :: offset, icount, size_max
integer :: buffer_index(4,BUFSIZE)
double precision :: buffer_values(BUFSIZE)
double precision, external :: ddot ! BLAS dot product
#+end_src
*** Obtain the name of the TREXIO file from the command line, and open it for reading
#+begin_src f90
call getarg(1, filename)
f = trexio_open (filename, 'r', TREXIO_HDF5, rc)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error opening TREXIO file: '//trim(err_msg)
stop
end if
#+end_src
*** Read the nuclear repulsion energy
#+begin_src f90
rc = trexio_read_nucleus_repulsion(f, E_nn)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading nuclear repulsion: '//trim(err_msg)
stop
end if
#+end_src
*** Read the number of molecular orbitals
#+begin_src f90
rc = trexio_read_mo_num(f, n)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading number of MOs: '//trim(err_msg)
stop
end if
#+end_src
*** Allocate memory
#+begin_src f90
allocate( D(n,n), h0(n,n) )
allocate( G(n,n,n,n), W(n,n,n,n) )
G(:,:,:,:) = 0.d0
W(:,:,:,:) = 0.d0
#+end_src
*** Read one-electron quantities
#+begin_src f90
rc = trexio_has_mo_1e_int_core_hamiltonian(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No core hamiltonian in file'
end if
rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading core Hamiltonian: '//trim(err_msg)
stop
end if
rc = trexio_has_rdm_1e(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No 1e RDM in file'
end if
rc = trexio_read_rdm_1e(f, D)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading one-body RDM: '//trim(err_msg)
stop
end if
#+end_src
*** Read two-electron quantities
Reading is done with OpenMP. Each thread reads its own buffer, and
the buffers are then processed in parallel.
Reading the file requires a lock, so it is done in a critical
section. The ~offset~ variable is shared, and it is incremented in
the critical section. For each read, the function returns in
~icount~ the number of read integrals, so this variable needs also
to be protected in the critical section when modified.
**** Electron repulsion integrals
#+begin_src f90
! Stop early if the file contains no electron repulsion integrals.
rc = trexio_has_mo_2e_int_eri(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No electron repulsion integrals in file'
end if
! size_max is the upper bound used below to decide whether anything is left to read.
rc = trexio_read_mo_2e_int_eri_size (f, size_max)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading number of ERIs: '//trim(err_msg)
stop
end if
! offset is shared by all threads; every thread owns its icount and its buffers.
offset = 0_8
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
!$OMP buffer_index, buffer_values, m)
icount = BUFSIZE
! A thread keeps looping as long as its previous read filled a whole buffer;
! a shorter (or empty) read ends its loop.
do while (icount == BUFSIZE)
! The shared offset is tested and advanced only inside the critical section.
!$OMP CRITICAL
if (offset < size_max) then
rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values)
offset = offset + icount
else
! Nothing left in the file: an empty count skips the loop below and ends the while.
icount = 0
end if
!$OMP END CRITICAL
! Scatter the icount integrals of this buffer into W; each value is stored
! under the eight index permutations written out below.
do m=1,icount
i = buffer_index(1,m)
j = buffer_index(2,m)
k = buffer_index(3,m)
l = buffer_index(4,m)
W(i,j,k,l) = buffer_values(m)
W(k,j,i,l) = buffer_values(m)
W(i,l,k,j) = buffer_values(m)
W(k,l,i,j) = buffer_values(m)
W(j,i,l,k) = buffer_values(m)
W(j,k,l,i) = buffer_values(m)
W(l,i,j,k) = buffer_values(m)
W(l,k,j,i) = buffer_values(m)
end do
end do
!$OMP END PARALLEL
#+end_src
**** Reduced density matrix
#+begin_src f90
! Stop early if the file contains no two-body reduced density matrix.
rc = trexio_has_rdm_2e(f)
if (rc /= TREXIO_SUCCESS) then
  stop 'No two-body density matrix in file'
end if

! size_max is the upper bound used below to decide whether anything is left to read.
rc = trexio_read_rdm_2e_size (f, size_max)
if (rc /= TREXIO_SUCCESS) then
  call trexio_string_of_error(rc, err_msg)
  print *, 'Error reading number of 2-RDM elements: '//trim(err_msg)
  stop
end if

! offset is shared by all threads; every thread owns its icount and its buffers.
offset = 0_8
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
!$OMP  buffer_index, buffer_values, m)
icount = BUFSIZE
! The loop is driven by the thread-private icount, exactly as in the ERI loop:
! testing the shared offset here would read it outside the critical section
! while another thread may be updating it (a data race).
do while (icount == BUFSIZE)
  ! The shared offset is tested and advanced only inside the critical section.
  !$OMP CRITICAL
  if (offset < size_max) then
    rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values)
    offset = offset + icount
  else
    ! Nothing left in the file: an empty count skips the loop below and ends the while.
    icount = 0
  end if
  !$OMP END CRITICAL
  ! Scatter the icount elements of this buffer into G.
  do m=1,icount
    i = buffer_index(1,m)
    j = buffer_index(2,m)
    k = buffer_index(3,m)
    l = buffer_index(4,m)
    G(i,j,k,l) = buffer_values(m)
  end do
end do
!$OMP END PARALLEL
#+end_src
*** Compute the energy
As $(n,m)$ 2D arrays are stored in memory as $(n \times m)$ 1D
arrays, we could pass the matrices to the ~ddot~ BLAS function to
perform the summations in a single call for the 1-electron quantities.
Instead, we prefer to interleave the 1-electron (negative) and
2-electron (positive) summations to have a better cancellation of
numerical errors.
Here $n^4$ can be larger than the largest possible 32-bit integer,
so it is not safe to pass $n^4$ to the ~ddot~ BLAS
function. Hence, we perform $n^2$ loops, using vectors of size $n^2$.
#+begin_src f90
E = 0.d0
do l=1,n
E = E + ddot( n, D(1,l), 1, h0(1,l), 1 )
do k=1,n
E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l), 1 )
end do
end do
E = E + E_nn
print *, 'Energy: ', E
#+end_src
*** Terminate
#+begin_src f90
deallocate( D, h0, G, W )
end program
#+end_src

View File

@ -188,10 +188,16 @@ __trexio_path__ = None
| ~TREXIO_DSET_MISSING~ | 25 | 'Dataset does not exist in the file' |
| ~TREXIO_BACK_END_MISSING~ | 26 | 'Requested back end is disabled' |
| ~TREXIO_INVALID_STR_LEN~ | 30 | 'Invalid max_str_len' |
| ~TREXIO_INT_SIZE_OVERFLOW~ | 31 | 'Possible integer overflow' |
# We need to force Emacs not to indent the Python code:
# -*- org-src-preserve-indentation: t
*IMPORTANT!*
The code below has to be executed within Emacs each time
a new error code is added to the table above. Otherwise, the codes
and the corresponding message are not propagated to the source code.
#+begin_src python :var table=table-exit-codes :results drawer
""" This script generates the C and Fortran constants for the error
codes from the org-mode table.
@ -257,6 +263,7 @@ return '\n'.join(result)
#define TREXIO_DSET_MISSING ((trexio_exit_code) 25)
#define TREXIO_BACK_END_MISSING ((trexio_exit_code) 26)
#define TREXIO_INVALID_STR_LEN ((trexio_exit_code) 30)
#define TREXIO_INT_SIZE_OVERFLOW ((trexio_exit_code) 31)
#+end_src
#+begin_src f90 :tangle prefix_fortran.f90 :exports none
@ -289,6 +296,7 @@ return '\n'.join(result)
integer(trexio_exit_code), parameter :: TREXIO_DSET_MISSING = 25
integer(trexio_exit_code), parameter :: TREXIO_BACK_END_MISSING = 26
integer(trexio_exit_code), parameter :: TREXIO_INVALID_STR_LEN = 30
integer(trexio_exit_code), parameter :: TREXIO_INT_SIZE_OVERFLOW = 31
#+end_src
#+begin_src python :tangle prefix_python.py :exports none
@ -322,6 +330,7 @@ return '\n'.join(result)
TREXIO_DSET_MISSING = 25
TREXIO_BACK_END_MISSING = 26
TREXIO_INVALID_STR_LEN = 30
TREXIO_INT_SIZE_OVERFLOW = 31
#+end_src
:END:
@ -342,7 +351,10 @@ const char* trexio_string_of_error(const trexio_exit_code error);
void trexio_string_of_error_f(const trexio_exit_code error, char result[<<MAX_STRING_LENGTH()>>]);
#+end_src
The text strings are extracted from the previous table.
*IMPORTANT!*
The code below has to be executed within Emacs each time
a new error code is added to the table above. Otherwise, the codes
and the corresponding message are not propagated to the source code.
#+NAME:cases
#+begin_src python :var table=table-exit-codes :exports none :noweb yes
@ -442,9 +454,15 @@ return '\n'.join(result)
case TREXIO_DSET_MISSING:
return "Dataset does not exist in the file";
break;
case TREXIO_BACK_END_MISSING:
return "Requested back end is disabled";
break;
case TREXIO_INVALID_STR_LEN:
return "Invalid max_str_len";
break;
case TREXIO_INT_SIZE_OVERFLOW:
return "Possible integer overflow";
break;
#+end_example
**** C source code
@ -862,42 +880,6 @@ trexio_open(const char* file_name, const char mode,
return NULL;
}
rc = trexio_has_metadata_package_version(result);
if (rc == TREXIO_FAILURE) {
if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
free(result);
return NULL;
}
if (rc == TREXIO_HAS_NOT) {
switch (back_end) {
case TREXIO_TEXT:
rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
break;
case TREXIO_HDF5:
#ifdef HAVE_HDF5
rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
break;
#else
if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING;
return NULL;
#endif
/*
case TREXIO_JSON:
rc = trexio_json_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
break;
,*/
}
}
if (rc != TREXIO_SUCCESS) {
if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
free(result);
return NULL;
}
/* File locking */
@ -930,7 +912,42 @@ trexio_open(const char* file_name, const char mode,
return NULL;
}
/* Write metadata (i.e. package version) upon creation */
rc = trexio_has_metadata_package_version(result);
if (rc == TREXIO_FAILURE) {
if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
free(result);
return NULL;
}
if (rc == TREXIO_HAS_NOT) {
switch (back_end) {
case TREXIO_TEXT:
rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
break;
case TREXIO_HDF5:
#ifdef HAVE_HDF5
rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
break;
#else
if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING;
return NULL;
#endif
}
}
if (rc != TREXIO_SUCCESS) {
if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
free(result);
return NULL;
}
/* Exit upon success */
if (rc_open != NULL) *rc_open = TREXIO_SUCCESS;
return result;
}
#+end_src
@ -1182,8 +1199,8 @@ def close(trexio_file):
| ~$group_dset_dim_list$~ | All dimensions of the dataset | ~{nucleus_num, 3}~ |
| ~$group_dset_dtype$~ | Basic type of the dataset (int/float/char) | ~float~ |
| ~$group_dset_h5_dtype$~ | Type of the dataset in HDF5 | ~double~ |
| ~$group_dset_std_dtype_in$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ |
| ~$group_dset_std_dtype_out$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ |
| ~$group_dset_format_scanf$~ | Input type of the dataset in TEXT [fscanf] | ~%lf~ |
| ~$group_dset_format_printf$~ | Output type of the dataset in TEXT [fprintf] | ~%24.16e~ |
| ~$group_dset_dtype_default$~ | Default datatype of the dataset [C] | ~double/int32_t~ |
| ~$group_dset_dtype_single$~ | Single precision datatype of the dataset [C] | ~float/int32_t~ |
| ~$group_dset_dtype_double$~ | Double precision datatype of the dataset [C] | ~double/int64_t~ |
@ -1628,7 +1645,6 @@ def has_$group_num$(trexio_file) -> bool:
**** Function declarations
#+begin_src c :tangle hrw_dset_data_front.h :exports none
trexio_exit_code trexio_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, $group_dset_dtype_default$* const $group_dset$);
@ -1647,7 +1663,6 @@ trexio_exit_code trexio_write_safe_$group_dset$_64(trexio_t* const file, const $
**** Source code for double precision functions
#+begin_src c :tangle read_dset_data_64_front.c
trexio_exit_code
trexio_read_$group_dset$_64 (trexio_t* const file, $group_dset_dtype_double$* const $group_dset$)
@ -2350,7 +2365,8 @@ def has_$group_dset$(trexio_file) -> bool:
return False
#+end_src
** Sparse data structures
** Templates for front end has/read/write a dataset of sparse data
*** Introduction
Sparse data structures are used typically for large tensors such as
two-electron integrals. For example, in the =trex.json= file sparse
@ -2358,12 +2374,11 @@ def has_$group_dset$(trexio_file) -> bool:
#+begin_src python
"ao_2e_int" : {
"eri_num" : [ "int", [ ] ]
"eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]
}
#+end_src
The electron repulsion integral $\langle ij | kl \rangle$ is
The electron repulsion integral (eri) $\langle ij | kl \rangle$ is
represented as a quartet of integers $(i,j,k,l)$ and a floating
point value.
@ -2384,85 +2399,291 @@ def has_$group_dset$(trexio_file) -> bool:
As the number of integrals to store can be prohibitively large, we
provide the possibility to read/write the integrals in chunks. So the
functions take two extra parameters:
- ~offset~ : the index of the 1st integral we want to read. An
offset of zero implies to read the first integral.
- ~num~ : the number of integrals to read.
- ~offset~ : how many integrals in the file should be skipped when reading.
An offset of zero implies to read the first integral.
- ~size~ : the number of integrals to read.
We provide a function to read a chunk of indices, and a function to
read a chunk of values, because some users might want to read only
the values of the integrals, or only the indices.
Here is an example for the indices:
*** C templates for front end
**** Function declarations
#+BEGIN_SRC c
#+begin_src c :tangle hrw_dset_sparse_front.h :exports none
trexio_exit_code trexio_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, const int64_t offset_file, int64_t* const buffer_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t buffer_size, const int32_t* index_sparse, const double* value_sparse);
#+end_src
**** Source code for default functions
#+begin_src c :tangle read_dset_sparse_front.c
trexio_exit_code
trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,
const int64_t offset,
const int64_t num,
int32_t* buffer)
trexio_read_$group_dset$(trexio_t* const file,
const int64_t offset_file,
int64_t* const buffer_size,
int32_t* const index_sparse,
double* const value_sparse
)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (offset < 0L) return TREXIO_INVALID_ARG_2;
if (num < 0L) return TREXIO_INVALID_ARG_3;
if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
if (*buffer_size <= 0L) return TREXIO_INVALID_ARG_3;
if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
const uint32_t rank = 4; // To be set by generator : number of indices
const uint32_t rank = $group_dset_rank$; // To be set by generator : number of indices
int64_t nmax; // Max number of integrals
int64_t size_max; // Max number of integrals (already in the file)
trexio_exit_code rc;
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
/* Read the max number of integrals stored in the file */
rc = trexio_read_$group_dset$_size(file, &size_max);
if (rc != TREXIO_SUCCESS) return rc;
int64_t num;
rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
if (rc != TREXIO_SUCCESS) return rc;
// introduce a new variable which will be modified with the number of integrals being read if EOF is encountered
int64_t eof_read_size = 0UL;
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
break;
case TREXIO_HDF5:
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
#ifdef HAVE_HDF5
rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
break;
#else
rc = TREXIO_BACK_END_MISSING;
#endif
/*
case TREXIO_JSON:
return trexio_json_read_$group_dset$(...);
break;
,*/
default:
return TREXIO_FAILURE; /* Impossible case */
rc = TREXIO_FAILURE; /* Impossible case */
}
if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc;
if (rc == TREXIO_END) *buffer_size = eof_read_size;
// shift indices to be one-based if Fortran API is used
if (file->one_based) {
// if EOF is reached - shift only indices that have been read, not an entire buffer
uint64_t index_size = rank*(*buffer_size) ;
for (uint64_t i=0; i<index_size; ++i){
index_sparse[i] += 1;
}
}
#+END_SRC
For the values,
return rc;
}
#+end_src
#+BEGIN_SRC c
#+begin_src c :tangle read_dset_sparse_size_front.c
trexio_exit_code
trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
const int64_t offset,
const int64_t num,
double* buffer)
trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (offset < 0L) return TREXIO_INVALID_ARG_2;
if (num < 0L) return TREXIO_INVALID_ARG_3;
int64_t nmax; // Max number of integrals
trexio_exit_code rc;
rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
if (rc != TREXIO_SUCCESS) return rc;
if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
return trexio_text_read_$group_dset$_size(file, size_max);
break;
case TREXIO_HDF5:
return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
#ifdef HAVE_HDF5
return trexio_hdf5_read_$group_dset$_size(file, size_max);
break;
#else
return TREXIO_BACK_END_MISSING;
#endif
/*
case TREXIO_JSON:
return trexio_json_read_$group_dset$_size(...);
break;
,*/
default:
return TREXIO_FAILURE; /* Impossible case */
}
}
#+END_SRC
#+end_src
#+begin_src c :tangle write_dset_sparse_front.c
// Front-end writer for a chunk of a sparse dataset.
//
//   file         : TREXIO file handle (must not be NULL)
//   offset_file  : forwarded to the back end as the position of the chunk in the file (>= 0)
//   buffer_size  : number of sparse elements in this chunk (> 0)
//   index_sparse : rank * buffer_size indices (must not be NULL)
//   value_sparse : buffer_size values (must not be NULL)
//
// Returns the exit code of the selected back end, TREXIO_INVALID_ARG_n for a
// bad n-th argument, TREXIO_ALLOCATION_FAILED if the temporary index copy
// cannot be allocated, or TREXIO_BACK_END_MISSING when the HDF5 back end is
// requested but was not compiled in.
trexio_exit_code
trexio_write_$group_dset$(trexio_t* const file,
                          const int64_t offset_file,
                          const int64_t buffer_size,
                          const int32_t* index_sparse,
                          const double* value_sparse
                          )
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
  if (buffer_size <= 0L) return TREXIO_INVALID_ARG_3;
  if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
  if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;

  const uint32_t rank = $group_dset_rank$;  // To be set by generator : number of indices

  int64_t size_max=0L;        // Max number of integrals (already in the file)
  trexio_exit_code rc;

  /* Read the max number of integrals stored in the file */
  rc = trexio_read_$group_dset$_size(file, &size_max);
  if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
  // A missing dataset is not an error here: nothing has been written yet
  if (rc == TREXIO_DSET_MISSING) size_max = 0L;

  // Dimension attached to the sparse dataset; handed to the back ends unchanged
  int64_t num;
  rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
  if (rc != TREXIO_SUCCESS) return rc;

  // By default the caller's buffer is passed through as is (never written to)
  int32_t* index_sparse_p = (int32_t*) index_sparse;
  // shift indices to be zero-based if Fortran API is used
  if (file->one_based) {

    uint64_t index_size = rank * buffer_size;

    // Work on a private copy so that the caller's buffer stays untouched
    index_sparse_p = CALLOC(index_size, int32_t);
    if (index_sparse_p == NULL) return TREXIO_ALLOCATION_FAILED;

    for (uint64_t i=0; i<index_size; ++i){
      index_sparse_p[i] = index_sparse[i] - 1;
    }

  }

  switch (file->back_end) {

  case TREXIO_TEXT:
    rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num, size_max, index_sparse_p, value_sparse);
    break;

  case TREXIO_HDF5:
#ifdef HAVE_HDF5
    rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, index_sparse_p, value_sparse);
#else
    rc = TREXIO_BACK_END_MISSING;
#endif
    // The break sits outside the #ifdef so that the "missing back end" code
    // cannot fall through to the default case and be replaced by TREXIO_FAILURE
    break;
/*
  case TREXIO_JSON:
    rc = trexio_json_write_$group_dset$(...);
    break;
    ,*/
  default:
    rc = TREXIO_FAILURE;  /* Impossible case */
  }

  // free the memory allocated to shift indices to be zero-based
  if (file->one_based) FREE(index_sparse_p);

  return rc;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_front.c
// Front-end query: tells whether the sparse dataset is present in the file by
// delegating to the back end selected in file->back_end.
// Returns TREXIO_INVALID_ARG_1 for a NULL handle, the back end's answer
// otherwise, TREXIO_BACK_END_MISSING when HDF5 is requested but not compiled
// in, and TREXIO_FAILURE when no case of the switch applies.
trexio_exit_code
trexio_has_$group_dset$ (trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  assert(file->back_end < TREXIO_INVALID_BACK_END);

  // Kept as the answer when no back end below matches
  trexio_exit_code has_rc = TREXIO_FAILURE;

  switch (file->back_end) {

  case TREXIO_TEXT:
    has_rc = trexio_text_has_$group_dset$(file);
    break;

  case TREXIO_HDF5:
#ifdef HAVE_HDF5
    has_rc = trexio_hdf5_has_$group_dset$(file);
#else
    has_rc = TREXIO_BACK_END_MISSING;
#endif
    break;
/*
  case TREXIO_JSON:
    return trexio_json_has_$group_dset$(file);
    break;
    ,*/
  }

  return has_rc;
}
#+end_src
*** Fortran templates for front end
The ~Fortran~ templates that provide an access to the ~C~ API calls from ~Fortran~.
These templates are based on the use of ~iso_c_binding~. Pointers have to be passed by value.
#+begin_src f90 :tangle write_dset_sparse_front_fortran.f90
! Explicit interface to the C function trexio_write_$group_dset$ (bind(C)).
! The three integer(8) scalars are passed by value; the two assumed-size
! arrays are input-only buffers.
interface
integer function trexio_write_$group_dset$ (trex_file, &
offset_file, buffer_size, &
index_sparse, value_sparse) bind(C)
use, intrinsic :: iso_c_binding
integer(8), intent(in), value :: trex_file   ! TREXIO file handle
integer(8), intent(in), value :: offset_file ! offset in the file, forwarded to the C call
integer(8), intent(in), value :: buffer_size ! number of sparse elements in the buffers
integer(4), intent(in) :: index_sparse(*)    ! indices of the elements
double precision, intent(in) :: value_sparse(*) ! values of the elements
end function trexio_write_$group_dset$
end interface
#+end_src
#+begin_src f90 :tangle read_dset_sparse_front_fortran.f90
! Explicit interface to the C function trexio_read_$group_dset$ (bind(C)).
! buffer_size is passed by reference (no value attribute): the C front end
! overwrites it with the number of elements actually read when the end of
! the data is reached.
interface
integer function trexio_read_$group_dset$ (trex_file, &
offset_file, buffer_size, &
index_sparse, value_sparse) bind(C)
use, intrinsic :: iso_c_binding
integer(8), intent(in), value :: trex_file   ! TREXIO file handle
integer(8), intent(in), value :: offset_file ! offset in the file, forwarded to the C call
integer(8), intent(inout) :: buffer_size     ! in: requested count; out: see note above
integer(4), intent(out) :: index_sparse(*)   ! receives the indices
double precision, intent(out) :: value_sparse(*) ! receives the values
end function trexio_read_$group_dset$
end interface
#+end_src
#+begin_src f90 :tangle read_dset_sparse_size_front_fortran.f90
! Explicit interface to the C function trexio_read_$group_dset$_size (bind(C)).
! size_max is passed by reference and filled in by the C call.
interface
integer function trexio_read_$group_dset$_size (trex_file, &
size_max) bind(C)
use, intrinsic :: iso_c_binding
integer(8), intent(in), value :: trex_file ! TREXIO file handle
integer(8), intent(out) :: size_max        ! size reported by the C call for this dataset
end function trexio_read_$group_dset$_size
end interface
#+end_src
#+begin_src f90 :tangle has_dset_sparse_front_fortran.f90
! Explicit interface to the C function trexio_has_$group_dset$ (bind(C)).
! The integer result is the exit code returned by the C call.
interface
integer function trexio_has_$group_dset$ (trex_file) bind(C)
use, intrinsic :: iso_c_binding
integer(8), intent(in), value :: trex_file ! TREXIO file handle
end function trexio_has_$group_dset$
end interface
#+end_src
** Templates for front end has/read/write a dataset of strings
*** Introduction

View File

@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c
cat populated/pop_write_*.c >> trexio_hdf5.c
cat populated/pop_hrw_*.h >> trexio_hdf5.h
cat helpers_hdf5.c >> trexio_hdf5.c
cat suffix_hdf5.h >> trexio_hdf5.h

View File

@ -372,6 +372,207 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src
** Template for HDF5 has/read/write the dataset of sparse data
Sparse data is stored using extensible datasets of HDF5. Extensibility is required
due to the fact that the sparse data will be written in chunks of user-defined size.
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src
#+begin_src c :tangle write_dset_sparse_hdf5.c
// HDF5 back end: write a chunk of `size` sparse elements.
//
// Indices and values go to two separate datasets of the group, named
// $GROUP_DSET$_NAME "_indices" and $GROUP_DSET$_NAME "_values".
//
//   file         : TREXIO file handle (must not be NULL)
//   offset_file  : number of elements preceding this chunk; used as the write
//                  offset only when the datasets already exist
//   size         : number of elements in this chunk
//   size_max     : used only to choose the integer type the indices are stored in
//   index_sparse : $group_dset_rank$ * size indices
//   value_sparse : size values
//
// Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1, TREXIO_ALLOCATION_FAILED or
// the error code of the helper that failed.
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  hid_t index_dtype;
  void* index_p;
  uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;

  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    uint8_t* index = CALLOC(size_ranked, uint8_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    // The loop counter has the same unsigned type as its bound (no signed/unsigned comparison)
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint8_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT8;
  } else if (size_max < UINT16_MAX) {
    uint16_t* index = CALLOC(size_ranked, uint16_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint16_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT16;
  } else {
    /* No conversion needed: the caller's buffer is used directly (and must not be freed here) */
    index_p = (int32_t*) index_sparse;
    index_dtype = H5T_NATIVE_INT32;
  }

  /* Store float values in double precision */
  hid_t value_dtype = H5T_NATIVE_DOUBLE;

  /* Arrays of chunk dims that will be used for chunking the dataset */
  const hsize_t chunk_i_dims[1] = {(hsize_t) size_ranked};
  const hsize_t chunk_v_dims[1] = {(hsize_t) size};

  /* Indices and values are stored as 2 independent datasets in the HDF5 file */
  char dset_index_name[256] = "\0";
  char dset_value_name[256] = "\0";
  /* Build the names of the datasets */
  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);

  trexio_exit_code rc_write = TREXIO_FAILURE;
  /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
    /* If the dataset of indices does not exist -> create both datasets and write */

    /* Create chunked dataset with index_dtype datatype and write indices into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
    /* Release the converted copy (if any) before a possible early return */
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

    /* Create chunked dataset with value_dtype datatype and write values into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  } else {
    /* If the dataset of indices exists -> open both datasets and write at the requested offset */
    hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
    hsize_t offset_v[1] = {(hsize_t) offset_file};

    /* Open the existing dataset of indices and write the new indices at offset_i */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
    /* Release the converted copy (if any) before a possible early return */
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

    /* Open the existing dataset of values and write the new values at offset_v */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  }

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
// HDF5 back end: read a chunk of at most `size` sparse elements starting
// `offset_file` elements into the data.
//
// The indices are read first, then the values; the code returned is that of
// the read of the values unless the read of the indices failed with something
// other than TREXIO_END. eof_read_size is handed to the read of the values
// only. size_max is not used by this function.
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int64_t* const eof_read_size,
                               int32_t* const index_read,
                               double* const value_read)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  /* Two independent datasets hold the indices and the values: build their names */
  char name_of_indices[256] = "\0";
  char name_of_values[256] = "\0";
  strncpy(name_of_indices, $GROUP_DSET$_NAME "_indices", 256);
  strncpy(name_of_values, $GROUP_DSET$_NAME "_values", 256);

  /* Each sparse element owns $group_dset_rank$ indices and a single value */
  hsize_t start_indices[1] = {(hsize_t) offset_file * $group_dset_rank$};
  hsize_t count_indices[1] = {(hsize_t) size * $group_dset_rank$};

  hsize_t start_values[1] = {(hsize_t) offset_file};
  hsize_t count_values[1] = {(hsize_t) size};

  /* Indices first; reaching the end of the data is tolerated at this stage */
  const trexio_exit_code rc_indices =
    trexio_hdf5_open_read_dset_sparse(f->$group$_group, name_of_indices, start_indices, count_indices, NULL, 1, index_read);
  if ( !(rc_indices == TREXIO_SUCCESS || rc_indices == TREXIO_END) ) return rc_indices;

  /* Values last; whatever this read returns is the result of the whole call */
  return trexio_hdf5_open_read_dset_sparse(f->$group$_group, name_of_values, start_values, count_values, eof_read_size, 0, value_read);
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{
  // Report the number of elements currently stored in the <NAME>_values
  // dataset of the group (one value per sparse item).
  // Returns TREXIO_INVALID_ARG_1/2 for NULL arguments, TREXIO_INVALID_ID when
  // the dataset or its dataspace cannot be opened, TREXIO_FAILURE when the
  // extent cannot be queried, TREXIO_SUCCESS otherwise.

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // storage for the (single) dimension of the dataset
  hsize_t ddims[1] = {0};

  // the call returns the rank, or a negative value on failure
  int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);

  H5Dclose(dset_id);
  H5Sclose(fspace_id);

  // do not report a size that was never filled in
  if (rrank < 0) return TREXIO_FAILURE;

  *size_max = (int64_t) ddims[0];

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_hdf5.c
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{
  // Tell whether the sparse dataset exists in the file by looking for its
  // <NAME>_values dataset in the group.

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  // H5LTfind_dataset returns 1 if the dataset exists, 0 otherwise
  const herr_t found = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values");

  switch (found) {
    case 1:
      return TREXIO_SUCCESS;
    case 0:
      return TREXIO_HAS_NOT;
    default:
      return TREXIO_FAILURE;
  }
}
#+end_src
** Template for HDF5 has/read/write the dataset of strings
#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none
@ -703,11 +904,256 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
}
#+end_src
** Helper functions
#+begin_src c :tangle helpers_hdf5.c
trexio_exit_code
trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
                                      const char* dset_name,
                                      const hid_t dtype_id,
                                      const hsize_t* chunk_dims,
                                      const void* data_sparse)
{
  // Create a new 1-dimensional, chunked dataset `dset_name` in `group_id`
  // with an unlimited maximum size and an initial size of chunk_dims[0],
  // then write `data_sparse` into it.
  // Returns TREXIO_INVALID_ID if any HDF5 object cannot be created,
  // TREXIO_FAILURE if the write itself fails, TREXIO_SUCCESS otherwise.

  const int h5_rank = 1;
  const hsize_t maxdims[1] = {H5S_UNLIMITED};

  hid_t space_id = H5Screate_simple(h5_rank, chunk_dims, maxdims);
  if (space_id < 0) return TREXIO_INVALID_ID;

  hid_t plist_id = H5Pcreate(H5P_DATASET_CREATE);
  if (plist_id < 0) {
    H5Sclose(space_id);
    return TREXIO_INVALID_ID;
  }

  // every failure before the write is reported as an invalid identifier
  trexio_exit_code rc = TREXIO_INVALID_ID;
  hid_t dataset_id = -1;

  // chunking is set on the creation property list passed to H5Dcreate
  if (H5Pset_chunk(plist_id, h5_rank, chunk_dims) >= 0) {

    dataset_id = H5Dcreate(group_id,
                           dset_name,
                           dtype_id,
                           space_id,
                           H5P_DEFAULT,
                           plist_id,
                           H5P_DEFAULT);

    if (dataset_id >= 0) {
      const herr_t write_status = H5Dwrite(dataset_id,
                                           dtype_id,
                                           H5S_ALL, H5S_ALL, H5P_DEFAULT,
                                           data_sparse);
      rc = (write_status < 0) ? TREXIO_FAILURE : TREXIO_SUCCESS;
    }
  }

  H5Sclose(space_id);
  H5Pclose(plist_id);
  if (dataset_id >= 0) H5Dclose(dataset_id);

  return rc;
}
trexio_exit_code
trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
                                    const char* dset_name,
                                    const hid_t dtype_id,
                                    const hsize_t* chunk_dims,
                                    const hsize_t* offset_file,
                                    const void* data_sparse)
{
  // Append-style write into an existing 1-dimensional dataset:
  // the dataset is grown by chunk_dims[0] elements and `data_sparse`
  // (chunk_dims[0] elements) is written at position offset_file[0].
  // Returns TREXIO_INVALID_ID when an HDF5 object cannot be opened, extended,
  // selected or created, TREXIO_FAILURE when the extent query or the write
  // fails, TREXIO_SUCCESS otherwise.

  const int h5_rank = 1;

  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // current size of the dataset
  hsize_t ddims[1] = {0};
  int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);

  // this dataspace describes the pre-extension size only; it is re-acquired below
  H5Sclose(fspace);

  // a negative rank means the query failed and ddims holds no valid size
  if (rrank < 0) {
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  // extend the dset size
  ddims[0] += chunk_dims[0];
  herr_t status = H5Dset_extent(dset_id, ddims);
  if (status < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // reopen the file dataspace to take into account the extension
  fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // select hyperslab to be written using chunk_dims and offset values
  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
  if (status < 0) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // create memory dataspace to write from
  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
  if (dspace < 0) {
    // dspace is not a valid handle here, so only the other two are released
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  status = H5Dwrite(dset_id,
                    dtype_id,
                    dspace, fspace, H5P_DEFAULT,
                    data_sparse);

  H5Dclose(dset_id);
  H5Sclose(dspace);
  H5Sclose(fspace);

  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
                                   const char* dset_name,
                                   const hsize_t* offset_file,
                                   hsize_t* const size_read,
                                   int64_t* const eof_read_size,
                                   const int is_index,
                                   void* const data_sparse
                                   )
{
  // Read size_read[0] elements of the 1-dimensional dataset `dset_name`,
  // starting at element offset_file[0], into `data_sparse`.
  //
  // If the requested range runs past the end of the dataset, size_read[0] is
  // lowered to the number of elements actually available, that number is
  // stored in *eof_read_size (when not NULL) and TREXIO_END is returned.
  // An offset at or beyond the end of the dataset reads nothing and returns
  // TREXIO_END with a size of 0.
  //
  // When is_index == 1 and the dataset is stored as uint8 or uint16, the data
  // is read into a temporary buffer and widened into `data_sparse`, which is
  // then treated as an int32_t array. In all other cases the data is read
  // directly into `data_sparse` using the datatype of the dataset.
  //
  // Error codes: TREXIO_INVALID_ID (HDF5 object could not be obtained or
  // selected), TREXIO_ALLOCATION_FAILED (temporary buffer), TREXIO_FAILURE
  // (extent query or read failed).

  const int h5_rank = 1;

  // get the dataset handle
  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // get the dataspace of the dataset
  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // everything released in the cleanup section is declared (and given a
  // recognisable "not acquired" value) before the first jump to it
  hid_t dtype = -1;
  hid_t memspace_id = -1;
  void* staging = NULL;          // temporary buffer for narrow index types
  void* read_target = data_sparse;
  int is_uint8 = 0;
  int is_uint16 = 0;
  int is_EOF = 0;
  int64_t size_ranked = 0;
  trexio_exit_code rc = TREXIO_FAILURE;

  // get dims of the dset stored in the file to check whether reading with the
  // user-provided chunk size will reach the end of the dataset
  hsize_t ddims[1] = {0};
  int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
  if (rrank < 0) goto cleanup;

  // offset at/after the end: nothing can be read; handled separately because
  // the subtraction below would otherwise wrap around (hsize_t is unsigned)
  if (offset_file[0] >= ddims[0]) {
    size_read[0] = 0;
    if (eof_read_size != NULL) *eof_read_size = 0;
    rc = TREXIO_END;
    goto cleanup;
  }

  {
    // number of elements between the offset and the end of the dataset
    const hsize_t available = ddims[0] - offset_file[0];
    if (size_read[0] > available) {
      is_EOF = 1;
      // lower the count to the number of elements which can be read
      size_read[0] = available;
      // report it to the caller through the address provided
      if (eof_read_size != NULL) *eof_read_size = (int64_t) size_read[0];
    }
  }

  size_ranked = (int64_t) size_read[0];

  // read the datatype from the dataset; the handle has to be closed afterwards
  dtype = H5Dget_type(dset_id);
  if (dtype < 0) {
    rc = TREXIO_INVALID_ID;
    goto cleanup;
  }

  // special case when reading int indices stored with a narrow type
  if (is_index == 1) {
    if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
      is_uint8 = 1;
      staging = CALLOC(size_ranked, uint8_t);
    } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
      is_uint16 = 1;
      staging = CALLOC(size_ranked, uint16_t);
    }
    if ((is_uint8 || is_uint16) && staging == NULL) {
      rc = TREXIO_ALLOCATION_FAILED;
      goto cleanup;
    }
  }
  if (staging != NULL) read_target = staging;

  if (H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL) < 0) {
    rc = TREXIO_INVALID_ID;
    goto cleanup;
  }

  memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
  if (memspace_id < 0) {
    rc = TREXIO_INVALID_ID;
    goto cleanup;
  }

  if (H5Dread(dset_id,
              dtype,
              memspace_id, fspace_id, H5P_DEFAULT,
              read_target) < 0) {
    rc = TREXIO_FAILURE;
    goto cleanup;
  }

  // widen narrow indices into the caller's int32_t array
  if (is_uint8) {
    const uint8_t* index = (const uint8_t*) staging;
    for (int64_t i=0; i<size_ranked; ++i){
      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
    }
  } else if (is_uint16) {
    const uint16_t* index = (const uint16_t*) staging;
    for (int64_t i=0; i<size_ranked; ++i){
      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
    }
  }

  rc = (is_EOF == 1) ? TREXIO_END : TREXIO_SUCCESS;

cleanup:
  // single exit point: release only what has actually been acquired
  if (staging != NULL) FREE(staging);
  if (memspace_id >= 0) H5Sclose(memspace_id);
  if (dtype >= 0) H5Tclose(dtype);
  H5Sclose(fspace_id);
  H5Dclose(dset_id);

  return rc;
}
#+end_src
* Constant file suffixes (not used by the generator) :noexport:
#+begin_src c :tangle suffix_hdf5.h
trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
trexio_exit_code trexio_hdf5_open_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
trexio_exit_code trexio_hdf5_open_read_dset_sparse (const hid_t group_id, const char* dset_name, const hsize_t* offset_file, hsize_t* const size_read, int64_t* const eof_read_size, const int is_index, void* const data_sparse);
#endif
#+end_src

View File

@ -19,23 +19,26 @@ cat populated/pop_flush_group_text.h >> trexio_text.h
cat populated/pop_has_dset_data_text.c >> trexio_text.c
cat populated/pop_has_dset_str_text.c >> trexio_text.c
cat populated/pop_has_dset_sparse_text.c >> trexio_text.c
cat populated/pop_has_attr_num_text.c >> trexio_text.c
cat populated/pop_has_attr_str_text.c >> trexio_text.c
cat populated/pop_read_dset_data_text.c >> trexio_text.c
cat populated/pop_read_dset_str_text.c >> trexio_text.c
cat populated/pop_read_dset_sparse_text.c >> trexio_text.c
cat populated/pop_read_attr_str_text.c >> trexio_text.c
cat populated/pop_read_attr_num_text.c >> trexio_text.c
cat populated/pop_write_dset_data_text.c >> trexio_text.c
cat populated/pop_write_dset_str_text.c >> trexio_text.c
cat populated/pop_write_dset_sparse_text.c >> trexio_text.c
cat populated/pop_write_attr_str_text.c >> trexio_text.c
cat populated/pop_write_attr_num_text.c >> trexio_text.c
cat populated/pop_hrw_dset_data_text.h >> trexio_text.h
cat populated/pop_hrw_dset_str_text.h >> trexio_text.h
cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h
cat populated/pop_hrw_attr_num_text.h >> trexio_text.h
cat populated/pop_hrw_attr_str_text.h >> trexio_text.h
cat rdm_text.c >> trexio_text.c
cat rdm_text.h >> trexio_text.h
cat suffix_text.h >> trexio_text.h

View File

@ -93,22 +93,10 @@ typedef struct $group$_s {
** Template for general structure in text back end
#+begin_src c :tangle struct_text_group.h
typedef struct rdm_s {
uint64_t dim_one_e;
uint32_t to_flush;
uint32_t padding;
double* one_e;
char file_name[TREXIO_MAX_FILENAME_LENGTH];
char two_e_file_name[TREXIO_MAX_FILENAME_LENGTH];
} rdm_t;
#+end_src
#+begin_src c :tangle struct_text_group.h
typedef struct trexio_text_s {
trexio_t parent ;
$group$_t* $group$;
rdm_t* rdm;
int lock_file;
} trexio_text_t;
#+end_src
@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file)
/* Error handling for this call is added by the generator */
rc = trexio_text_free_$group$( (trexio_text_t*) file);
rc = trexio_text_free_rdm( (trexio_text_t*) file);
if (rc != TREXIO_SUCCESS) return rc;
return TREXIO_SUCCESS;
}
@ -411,7 +396,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
return NULL;
}
rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$));
rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
assert(!(rc != 1));
if (rc != 1) {
FREE(buffer);
@ -499,7 +484,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
}
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
rc = fscanf(f, "%$group_dset_std_dtype_in$", &($group$->$group_dset$[i]));
rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
assert(!(rc != 1));
if (rc != 1) {
FREE(buffer);
@ -535,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file)
}
/* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
, BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
,*/
BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
*/
char* tmp_$group_dset$;
tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
$group$->$group_dset$[i] = tmp_$group_dset$;
/* conventional fcanf with "%s" only return the string before the first space character
,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
,* Q: depending on what ? */
* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
* Q: depending on what ? */
rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$);
assert(!(rc != 1));
if (rc != 1) {
@ -613,7 +598,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
// START REPEAT GROUP_NUM
fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$);
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
// END REPEAT GROUP_NUM
// START REPEAT GROUP_ATTR_STR
@ -627,7 +612,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
fprintf(f, "$group_dset$\n");
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
fprintf(f, "%$group_dset_std_dtype_out$\n", $group$->$group_dset$[i]);
fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
}
// END REPEAT GROUP_DSET_ALL
@ -1016,321 +1001,275 @@ trexio_text_has_$group_str$ (trexio_t* const file)
}
#+end_src
** RDM struct (hard-coded)
*** Read the complete struct
** Template for has/read/write the dataset of sparse data
#+begin_src c :tangle rdm_text.h
rdm_t* trexio_text_read_rdm(trexio_text_t* const file);
Each sparse array is stored in a separate =.txt= file due to the fact that sparse I/O has to be decoupled
from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data
to prevent memory overflow. Chunks have a given ~int64_t size~
(size specifies the number of sparse data items, e.g. integrals).
User provides indices and values of the sparse array as two separate variables.
#+begin_src c :tangle hrw_dset_sparse_text.h :exports none
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int64_t size_start, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src
#+begin_src c :tangle rdm_text.c
rdm_t* trexio_text_read_rdm(trexio_text_t* const file) {
if (file == NULL) return NULL;
if (file->rdm != NULL) return file->rdm;
#+begin_src c :tangle write_dset_sparse_text.c
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
const int64_t offset_file,
const int64_t size,
const int64_t size_max,
const int64_t size_start,
const int32_t* index_sparse,
const double* value_sparse)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
/* Allocate the data structure */
rdm_t* rdm = MALLOC(rdm_t);
assert (rdm != NULL);
/* Build the name of the file with sparse data*/
/* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
rdm->one_e = NULL;
rdm->two_e_file_name[0] = '\0';
rdm->to_flush = 0;
/* Copy directory name in file_full_path */
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Append name of the file with sparse data */
strncat (file_full_path, $group_dset$_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
/* Try to open the file. If the file does not exist, return */
const char* rdm_file_name = "/rdm.txt";
/* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
FILE* f = fopen(file_full_path, "a");
if(f == NULL) return TREXIO_FILE_ERROR;
strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH);
strncat (rdm->file_name, rdm_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name));
/* Specify the line length in order to offset properly. For example, for 4-index quantities
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
CURRENTLY NO OFFSET IS USED WHEN WRITING !
,*/
int64_t line_length = 0L;
char format_str[256] = "\0";
if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
FREE(rdm);
return NULL;
/* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
if (size_max < UINT8_MAX) {
line_length = $sparse_line_length_8$; // 41 for 4 indices
strncpy(format_str, $sparse_format_printf_8$, 256);
} else if (size_max < UINT16_MAX) {
line_length = $sparse_line_length_16$; // 49 for 4 indices
strncpy(format_str, $sparse_format_printf_16$, 256);
} else {
line_length = $sparse_line_length_32$; //69 for 4 indices
strncpy(format_str, $sparse_format_printf_32$, 256);
}
/* If the file exists, read it */
FILE* f = fopen(rdm->file_name,"r");
if (f != NULL) {
strncat(format_str, "\n", 2);
/* Find size of file to allocate the max size of the string buffer */
fseek(f, 0L, SEEK_END);
size_t sz = ftell(f);
fseek(f, 0L, SEEK_SET);
sz = (sz < 1024) ? (1024) : (sz);
char* buffer = CALLOC(sz, char);
/* Get the starting position of the IO stream to be written in the .size file.
This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
One can use ftello function which is adapted for large files.
For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
*/
int64_t io_start_pos = size_start * line_length;
/* Read the dimensioning variables */
/* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
int rc;
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "dim_one_e") == 0);
rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e));
assert (rc == 1);
/* Allocate arrays */
rdm->one_e = CALLOC(rdm->dim_one_e, double);
assert (rdm->one_e != NULL);
/* Read one_e */
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "one_e") == 0);
for (uint64_t i=0 ; i<rdm->dim_one_e; ++i) {
rc = fscanf(f, "%lf", &(rdm->one_e[i]));
assert (rc == 1);
}
/* Read two_e */
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "two_e_file_name") == 0);
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
strncpy(rdm->two_e_file_name, buffer, 1024);
if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
FREE(buffer);
FREE(rdm->one_e);
FREE(rdm);
for (uint64_t i=0UL; i<size; ++i) {
rc = fprintf(f, format_str,
$group_dset_sparse_indices_printf$,
*(value_sparse + i));
if(rc <= 0) {
fclose(f);
return NULL;
return TREXIO_FAILURE;
}
}
FREE(buffer);
fclose(f);
f = NULL;
}
file->rdm = rdm ;
return rdm;
}
#+end_src
/* Close the TXT file */
rc = fclose(f);
if (rc != 0) return TREXIO_FILE_ERROR;
*** Flush the complete struct
/* Append .size to the file_full_path in order to write additional info about the written buffer of data */
strncat(file_full_path, ".size", 6);
#+begin_src c :tangle rdm_text.h
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file);
#+end_src
/* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
FILE *f_wSize = fopen(file_full_path, "a");
if (f_wSize == NULL) return TREXIO_FILE_ERROR;
#+begin_src c :tangle rdm_text.c
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) {
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (file->parent.mode == 'r') return TREXIO_READONLY;
rdm_t* const rdm = file->rdm;
if (rdm == NULL) return TREXIO_SUCCESS;
if (rdm->to_flush == 0) return TREXIO_SUCCESS;
FILE* f = fopen(rdm->file_name,"w");
assert (f != NULL);
/* Write the dimensioning variables */
fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e);
/* Write arrays */
fprintf(f, "one_e\n");
for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) {
fprintf(f, "%lf\n", rdm->one_e[i]);
/* Write the buffer_size */
rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
if (rc <= 0) {
fclose(f_wSize);
return TREXIO_FAILURE;
}
fprintf(f, "two_e_file_name\n");
fprintf(f, "%s\n", rdm->two_e_file_name);
/* Close the TXT file */
rc = fclose(f_wSize);
if (rc != 0) return TREXIO_FILE_ERROR;
fclose(f);
rdm->to_flush = 0;
/* Exit upon success */
return TREXIO_SUCCESS;
}
#+end_src
*** Free memory
Memory is allocated when reading. The following function frees memory.
#+begin_src c :tangle rdm_text.h
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) {
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (file->parent.mode != 'r') {
trexio_exit_code rc = trexio_text_flush_rdm(file);
if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
}
rdm_t* const rdm = file->rdm;
if (rdm == NULL) return TREXIO_SUCCESS;
if (rdm->one_e != NULL) {
FREE (rdm->one_e);
}
free (rdm);
file->rdm = NULL;
return TREXIO_SUCCESS;
}
#+end_src
*** Read/Write the one_e attribute
The ~one_e~ array is assumed allocated with the appropriate size.
#+begin_src c :tangle rdm_text.h
trexio_exit_code
trexio_text_read_rdm_one_e(trexio_t* const file,
double* const one_e,
const uint64_t dim_one_e);
trexio_exit_code
trexio_text_write_rdm_one_e(trexio_t* const file,
const double* one_e,
const uint64_t dim_one_e);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code
trexio_text_read_rdm_one_e(trexio_t* const file,
double* const one_e,
const uint64_t dim_one_e)
#+begin_src c :tangle read_dset_sparse_text.c
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
const int64_t offset_file,
const int64_t size,
const int64_t size_max,
int64_t* const eof_read_size,
int32_t* const index_sparse,
double* const value_sparse)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
/* Build the name of the file with sparse data.
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
,*/
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3;
/* Copy directory name in file_full_path */
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Append name of the file with sparse data */
strncat (file_full_path, $group_dset$_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
one_e[i] = rdm->one_e[i];
/* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
FILE* f = fopen(file_full_path, "r");
if(f == NULL) return TREXIO_FILE_ERROR;
/* Specify the line length in order to offset properly. For example, for 4-index quantities
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char
,*/
uint64_t line_length = 0UL;
/* Determine the line length depending on the size_max (usually mo_num or ao_num) */
if (size_max < UINT8_MAX) {
line_length = $sparse_line_length_8$; // 41 for 4 indices
} else if (size_max < UINT16_MAX) {
line_length = $sparse_line_length_16$; // 49 for 4 indices
} else {
line_length = $sparse_line_length_32$; //69 for 4 indices
}
return TREXIO_SUCCESS;
}
/* Offset in the file according to the provided value of offset_file and optimal line_length */
fseek(f, (long) offset_file * line_length, SEEK_SET);
/* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */
int rc;
char buffer[1024];
uint64_t count = 0UL;
for (uint64_t i=0UL; i<size; ++i) {
trexio_exit_code
trexio_text_write_rdm_one_e(trexio_t* const file,
const double* one_e,
const uint64_t dim_one_e)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
if (file->mode != 'r') return TREXIO_READONLY;
memset(buffer,0,sizeof(buffer));
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
if(fgets(buffer, 1023, f) == NULL){
rdm->dim_one_e = dim_one_e;
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
rdm->one_e[i] = one_e[i];
}
rdm->to_flush = 1;
return TREXIO_SUCCESS;
}
#+end_src
*** Read/Write the two_e attribute
~two_e~ is a sparse data structure, which can be too large to fit
in memory. So we provide functions to read and write it by
chunks.
In the text back end, the easiest way to do it is to create a
file for each sparse float structure.
#+begin_src c :tangle rdm_text.h
trexio_exit_code
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
int64_t* const index,
double* const value);
trexio_exit_code
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
const int64_t* index,
const double* value);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
int64_t* const index,
double* const value)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (index == NULL) return TREXIO_INVALID_ARG_4;
if (value == NULL) return TREXIO_INVALID_ARG_5;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
FILE* f = fopen(rdm->two_e_file_name, "r");
if (f == NULL) return TREXIO_END;
const uint64_t line_length = 64L;
fseek(f, (long) offset * line_length, SEEK_SET);
for (uint64_t i=0 ; i<size ; ++i) {
int rc = fscanf(f, "%9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %24le\n",
&index[4*i],
&index[4*i+1],
&index[4*i+2],
&index[4*i+3],
&value[i]);
if (rc == 5) {
/* Do nothing */
} else if (rc == EOF) {
fclose(f);
*eof_read_size = count;
return TREXIO_END;
} else {
rc = sscanf(buffer, "$group_dset_format_scanf$",
$group_dset_sparse_indices_scanf$,
value_sparse + i);
if(rc <= 0) {
fclose(f);
return TREXIO_FAILURE;
}
count += 1UL;
}
}
/* Close the TXT file */
rc = fclose(f);
if(rc != 0) return TREXIO_FILE_ERROR;
return TREXIO_SUCCESS;
}
#+end_src
trexio_exit_code
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
const int64_t* index,
const double* value)
#+begin_src c :tangle read_dset_sparse_text.c
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (index == NULL) return TREXIO_INVALID_ARG_4;
if (value == NULL) return TREXIO_INVALID_ARG_5;
if (file->mode != 'r') return TREXIO_READONLY;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
/* Build the name of the file with sparse data.
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
*/
const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
FILE* f = fopen(rdm->two_e_file_name, "w");
if (f == NULL) return TREXIO_FAILURE;
/* Copy directory name in file_full_path */
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Append name of the file with sparse data */
strncat (file_full_path, $group_dset$_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
const uint64_t line_length = 64L;
fseek(f, (long) offset * line_length, SEEK_SET);
/* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
FILE* f = fopen(file_full_path, "r");
if(f == NULL) return TREXIO_FILE_ERROR;
for (uint64_t i=0 ; i<size ; ++i) {
int rc = fprintf(f, "%9" PRId64 " %9" PRId64 " %9" PRId64 " %9" PRId64 " %24le\n",
index[4*i],
index[4*i+1],
index[4*i+2],
index[4*i+3],
value[i]);
if (rc != 5) return TREXIO_FAILURE;
/* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */
int rc;
int64_t size_item, offset_item, size_accum=0L;
/* Read the values from the file. BEWARE OF POSSIBLE MAX_INT64 OVERFLOW ! */
while(fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item) != EOF) {
/* Check that summation will not overflow the int64_t value */
if (INT64_MAX - size_accum > size_item) {
size_accum += size_item;
} else {
fclose(f);
*size_max = -1L;
return TREXIO_INT_SIZE_OVERFLOW;
}
}
/* Close the TXT file */
rc = fclose(f);
if(rc != 0) return TREXIO_FILE_ERROR;
/* Overwrite the value at the input address and return TREXIO_SUCCESS */
*size_max = size_accum;
return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_text.c
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
/* Build the name of the file with sparse data.
The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
*/
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
/* Copy directory name in file_full_path */
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Append name of the file with sparse data */
strncat (file_full_path, $group_dset$_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
/* Check the return code of access function to determine whether the file with sparse data exists or not */
if (access(file_full_path, F_OK) == 0){
return TREXIO_SUCCESS;
} else {
return TREXIO_HAS_NOT;
}
}
#+end_src
@ -1339,4 +1278,3 @@ trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
#+begin_src c :tangle suffix_text.h
#endif
#+end_src

View File

@ -6,6 +6,7 @@ set(Tests_text
open_text
io_dset_float_text
io_dset_str_text
io_dset_sparse_text
io_safe_dset_float_text
io_dset_int_text
io_num_text
@ -19,6 +20,7 @@ if(ENABLE_HDF5)
open_hdf5
io_dset_float_hdf5
io_dset_str_hdf5
io_dset_sparse_hdf5
io_safe_dset_float_hdf5
io_dset_int_hdf5
io_num_hdf5
@ -43,4 +45,3 @@ endforeach()
add_executable(test_f test_f.f90)
target_link_libraries(test_f PRIVATE trexio_f)
add_test(NAME test_f COMMAND $<TARGET_FILE:test_f>)

235
tests/io_dset_sparse_hdf5.c Normal file
View File

@ -0,0 +1,235 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_HDF5
#define TREXIO_FILE "test_dset_sparse.h5"
#define RM_COMMAND "rm -f -- " TREXIO_FILE
#define SIZE 100
#define N_CHUNKS 5
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  /* Try to write an array of sparse data into the TREXIO file.
     SIZE items (4 indices + 1 value each) are generated and written in
     N_CHUNKS consecutive chunks, the first one at file position `offset`. */

  trexio_t* file = NULL;
  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // open file in 'write' mode
  file = trexio_open(file_name, 'w', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // parameters to be written
  int32_t* index;
  double* value;

  index = calloc(4L*SIZE, sizeof(int32_t));
  value = calloc(SIZE, sizeof(double));
  // the buffers are filled right below, so make an allocation failure visible
  assert (index != NULL);
  assert (value != NULL);

  for(int i=0; i<SIZE; i++){
    index[4*i]   = 4*i;
    index[4*i+1] = 4*i+1;
    index[4*i+2] = 4*i+2;
    index[4*i+3] = 4*i+3;
    value[i]     = 3.14 + (double) i;
  }

  // write mo_num which will be used to determine the optimal size of int indices
  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
    rc = trexio_write_mo_num(file, 1000);
    assert(rc == TREXIO_SUCCESS);
  }

  // write dataset chunks of sparse data in the file (including FAKE statements)
  uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
  // offset_f: position in the file; offset_d: position in the in-memory data
  uint64_t offset_f = (uint64_t) offset;
  uint64_t offset_d = 0UL;

  // write n_chunks times using write_sparse
  for(int i=0; i<N_CHUNKS; ++i){
    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
    assert(rc == TREXIO_SUCCESS);
    offset_d += chunk_size;
    offset_f += chunk_size;
  }

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // free the allocated memory
  free(index);
  free(value);

  /*================= END OF TEST ==================*/

  return 0;
}
/*
 * Check which sparse datasets exist in the TREXIO file opened in 'r' mode:
 * mo_2e_int_eri_lr must be reported as absent (twice in a row) and
 * mo_2e_int_eri must be reported as present.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {

  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  trexio_t* trex_file = trexio_open(file_name, 'r', backend, &rc);
  assert (trex_file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // only the non-lr component is written in this unit test, so the lr one must be absent;
  // the second query verifies that the first one did not create a file/dset as a side effect
  for (int attempt = 0; attempt < 2; ++attempt) {
    rc = trexio_has_mo_2e_int_eri_lr(trex_file);
    assert(rc==TREXIO_HAS_NOT);
  }

  // the previously written mo_2e_int_eri has to exist
  rc = trexio_has_mo_2e_int_eri(trex_file);
  assert(rc==TREXIO_SUCCESS);

  // close current session
  rc = trexio_close(trex_file);
  assert (rc == TREXIO_SUCCESS);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Read chunks of the sparse mo_2e_int_eri dataset back from the TREXIO file.
 *
 *   file_name : path of the TREXIO file to open in 'r' mode
 *   backend   : back end used to open the file
 *   offset    : position in the file of the SIZE-element batch under test
 *
 * Two reads are performed into zero-initialised buffers:
 *   1. a full chunk of 10 entries, expected to return TREXIO_SUCCESS;
 *   2. a chunk starting 3 entries before the end of the batch, expected to
 *      return TREXIO_END and to report 3 entries read.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  trexio_t* file = NULL;
  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // zero-initialised arrays to read into (zeros mark the slots that were not overwritten)
  uint64_t size_r = 40L;
  int32_t* index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
  double*  value_read = (double*) calloc(size_r,sizeof(double));
  // stop at the allocation site instead of handing NULL buffers to the library
  assert (index_read != NULL);
  assert (value_read != NULL);

  // specify the read parameters, here:
  // 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5
  int64_t chunk_read = 10L;
  int64_t offset_file_read = 40L;
  int offset_data_read = 5;
  const int64_t read_size_check = chunk_read;

  if (offset != 0L) offset_file_read += offset;

  // read one chunk using the aforementioned parameters
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
  assert(rc == TREXIO_SUCCESS);
  assert(chunk_read == read_size_check);
  // the slots in front of offset_data_read must still hold their initial zeros
  assert(index_read[0] == 0);
  // the first index of entry i within a batch was written as 4*i
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
  offset_file_read = 97L;
  offset_data_read = 1;
  // chunk_read is updated by the read call, so request a full chunk again explicitly
  chunk_read = read_size_check;
  int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100

  if (offset != 0L) offset_file_read += offset;

  // read one chunk that will reach EOF and return TREXIO_END code
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
  assert(rc == TREXIO_END);
  assert(chunk_read == eof_read_size_check);
  // the last slot of the index buffer must still hold its initial zero
  assert(index_read[4*size_r-1] == 0);
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // free the memory
  free(index_read);
  free(value_read);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Query the size of the sparse mo_2e_int_eri dataset stored in the TREXIO file
 * and compare it with size_check.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {

  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  trexio_t* trex_file = trexio_open(file_name, 'r', backend, &rc);
  assert (trex_file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // receives the size reported for the dataset
  int64_t n_stored;

  // ask the library for the size and compare it with the expectation
  rc = trexio_read_mo_2e_int_eri_size(trex_file, &n_stored);
  assert(rc == TREXIO_SUCCESS);
  assert(n_stored == size_check);

  // close current session
  rc = trexio_close(trex_file);
  assert (rc == TREXIO_SUCCESS);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Test launcher: removes any stale test file, runs two write passes of SIZE
 * elements each (the second one starting at offset SIZE) with the matching
 * read and size checks, then removes the test file again.
 */
int main(){

/*============== Test launcher ================*/

  int status = system(RM_COMMAND);
  assert (status == 0);

  // pass 0 writes at offset 0, pass 1 at offset SIZE (SIZE elements in N_CHUNKS chunks each)
  for (int pass = 0; pass < 2; ++pass) {
    const int64_t start = (int64_t) pass * SIZE;

    test_write_dset_sparse    (TREXIO_FILE, TEST_BACKEND, start);
    // the existence check is only performed after the first write
    if (pass == 0) test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
    test_read_dset_sparse     (TREXIO_FILE, TEST_BACKEND, start);
    // after this pass the dataset is expected to hold start + SIZE elements
    test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, start + SIZE);
  }

  status = system(RM_COMMAND);
  assert (status == 0);

  return 0;
}

230
tests/io_dset_sparse_text.c Normal file
View File

@ -0,0 +1,230 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_TEXT
#define TREXIO_FILE "test_dset_sparse.dir"
#define RM_COMMAND "rm -rf " TREXIO_FILE
#define SIZE 100
#define N_CHUNKS 5
/*
 * Write SIZE sparse entries of mo_2e_int_eri into the TREXIO file in N_CHUNKS
 * consecutive chunks of SIZE/N_CHUNKS entries each.
 *
 *   file_name : path of the TREXIO file to open in 'w' mode
 *   backend   : back end used to open the file
 *   offset    : position in the file at which the first chunk is written
 *
 * Entry i carries the four indices 4*i, 4*i+1, 4*i+2, 4*i+3 and the value 3.14 + i.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  trexio_t* file = NULL;
  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file in 'write' mode
  file = trexio_open(file_name, 'w', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // buffers to be written: 4 indices and 1 value per sparse entry
  int32_t* index = calloc(4L*SIZE, sizeof(int32_t));
  double*  value = calloc(SIZE, sizeof(double));
  // stop at the allocation site instead of dereferencing NULL in the loop below
  assert (index != NULL);
  assert (value != NULL);

  for(int i=0; i<SIZE; i++){
    index[4*i]   = 4*i;
    index[4*i+1] = 4*i+1;
    index[4*i+2] = 4*i+2;
    index[4*i+3] = 4*i+3;
    value[i]     = 3.14 + (double) i;
  }

  // write mo_num which will be used to determine the optimal size of int indices
  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
    rc = trexio_write_mo_num(file, 1000);
    assert(rc == TREXIO_SUCCESS);
  }

  // offset_f is the position in the file, offset_d the position in the local buffers
  uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
  uint64_t offset_f = 0UL;
  uint64_t offset_d = 0UL;
  if (offset != 0L) offset_f += offset;

  // write the buffers chunk by chunk, advancing both offsets by one chunk each time
  for(int i=0; i<N_CHUNKS; ++i){
    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
    assert(rc == TREXIO_SUCCESS);
    offset_d += chunk_size;
    offset_f += chunk_size;
  }

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // free the allocated memory
  free(index);
  free(value);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Check which sparse datasets exist in the TREXIO file opened in 'r' mode:
 * mo_2e_int_eri_lr must be reported as absent (twice in a row) and
 * mo_2e_int_eri must be reported as present.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {

  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  trexio_t* trex_file = trexio_open(file_name, 'r', backend, &rc);
  assert (trex_file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // only the non-lr component is written in this unit test, so the lr one must be absent;
  // the second query verifies that the first one did not create a file/dset as a side effect
  for (int attempt = 0; attempt < 2; ++attempt) {
    rc = trexio_has_mo_2e_int_eri_lr(trex_file);
    assert(rc==TREXIO_HAS_NOT);
  }

  // the previously written mo_2e_int_eri has to exist
  rc = trexio_has_mo_2e_int_eri(trex_file);
  assert(rc==TREXIO_SUCCESS);

  // close current session
  rc = trexio_close(trex_file);
  assert (rc == TREXIO_SUCCESS);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Read chunks of the sparse mo_2e_int_eri dataset back from the TREXIO file.
 *
 *   file_name : path of the TREXIO file to open in 'r' mode
 *   backend   : back end used to open the file
 *   offset    : position in the file of the SIZE-element batch under test
 *
 * Two reads are performed into zero-initialised buffers:
 *   1. a full chunk of 10 entries, expected to return TREXIO_SUCCESS;
 *   2. a chunk starting 3 entries before the end of the batch, expected to
 *      return TREXIO_END and to report 3 entries read.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  trexio_t* file = NULL;
  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // zero-initialised arrays to read into (zeros mark the slots that were not overwritten)
  uint64_t size_r = 40L;
  int32_t* index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
  double*  value_read = (double*) calloc(size_r,sizeof(double));
  // stop at the allocation site instead of handing NULL buffers to the library
  assert (index_read != NULL);
  assert (value_read != NULL);

  // specify the read parameters, here:
  // 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--49) into elements of the array starting from 5
  int64_t chunk_read = 10L;
  int64_t offset_file_read = 40L;
  int offset_data_read = 5;
  const int64_t read_size_check = chunk_read;

  if (offset != 0L) offset_file_read += offset;

  // read one chunk using the aforementioned parameters
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
  assert(rc == TREXIO_SUCCESS);
  assert(chunk_read == read_size_check);
  // the slots in front of offset_data_read must still hold their initial zeros
  assert(index_read[0] == 0);
  // the first index of entry i within a batch was written as 4*i
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
  offset_file_read = 97L;
  offset_data_read = 1;
  // chunk_read is updated by the read call, so request a full chunk again explicitly
  chunk_read = read_size_check;
  int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100

  if (offset != 0L) offset_file_read += offset;

  // read one chunk that will reach EOF and return TREXIO_END code
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
  assert(rc == TREXIO_END);
  assert(chunk_read == eof_read_size_check);
  // the last slot of the index buffer must still hold its initial zero
  assert(index_read[4*size_r-1] == 0);
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // close current session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // free the memory
  free(index_read);
  free(value_read);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Query the size of the sparse mo_2e_int_eri dataset stored in the TREXIO file
 * and compare it with size_check.
 * Returns 0; every failure aborts the test through assert().
 */
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {

  trexio_exit_code rc;

/*================= START OF TEST ==================*/

  // open file
  trexio_t* trex_file = trexio_open(file_name, 'r', backend, &rc);
  assert (trex_file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // receives the size reported for the dataset
  int64_t n_stored;

  // ask the library for the size and compare it with the expectation
  rc = trexio_read_mo_2e_int_eri_size(trex_file, &n_stored);
  assert(rc == TREXIO_SUCCESS);
  assert(n_stored == size_check);

  // close current session
  rc = trexio_close(trex_file);
  assert (rc == TREXIO_SUCCESS);

/*================= END OF TEST ==================*/

  return 0;
}
/*
 * Test launcher: removes any stale test directory, runs two write passes of
 * SIZE elements each (the second one starting at offset SIZE) with the
 * matching read and size checks, then removes the test directory again.
 */
int main(){

/*============== Test launcher ================*/

  int status = system(RM_COMMAND);
  assert (status == 0);

  // pass 0 writes at offset 0, pass 1 at offset SIZE (SIZE elements in N_CHUNKS chunks each)
  for (int pass = 0; pass < 2; ++pass) {
    const int64_t start = (int64_t) pass * SIZE;

    test_write_dset_sparse    (TREXIO_FILE, TEST_BACKEND, start);
    // the existence check is only performed after the first write
    if (pass == 0) test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
    test_read_dset_sparse     (TREXIO_FILE, TEST_BACKEND, start);
    // after this pass the dataset is expected to hold start + SIZE elements
    test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, start + SIZE);
  }

  status = system(RM_COMMAND);
  assert (status == 0);

  return 0;
}

View File

@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_num(file, num);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
assert (rc == TREXIO_SUCCESS);
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
rc = trexio_write_mo_num(file, 0);
assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
rc = trexio_has_nucleus_num(file);
assert (rc == TREXIO_SUCCESS);
rc = trexio_has_nucleus_repulsion(file);
assert (rc == TREXIO_SUCCESS);
// check that the num variable does not exist
rc = trexio_has_mo_num(file);
assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
// parameters to be read
int num;
int cartesian;
float repulsion_32;
double repulsion_64, d;
/*================= START OF TEST ==================*/
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
assert (rc == TREXIO_SUCCESS);
assert (num == 12);
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
assert (rc == TREXIO_SUCCESS);
d = repulsion_32 - 2.14171677;
assert( d*d < 1.e-8 );
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
assert (rc == TREXIO_SUCCESS);
d = repulsion_64 - 2.14171677;
assert( d*d < 1.e-14 );
// read non-existing numerical attribute from the file
rc = trexio_read_mo_num(file, &num);
assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {
return 0;
}

View File

@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_num(file, num);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
assert (rc == TREXIO_SUCCESS);
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
rc = trexio_write_mo_num(file, 0);
assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
rc = trexio_has_nucleus_num(file);
assert (rc == TREXIO_SUCCESS);
rc = trexio_has_nucleus_repulsion(file);
assert (rc == TREXIO_SUCCESS);
// check that the num variable does not exist
rc = trexio_has_mo_num(file);
assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
// parameters to be read
int num;
int cartesian;
float repulsion_32;
double repulsion_64, d;
/*================= START OF TEST ==================*/
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
assert (rc == TREXIO_SUCCESS);
assert (num == 12);
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
assert (rc == TREXIO_SUCCESS);
d = repulsion_32 - 2.14171677;
assert( d*d < 1.e-8 );
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
assert (rc == TREXIO_SUCCESS);
d = repulsion_64 - 2.14171677;
assert( d*d < 1.e-14 );
// read non-existing numerical attribute from the file
rc = trexio_read_mo_num(file, &num);
assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {
return 0;
}

View File

@ -11,12 +11,12 @@ program test_trexio
print'(a,i3)', " TREXIO MINOR VERSION : ", TREXIO_VERSION_MINOR
print * , "============================================"
call system('rm -rf test_write_f.dir')
call system('rm -rf -- test_write_f.dir')
print *, 'call test_write(''test_write_f.dir'', TREXIO_TEXT)'
call test_write('test_write_f.dir', TREXIO_TEXT)
print *, 'call test_read(''test_write_f.dir'', TREXIO_TEXT)'
call test_read('test_write_f.dir', TREXIO_TEXT)
call system('rm -rf test_write_f.dir')
call system('rm -rf -- test_write_f.dir')
call test_read_void('test_write_f.dir', TREXIO_TEXT)
@ -61,6 +61,22 @@ subroutine test_write(file_name, back_end)
character(len=:), allocatable :: sym_str
character(len=:), allocatable :: label(:)
! sparse data
integer(4) :: index_sparse_mo_2e_int_eri(4,100)
double precision :: value_sparse_mo_2e_int_eri(100)
integer :: i, n_buffers = 5
integer(8) :: buf_size, offset
buf_size = 100/n_buffers
do i = 1, 100
index_sparse_mo_2e_int_eri(1,i) = 4*i - 3
index_sparse_mo_2e_int_eri(2,i) = 4*i+1 - 3
index_sparse_mo_2e_int_eri(3,i) = 4*i+2 - 3
index_sparse_mo_2e_int_eri(4,i) = 4*i+3 - 3
value_sparse_mo_2e_int_eri(i) = 3.14 + float(i)
enddo
! parameters to be written
num = 12
charge = (/ 6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1. /)
@ -96,6 +112,9 @@ subroutine test_write(file_name, back_end)
rc = trexio_has_nucleus_charge(trex_file)
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 2')
rc = trexio_has_mo_2e_int_eri(trex_file)
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 3')
rc = trexio_write_nucleus_num(trex_file, num)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE NUM')
@ -106,8 +125,8 @@ subroutine test_write(file_name, back_end)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE COORD')
rc = trexio_write_nucleus_label(trex_file, label, 5)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
deallocate(label)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
rc = trexio_write_nucleus_point_group(trex_file, sym_str, 32)
deallocate(sym_str)
@ -119,6 +138,20 @@ subroutine test_write(file_name, back_end)
rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX')
! write mo_num which will be used to determine the optimal size of int indices
if (trexio_has_mo_num(trex_file) == TREXIO_HAS_NOT) then
rc = trexio_write_mo_num(trex_file, 1000)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE MO NUM')
endif
offset = 0
do i = 1,n_buffers
rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, &
index_sparse_mo_2e_int_eri(1,offset+1), &
value_sparse_mo_2e_int_eri(offset+1))
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE SPARSE')
offset = offset + buf_size
enddo
rc = trexio_has_nucleus_num(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 1')
@ -126,6 +159,9 @@ subroutine test_write(file_name, back_end)
rc = trexio_has_nucleus_coord(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 2')
rc = trexio_has_mo_2e_int_eri(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 3')
rc = trexio_close(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS CLOSE')
@ -160,11 +196,25 @@ subroutine test_read(file_name, back_end)
character(len=32) :: sym_str
! sparse data
integer(4) :: index_sparse_mo_2e_int_eri(4,20)
double precision :: value_sparse_mo_2e_int_eri(20)
integer(8) :: read_buf_size = 10
integer(8) :: read_buf_size_save = 10
integer(8) :: offset_read = 40
integer(8) :: offset_data_read = 5
integer(8) :: offset_eof = 97
integer(8) :: offset_data_eof = 1
integer(8) :: size_toread = 0
character*(128) :: str
num = 12
basis_shell_num = 24
index_sparse_mo_2e_int_eri = 0
value_sparse_mo_2e_int_eri = 0.0d0
! ================= START OF TEST ===================== !
trex_file = trexio_open(file_name, 'r', back_end, rc)
@ -230,6 +280,52 @@ subroutine test_read(file_name, back_end)
endif
rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1), &
value_sparse_mo_2e_int_eri(offset_data_read + 1))
!do i = 1,20
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
!enddo
call trexio_assert(rc, TREXIO_SUCCESS)
if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then
write(*,*) 'SUCCESS READ SPARSE DATA'
else
print *, 'FAILURE SPARSE DATA CHECK'
call exit(-1)
endif
! attempt to read reaching EOF: should return TREXIO_END and
! NOT increment the existing values in the buffer (only upd with what has been read)
rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, &
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), &
value_sparse_mo_2e_int_eri(offset_data_eof + 1))
!do i = 1,20
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
!enddo
call trexio_assert(rc, TREXIO_END)
if (read_buf_size == 3 .and. &
index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then
write(*,*) 'SUCCESS READ SPARSE DATA EOF'
read_buf_size = read_buf_size_save
else
print *, 'FAILURE SPARSE DATA EOF CHECK'
call exit(-1)
endif
rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread)
call trexio_assert(rc, TREXIO_SUCCESS)
if (size_toread == 100) then
write(*,*) 'SUCCESS READ SPARSE SIZE'
else
print *, 'FAILURE SPARSE SIZE CHECK'
call exit(-1)
endif
rc = trexio_close(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS)
@ -254,6 +350,9 @@ subroutine test_read_void(file_name, back_end)
! ================= START OF TEST ===================== !
trex_file = trexio_open(file_name, 'r', back_end, rc)
if (rc /= TREXIO_OPEN_ERROR) then
rc = trexio_close(trex_file)
endif
call trexio_assert(rc, TREXIO_OPEN_ERROR)
call trexio_string_of_error(rc, str)
@ -262,4 +361,3 @@ subroutine test_read_void(file_name, back_end)
! ================= END OF TEST ===================== !
end subroutine test_read_void

View File

@ -6,20 +6,22 @@ config_file = 'trex.json'
trex_config = read_json(config_file)
# --------------------------------------------------------------------------- #
# -------------------------------- [WIP] ------------------------------------ #
# for now remove rdm from config because it functions are hardcoded
del trex_config['rdm']
# --------------------------------------------------------------------------- #
# -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- #
group_dict = get_group_dict(trex_config)
detailed_nums = get_detailed_num_dict(trex_config)
detailed_strs = get_detailed_str_dict(trex_config)
# helper dictionaries that contain names of groups, nums or dsets as keys
dsets = get_dset_dict(trex_config)
detailed_dsets_nostr, detailed_dsets_str = split_dset_dict_detailed(dsets)
detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
detailed_dsets = detailed_dsets_nostr.copy()
detailed_dsets.update(detailed_dsets_str)
# build a big dictionary with all pre-processed data
detailed_all = {
'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse),
'groups' : group_dict,
'numbers' : detailed_nums,
'strings' : detailed_strs
}
# consistency check for dimensioning variables
check_dim_consistency(detailed_nums, dsets)
# --------------------------------------------------------------------------- #
@ -38,7 +40,7 @@ files_todo = get_files_todo(source_files)
# populate files with iterative scheme, i.e. for unique functions
for fname in files_todo['auxiliary']:
iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
iterative_populate_file(fname, template_paths, detailed_all)
# populate has/read/write_num functions with recursive scheme
for fname in files_todo['attr_num']:
@ -56,6 +58,10 @@ for fname in files_todo['dset_data']:
for fname in files_todo['dset_str']:
recursive_populate_file(fname, template_paths, detailed_dsets_str)
# populate has/read/write_dset (sparse) functions with recursive scheme
for fname in files_todo['dset_sparse']:
recursive_populate_file(fname, template_paths, detailed_dsets_sparse)
# populate group-related functions with mixed (iterative+recursive) scheme [text backend]
for fname in files_todo['group']:
special_populate_text_group(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)

View File

@ -39,7 +39,7 @@ def get_files_todo(source_files: dict) -> dict:
files_todo = {}
#files_todo['all'] = list(filter(lambda x: 'read' in x or 'write' in x or 'has' in x or 'hrw' in x or 'flush' in x or 'free' in x, all_files))
files_todo['all'] = [f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f]
for key in ['dset_data', 'dset_str', 'attr_num', 'attr_str', 'group']:
for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']:
files_todo[key] = list(filter(lambda x: key in x, files_todo['all']))
files_todo['group'].append('struct_text_group_dset.h')
@ -107,6 +107,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
'group_num_h5_dtype', 'group_num_py_dtype',
'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
'group_dset', 'group_num', 'group_str', 'group']
for item in detailed_source.keys():
@ -179,17 +183,18 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
return output_line
def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
"""
Iteratively populate files with unique functions that contain templated variables.
Parameters:
filename (str) : template file to be populated
paths (dict) : dictionary of paths per source directory
groups (dict) : dictionary of groups
datasets (dict) : dictionary of datasets with substitution details
numbers (dict) : dictionary of numbers with substitution details
strings (dict) : dictionary of strings with substitution details
detailed_all(dict) : dictionary with substitution details with the following keys:
'groups' : dictionary of groups with substitution details
'datasets' : dictionary of datasets with substitution details
'numbers' : dictionary of numbers with substitution details
'strings' : dictionary of strings with substitution details
Returns:
None
@ -209,19 +214,19 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
if id == 0:
# special case for proper error handling when deallocating text groups
error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
f_out.write(populated_line)
elif id == 1:
populated_line = iterative_replace_line(line, triggers[id], datasets, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
f_out.write(populated_line)
elif id == 2:
populated_line = iterative_replace_line(line, triggers[id], numbers, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
f_out.write(populated_line)
elif id == 3:
populated_line = iterative_replace_line(line, triggers[id], strings, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
f_out.write(populated_line)
elif id == 4:
populated_line = iterative_replace_line(line, triggers[id], groups, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
f_out.write(populated_line)
else:
f_out.write(line)
@ -292,8 +297,8 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
fname_new = join('populated',f'pop_{fname}')
templ_path = get_template_path(fname, paths)
triggers = ['group_dset_dtype', 'group_dset_std_dtype_out', 'group_dset_std_dtype_in',
'group_num_dtype_double', 'group_num_std_dtype_out', 'group_num_std_dtype_in',
triggers = ['group_dset_dtype', 'group_dset_format_printf', 'group_dset_format_scanf',
'group_num_dtype_double', 'group_num_format_printf', 'group_num_format_scanf',
'group_dset', 'group_num', 'group_str', 'group']
for group in group_dict.keys():
@ -323,9 +328,9 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
if group != detailed_dset[dset]['group']:
continue
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['dtype'] != 'char*'):
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['group_dset_dtype'] != 'char*'):
continue
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['dtype'] == 'char*'):
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['group_dset_dtype'] == 'char*'):
continue
dset_allocated.append(dset)
@ -456,6 +461,122 @@ def get_group_dict (configuration: dict) -> dict:
return group_dict
def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) -> dict:
    """
    Returns the dictionary of dtype-related templated variables set for a given `dtype`.
    Keys are names of templated variables, values are strings to be used by the generator.

            Parameters:
                dtype (str)         : dtype corresponding to the trex.json (i.e. int/dim/index/float/float sparse/str)
                target (str)        : `num` or `dset`
                rank (int)          : [optional] value of n in n-index (sparse) dset; needed to build the printf/scanf format string
                int_len_printf(dict): [optional]
                                      keys: precision (e.g. 32 for int32_t)
                                      values: lengths reserved for one index when printing n-index (sparse) dset (e.g. 10 for int32_t)

            Returns:
                dtype_dict (dict) : dictionary dtype-related substitutions;
                                    empty if `dtype` matches none of the supported kinds

            Raises:
                Exception : if `target` is neither `num` nor `dset`, if a sparse `dtype` is requested
                            without both `rank` and `int_len_printf`, if `rank` is lower than 2,
                            or if `int_len_printf` is not a dictionary
    """
    if target not in ['num', 'dset']:
        raise Exception('Only num or dset target can be set.')
    if 'sparse' in dtype and (rank is None or int_len_printf is None):
        raise Exception("Both rank and int_len_printf arguments has to be provided to build the dtype_dict for sparse data.")
    if rank is not None and rank <= 1:
        raise Exception('Rank of sparse quantity cannot be lower than 2.')
    if int_len_printf is not None and not isinstance(int_len_printf, dict):
        raise Exception('int_len_printf has to be a dictionary of lengths for different precisions.')

    dtype_dict = {}
    # set up the key-value pairs depending on the dtype
    if dtype == 'float':
        dtype_dict.update({
            'default_prec'                    : '64',
            f'group_{target}_dtype'           : 'double',
            f'group_{target}_h5_dtype'        : 'native_double',
            f'group_{target}_f_dtype_default' : 'real(8)',
            f'group_{target}_f_dtype_double'  : 'real(8)',
            f'group_{target}_f_dtype_single'  : 'real(4)',
            f'group_{target}_dtype_default'   : 'double',
            f'group_{target}_dtype_double'    : 'double',
            f'group_{target}_dtype_single'    : 'float',
            f'group_{target}_format_printf'   : '24.16e',
            f'group_{target}_format_scanf'    : 'lf',
            f'group_{target}_py_dtype'        : 'float'
        })
    elif dtype in ['int', 'dim', 'index']:
        dtype_dict.update({
            'default_prec'                    : '32',
            f'group_{target}_dtype'           : 'int64_t',
            f'group_{target}_h5_dtype'        : 'native_int64',
            f'group_{target}_f_dtype_default' : 'integer(4)',
            f'group_{target}_f_dtype_double'  : 'integer(8)',
            f'group_{target}_f_dtype_single'  : 'integer(4)',
            f'group_{target}_dtype_default'   : 'int32_t',
            f'group_{target}_dtype_double'    : 'int64_t',
            f'group_{target}_dtype_single'    : 'int32_t',
            f'group_{target}_format_printf'   : '" PRId64 "',
            f'group_{target}_format_scanf'    : '" SCNd64 "',
            f'group_{target}_py_dtype'        : 'int'
        })
    elif dtype == 'str':
        dtype_dict.update({
            'default_prec'                    : '',
            f'group_{target}_dtype'           : 'char*',
            f'group_{target}_h5_dtype'        : '',
            f'group_{target}_f_dtype_default' : '',
            f'group_{target}_f_dtype_double'  : '',
            f'group_{target}_f_dtype_single'  : '',
            f'group_{target}_dtype_default'   : 'char*',
            f'group_{target}_dtype_double'    : '',
            f'group_{target}_dtype_single'    : '',
            f'group_{target}_format_printf'   : 's',
            f'group_{target}_format_scanf'    : 's',
            f'group_{target}_py_dtype'        : 'str'
        })
    elif 'sparse' in dtype:
        # format of a single index for each supported index precision
        item_printf_8  = f'%{int_len_printf[8]}" PRIu8 " '
        item_printf_16 = f'%{int_len_printf[16]}" PRIu16 " '
        item_printf_32 = f'%{int_len_printf[32]}" PRId32 " '
        item_scanf     = '%" SCNd32 " '
        # format string of an n-index sparse quantity:
        # `rank` index items followed by the format of the float value
        group_dset_format_printf_8  = '"' + item_printf_8  * rank + '%24.16e" '
        group_dset_format_printf_16 = '"' + item_printf_16 * rank + '%24.16e" '
        group_dset_format_printf_32 = '"' + item_printf_32 * rank + '%24.16e" '
        group_dset_format_scanf     = item_scanf * rank + '%lf'

        # set up the dictionary for sparse
        dtype_dict.update({
            'default_prec'                    : '',
            f'group_{target}_dtype'           : 'double',
            f'group_{target}_h5_dtype'        : '',
            f'group_{target}_f_dtype_default' : '',
            f'group_{target}_f_dtype_double'  : '',
            f'group_{target}_f_dtype_single'  : '',
            f'group_{target}_dtype_default'   : '',
            f'group_{target}_dtype_double'    : '',
            f'group_{target}_dtype_single'    : '',
            'sparse_format_printf_8'          : group_dset_format_printf_8,
            'sparse_format_printf_16'         : group_dset_format_printf_16,
            'sparse_format_printf_32'         : group_dset_format_printf_32,
            f'group_{target}_format_scanf'    : group_dset_format_scanf,
            f'group_{target}_py_dtype'        : ''
        })

    return dtype_dict
def get_detailed_num_dict (configuration: dict) -> dict:
"""
Returns the dictionary of all `num`-suffixed variables.
@ -472,40 +593,17 @@ def get_detailed_num_dict (configuration: dict) -> dict:
for k2,v2 in v1.items():
if len(v2[1]) == 0:
tmp_num = f'{k1}_{k2}'
if 'str' not in v2[0]:
if not 'str' in v2[0]:
tmp_dict = {}
tmp_dict['group'] = k1
tmp_dict['group_num'] = tmp_num
num_dict[tmp_num] = tmp_dict
# TODO the arguments below are almost the same as for group_dset (except for trex_json_int_type) and can be exported from somewhere
if v2[0] == 'float':
tmp_dict['datatype'] = 'double'
tmp_dict['group_num_h5_dtype'] = 'native_double'
tmp_dict['group_num_f_dtype_default']= 'real(8)'
tmp_dict['group_num_f_dtype_double'] = 'real(8)'
tmp_dict['group_num_f_dtype_single'] = 'real(4)'
tmp_dict['group_num_dtype_default']= 'double'
tmp_dict['group_num_dtype_double'] = 'double'
tmp_dict['group_num_dtype_single'] = 'float'
tmp_dict['default_prec'] = '64'
tmp_dict['group_num_std_dtype_out'] = '24.16e'
tmp_dict['group_num_std_dtype_in'] = 'lf'
tmp_dict['group_num_py_dtype'] = 'float'
elif v2[0] in ['int', 'dim']:
tmp_dict['datatype'] = 'int64_t'
tmp_dict['group_num_h5_dtype'] = 'native_int64'
tmp_dict['group_num_f_dtype_default']= 'integer(4)'
tmp_dict['group_num_f_dtype_double'] = 'integer(8)'
tmp_dict['group_num_f_dtype_single'] = 'integer(4)'
tmp_dict['group_num_dtype_default']= 'int32_t'
tmp_dict['group_num_dtype_double'] = 'int64_t'
tmp_dict['group_num_dtype_single'] = 'int32_t'
tmp_dict['default_prec'] = '32'
tmp_dict['group_num_std_dtype_out'] = '" PRId64 "'
tmp_dict['group_num_std_dtype_in'] = '" SCNd64 "'
tmp_dict['group_num_py_dtype'] = 'int'
tmp_dict.update(get_dtype_dict(v2[0], 'num'))
if v2[0] in ['int', 'dim']:
tmp_dict['trex_json_int_type'] = v2[0]
else:
tmp_dict['trex_json_int_type'] = ''
return num_dict
@ -571,101 +669,101 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
"""
dset_numeric_dict = {}
dset_string_dict = {}
dset_sparse_dict = {}
for k,v in datasets.items():
# create a temp dictionary
tmp_dict = {}
# specify details required to replace templated variables later
if v[0] == 'float':
datatype = 'double'
group_dset_h5_dtype = 'native_double'
group_dset_f_dtype_default= 'real(8)'
group_dset_f_dtype_double = 'real(8)'
group_dset_f_dtype_single = 'real(4)'
group_dset_dtype_default= 'double'
group_dset_dtype_double = 'double'
group_dset_dtype_single = 'float'
default_prec = '64'
group_dset_std_dtype_out = '24.16e'
group_dset_std_dtype_in = 'lf'
group_dset_py_dtype = 'float'
elif v[0] in ['int', 'index']:
datatype = 'int64_t'
group_dset_h5_dtype = 'native_int64'
group_dset_f_dtype_default= 'integer(4)'
group_dset_f_dtype_double = 'integer(8)'
group_dset_f_dtype_single = 'integer(4)'
group_dset_dtype_default= 'int32_t'
group_dset_dtype_double = 'int64_t'
group_dset_dtype_single = 'int32_t'
default_prec = '32'
group_dset_std_dtype_out = '" PRId64 "'
group_dset_std_dtype_in = '" SCNd64 "'
group_dset_py_dtype = 'int'
elif v[0] == 'str':
datatype = 'char*'
group_dset_h5_dtype = ''
group_dset_f_dtype_default = ''
group_dset_f_dtype_double = ''
group_dset_f_dtype_single = ''
group_dset_dtype_default = 'char*'
group_dset_dtype_double = ''
group_dset_dtype_single = ''
default_prec = ''
group_dset_std_dtype_out = 's'
group_dset_std_dtype_in = 's'
group_dset_py_dtype = 'str'
rank = len(v[1])
datatype = v[0]
# add the dset name for templates
# define whether the dset is sparse
is_sparse = False
int_len_printf = {}
if 'sparse' in datatype:
is_sparse = True
int_len_printf[32] = 10
int_len_printf[16] = 5
int_len_printf[8] = 3
# get the dtype-related substitutions required to replace templated variables later
if not is_sparse:
dtype_dict = get_dtype_dict(datatype, 'dset')
else:
dtype_dict = get_dtype_dict(datatype, 'dset', rank, int_len_printf)
tmp_dict.update(dtype_dict)
# set the group_dset key to the full name of the dset
tmp_dict['group_dset'] = k
# add flag to detect index types
if 'index' == v[0]:
if 'index' in datatype:
tmp_dict['is_index'] = 'file->one_based'
else:
tmp_dict['is_index'] = 'false'
# add the datatypes for templates
tmp_dict['dtype'] = datatype
tmp_dict['group_dset_dtype'] = datatype
tmp_dict['group_dset_h5_dtype'] = group_dset_h5_dtype
tmp_dict['group_dset_f_dtype_default'] = group_dset_f_dtype_default
tmp_dict['group_dset_f_dtype_double'] = group_dset_f_dtype_double
tmp_dict['group_dset_f_dtype_single'] = group_dset_f_dtype_single
tmp_dict['group_dset_dtype_default'] = group_dset_dtype_default
tmp_dict['group_dset_dtype_double'] = group_dset_dtype_double
tmp_dict['group_dset_dtype_single'] = group_dset_dtype_single
tmp_dict['default_prec'] = default_prec
tmp_dict['group_dset_std_dtype_in'] = group_dset_std_dtype_in
tmp_dict['group_dset_std_dtype_out'] = group_dset_std_dtype_out
tmp_dict['group_dset_py_dtype'] = group_dset_py_dtype
# add the rank
tmp_dict['rank'] = len(v[1])
tmp_dict['group_dset_rank'] = str(tmp_dict['rank'])
tmp_dict['rank'] = rank
tmp_dict['group_dset_rank'] = str(rank)
# add the list of dimensions
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
# build a list of dimensions to be inserted in the dims array initialization, e.g. {ao_num, ao_num}
dim_list = tmp_dict['dims'][0]
if tmp_dict['rank'] > 1:
for i in range(1, tmp_dict['rank']):
if rank > 1:
for i in range(1, rank):
dim_toadd = tmp_dict['dims'][i]
dim_list += f', {dim_toadd}'
tmp_dict['group_dset_dim_list'] = dim_list
if tmp_dict['rank'] == 0:
if rank == 0:
dim_f_list = ""
else:
dim_f_list = "(*)"
tmp_dict['group_dset_f_dims'] = dim_f_list
if is_sparse:
# store the max possible dim of the sparse dset (e.g. mo_num)
tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
# build printf/scanf sequence and compute line length for n-index sparse quantity
index_printf = f'*(index_sparse + {str(rank)}*i'
index_scanf = f'index_sparse + {str(rank)}*i'
# one index item consumes up to index_length characters (int32_len_printf for int32 + 1 for space)
group_dset_sparse_indices_printf = index_printf + ')'
group_dset_sparse_indices_scanf = index_scanf
sparse_line_length_32 = int_len_printf[32] + 1
sparse_line_length_16 = int_len_printf[16] + 1
sparse_line_length_8 = int_len_printf[8] + 1
# loop from 1 because we already have stored one index
for index_count in range(1,rank):
group_dset_sparse_indices_printf += f', {index_printf} + {index_count})'
group_dset_sparse_indices_scanf += f', {index_scanf} + {index_count}'
sparse_line_length_32 += int_len_printf[32] + 1
sparse_line_length_16 += int_len_printf[16] + 1
sparse_line_length_8 += int_len_printf[8] + 1
# add 24 chars occupied by the floating point value of sparse dataset + 1 char for "\n"
sparse_line_length_32 += 24 + 1
sparse_line_length_16 += 24 + 1
sparse_line_length_8 += 24 + 1
tmp_dict['sparse_line_length_32'] = str(sparse_line_length_32)
tmp_dict['sparse_line_length_16'] = str(sparse_line_length_16)
tmp_dict['sparse_line_length_8'] = str(sparse_line_length_8)
tmp_dict['group_dset_sparse_indices_printf'] = group_dset_sparse_indices_printf
tmp_dict['group_dset_sparse_indices_scanf'] = group_dset_sparse_indices_scanf
# add group name as a key-value pair to the dset dict
tmp_dict['group'] = v[2]
# split datasets in numeric- and string- based
if (datatype == 'char*'):
if 'str' in datatype:
dset_string_dict[k] = tmp_dict
elif is_sparse:
dset_sparse_dict[k] = tmp_dict
else:
dset_numeric_dict[k] = tmp_dict
return (dset_numeric_dict, dset_string_dict)
return (dset_numeric_dict, dset_string_dict, dset_sparse_dict)
def check_dim_consistency(num: dict, dset: dict) -> None:

View File

@ -5,29 +5,37 @@
This page contains information about the general structure of the
TREXIO library. The source code of the library can be automatically
generated based on the contents of the ~trex.json~ configuration file,
which itself is compiled from different sections (groups) presented below.
which itself is compiled from different sections (groups) presented
below.
For more information about the automatic generation of the source code
or regarding possible modifications, please contact the TREXIO developers.
or regarding possible modifications, please contact the TREXIO
developers.
All quantities are saved in TREXIO file in atomic units.
The dimensions of the arrays in the tables below are given in
column-major order (as in Fortran), and the ordering of the dimensions
is reversed in the produced ~trex.json~ configuration file as the library is
All quantities are saved in the TREXIO file in atomic units. The
dimensions of the arrays in the tables below are given in column-major
order (as in Fortran), and the ordering of the dimensions is reversed
in the produced ~trex.json~ configuration file as the library is
written in C.
TREXIO currently supports ~int~, ~float~ and ~str~ types for both single attributes and arrays.
Note, that some attributes might have ~dim~ type (e.g. ~num~ of the ~nucleus~ group).
This type is treated exactly the same as ~int~ with the only difference that ~dim~ variables
cannot be negative or zero. This additional constraint is required because ~dim~ attributes
are used internally to allocate memory and to check array boundaries in the memory-safe API.
Most of the times, the ~dim~ variables contain ~num~ suffix.
TREXIO currently supports ~int~, ~float~ and ~str~ types for both
single attributes and arrays. Note that some attributes might have
~dim~ type (e.g. ~num~ of the ~nucleus~ group). This type is treated
exactly the same as ~int~ with the only difference that ~dim~
variables cannot be negative. This additional constraint is required
because ~dim~ attributes are used internally to allocate memory and to
check array boundaries in the memory-safe API. Most of the time, the
~dim~ variables contain the ~num~ suffix.
In Fortran, the arrays are 1-based and in most other languages the
arrays are 0-based. Hence, we introduce the ~index~ type which is a
1-based ~int~ in the Fortran interface and 0-based otherwise.
For sparse data structures such as electron repulsion integrals,
the data can be too large to fit in memory and the data needs to be
fetched using multiple function calls to perform I/O on buffers.
#+begin_src python :tangle trex.json :exports none
{
#+end_src
@ -78,14 +86,14 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
#+CALL: json(data=electron, title="electron")
#+RESULTS:
:RESULTS:
:results:
#+begin_src python :tangle trex.json
"electron": {
"up_num" : [ "int", [] ]
, "dn_num" : [ "int", [] ]
} ,
#+end_src
:END:
:end:
* Nucleus (nucleus group)
@ -100,10 +108,11 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
| ~coord~ | ~float~ | ~(3,nucleus.num)~ | Coordinates of the atoms |
| ~label~ | ~str~ | ~(nucleus.num)~ | Atom labels |
| ~point_group~ | ~str~ | | Symmetry point group |
| ~repulsion~ | ~float~ | | Nuclear repulsion energy |
#+CALL: json(data=nucleus, title="nucleus")
#+RESULTS:
:RESULTS:
:results:
#+begin_src python :tangle trex.json
"nucleus": {
"num" : [ "dim" , [] ]
@ -111,9 +120,10 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
, "coord" : [ "float", [ "nucleus.num", "3" ] ]
, "label" : [ "str" , [ "nucleus.num" ] ]
, "point_group" : [ "str" , [] ]
, "repulsion" : [ "float", [] ]
} ,
#+end_src
:END:
:end:
* Effective core potentials (ecp group)
@ -617,15 +627,18 @@ prim_factor =
:end:
* TODO Slater determinants
* TODO Reduced density matrices (rdm group)
* Reduced density matrices (rdm group)
#+NAME: rdm
| Variable | Type | Dimensions | Description |
|------------+----------------+------------------------------------+-------------|
| ~one_e~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~one_e_up~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~one_e_dn~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~two_e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | |
|-----------+----------------+------------------------------------+-----------------------------------------------------------------------|
| ~1e~ | ~float~ | ~(mo.num, mo.num)~ | One body density matrix |
| ~1e_up~ | ~float~ | ~(mo.num, mo.num)~ | \uparrow-spin component of the one body density matrix |
| ~1e_dn~ | ~float~ | ~(mo.num, mo.num)~ | \downarrow-spin component of the one body density matrix |
| ~2e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | Two-body reduced density matrix (spin trace) |
| ~2e_upup~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\uparrow component of the two-body reduced density matrix |
| ~2e_dndn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \downarrow\downarrow component of the two-body reduced density matrix |
| ~2e_updn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\downarrow component of the two-body reduced density matrix |
#+CALL: json(data=rdm, title="rdm", last=1)
@ -633,10 +646,13 @@ prim_factor =
:results:
#+begin_src python :tangle trex.json
"rdm": {
"one_e" : [ "float" , [ "mo.num", "mo.num" ] ]
, "one_e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
, "one_e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
, "two_e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
"1e" : [ "float" , [ "mo.num", "mo.num" ] ]
, "1e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
, "1e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
, "2e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_upup" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_dndn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_updn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
}
#+end_src
:end: