1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-11-03 20:54:07 +01:00

Merge pull request #70 from TREX-CoE/add-sparse-datasets

- Add sparse datasets
- Modularize generator_tools.py
This commit is contained in:
Evgeny Posenitskiy 2021-12-17 17:13:22 +01:00 committed by GitHub
commit 8ca74ffef1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 2349 additions and 783 deletions

4
.gitignore vendored
View File

@ -11,6 +11,8 @@ m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
autom4te.cache/
build-config/
ar-lib
compile
config.guess
config.log
@ -38,5 +40,3 @@ test-suite.log
*.h5
trexio-*.tar.gz
trex.json

View File

@ -90,6 +90,7 @@ TESTS_C = \
tests/io_num_text \
tests/io_dset_float_text \
tests/io_dset_int_text \
tests/io_dset_sparse_text \
tests/io_safe_dset_float_text \
tests/io_str_text \
tests/io_dset_str_text \
@ -102,6 +103,7 @@ TESTS_C += \
tests/io_num_hdf5 \
tests/io_dset_float_hdf5 \
tests/io_dset_int_hdf5 \
tests/io_dset_sparse_hdf5 \
tests/io_safe_dset_float_hdf5 \
tests/io_str_hdf5 \
tests/io_dset_str_hdf5 \
@ -117,8 +119,8 @@ check_PROGRAMS = $(TESTS)
# specify common LDADD options for all tests
LDADD = src/libtrexio.la
test_trexio_f = $(srcdir)/tests/trexio_f.f90
CLEANFILES += $(test_trexio_f)
$(test_trexio_f): $(trexio_f)
cp $(trexio_f) $(test_trexio_f)
@ -126,7 +128,6 @@ $(test_trexio_f): $(trexio_f)
trexio.mod: tests/trexio_f.o
tests_test_f_SOURCES = $(test_trexio_f) tests/test_f.f90
tests_test_f_LDFLAGS = -no-install
clean-local:
-rm -rf -- *.dir/ *.h5 __pycache__/
@ -134,7 +135,7 @@ clean-local:
# =============== DOCUMENTATION =============== #
HTML_TANGLED = docs/index.html \
docs/Sparse.html \
docs/examples.html \
docs/templator_hdf5.html \
docs/trex.html \
docs/README.html \
@ -179,11 +180,13 @@ BUILT_SOURCES += $(SOURCES) $(trexio_f) $(test_trexio_f)
all: .git_hash
GENERATOR_FILES = $(srcdir)/tools/generator.py \
$(srcdir)/tools/generator_tools.py
$(SOURCES): $(trexio_f)
src/trexio.c: $(trexio_h)
$(trexio_f): $(ORG_FILES)
$(trexio_f): $(ORG_FILES) $(GENERATOR_FILES)
cd $(srcdir)/tools && ./build_trexio.sh
$(htmlizer): $(ORG_FILES) $(srcdir)/src/README.org
@ -227,7 +230,7 @@ $(pytrexio_py): $(pytrexio_c)
# Build Python module and C wrapper code for TREXIO using SWIG
# [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface
$(pytrexio_c): $(ORG_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
cp $(trexio_h) src/
cd src/ && \
$(SWIG) -python -py3 -o pytrexio_wrap.c pytrexio.i
@ -248,4 +251,3 @@ CLEANFILES += $(pytrexio_c) \
.PHONY: cppcheck python-test python-install python-sdist check-numpy FORCE
endif

View File

@ -1,22 +0,0 @@
See templator_front.org
* Text back end
As the size of the dataset should be extensible, the simplest
solution is to use one file for each sparse data set, and store
the name of this file in the group.
Each integral can be a line in the file:
i j k l x
which can be read with "%10ld %10ld %10ld %10ld %24.16e".
The offset can be used with ~fseek(69L*offset, SEEK_SET)~
* HDF5 Back end
We need to declare the number of rows of the dataset as
~UNLIMITED~. This requires to use the ~Chunked~ storage, and the
chunks should absolutely not be larger than 1MB.
To extend the storage, see :
https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
(figure 17)
If the offset+num > nmax, we need to extend the dataset.

262
examples.org Normal file
View File

@ -0,0 +1,262 @@
#+TITLE: Examples
#+STARTUP: latexpreview
#+SETUPFILE: docs/theme.setup
* Accessing sparse quantities
** Fortran
:PROPERTIES:
:header-args: :tangle print_energy.f90
:END:
#+begin_src f90
program print_energy
use trexio
implicit none
character*(128) :: filename ! Name of the input file
integer :: rc ! Return code for error checking
integer(8) :: f ! TREXIO file handle
character*(128) :: err_msg ! Error message
#+end_src
This program computes the energy as:
\[
E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\,
+\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l
\rangle\; \textrm{ with } \; 0 < i,j,k,l \le n
\]
One needs to read from the TREXIO file:
- $n$ :: The number of molecular orbitals
- $E_{\text{NN}}$ :: The nuclear repulsion energy
- $D_{ij}$ :: The one-body reduced density matrix
- $\langle i |h| j \rangle$ :: The one-electron Hamiltonian integrals
- $\Gamma_{ijkl}$ :: The two-body reduced density matrix
- $\langle i j | k l \rangle$ :: The electron repulsion integrals
#+begin_src f90
integer :: n
double precision :: E, E_nn
double precision, allocatable :: D(:,:), h0(:,:)
double precision, allocatable :: G(:,:,:,:), W(:,:,:,:)
#+end_src
*** Declare Temporary variables
#+begin_src f90
integer :: i, j, k, l, m
integer(8), parameter :: BUFSIZE = 100000_8
integer(8) :: offset, icount, size_max
integer :: buffer_index(4,BUFSIZE)
double precision :: buffer_values(BUFSIZE)
double precision, external :: ddot ! BLAS dot product
#+end_src
*** Obtain the name of the TREXIO file from the command line, and open it for reading
#+begin_src f90
call getarg(1, filename)
f = trexio_open (filename, 'r', TREXIO_HDF5, rc)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error opening TREXIO file: '//trim(err_msg)
stop
end if
#+end_src
*** Read the nuclear repulsion energy
#+begin_src f90
rc = trexio_read_nucleus_repulsion(f, E_nn)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading nuclear repulsion: '//trim(err_msg)
stop
end if
#+end_src
*** Read the number of molecular orbitals
#+begin_src f90
rc = trexio_read_mo_num(f, n)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading number of MOs: '//trim(err_msg)
stop
end if
#+end_src
*** Allocate memory
#+begin_src f90
allocate( D(n,n), h0(n,n) )
allocate( G(n,n,n,n), W(n,n,n,n) )
G(:,:,:,:) = 0.d0
W(:,:,:,:) = 0.d0
#+end_src
*** Read one-electron quantities
#+begin_src f90
rc = trexio_has_mo_1e_int_core_hamiltonian(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No core hamiltonian in file'
end if
rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading core Hamiltonian: '//trim(err_msg)
stop
end if
rc = trexio_has_rdm_1e(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No 1e RDM in file'
end if
rc = trexio_read_rdm_1e(f, D)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading one-body RDM: '//trim(err_msg)
stop
end if
#+end_src
*** Read two-electron quantities
Reading is done with OpenMP. Each thread reads its own buffer, and
the buffers are then processed in parallel.
Reading the file requires a lock, so it is done in a critical
section. The ~offset~ variable is shared, and it is incremented in
the critical section. For each read, the function returns in
~icount~ the number of read integrals, so this variable needs also
to be protected in the critical section when modified.
**** Electron repulsion integrals
#+begin_src f90
! Make sure the electron repulsion integrals are present before reading them
rc = trexio_has_mo_2e_int_eri(f)
if (rc /= TREXIO_SUCCESS) then
stop 'No electron repulsion integrals in file'
end if
! size_max receives the total number of integrals stored in the file
rc = trexio_read_mo_2e_int_eri_size (f, size_max)
if (rc /= TREXIO_SUCCESS) then
call trexio_string_of_error(rc, err_msg)
print *, 'Error reading number of ERIs: '//trim(err_msg)
stop
end if
! offset is shared between the threads: it is the position of the next unread integral
offset = 0_8
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
!$OMP buffer_index, buffer_values, m)
! icount is private: each thread keeps looping as long as its last read filled a whole buffer
icount = BUFSIZE
do while (icount == BUFSIZE)
! Only one thread at a time reads from the file and advances the shared offset
!$OMP CRITICAL
if (offset < size_max) then
rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values)
offset = offset + icount
else
! Nothing left to read: an empty buffer also terminates the while loop
icount = 0
end if
!$OMP END CRITICAL
! Outside of the critical section: scatter the private buffer into the dense array W.
! Each value read is stored at the eight index permutations listed below.
do m=1,icount
i = buffer_index(1,m)
j = buffer_index(2,m)
k = buffer_index(3,m)
l = buffer_index(4,m)
W(i,j,k,l) = buffer_values(m)
W(k,j,i,l) = buffer_values(m)
W(i,l,k,j) = buffer_values(m)
W(k,l,i,j) = buffer_values(m)
W(j,i,l,k) = buffer_values(m)
W(j,k,l,i) = buffer_values(m)
W(l,i,j,k) = buffer_values(m)
W(l,k,j,i) = buffer_values(m)
end do
end do
!$OMP END PARALLEL
#+end_src
**** Reduced density matrix
#+begin_src f90
! Make sure the two-body reduced density matrix is present before reading it
rc = trexio_has_rdm_2e(f)
if (rc /= TREXIO_SUCCESS) then
  stop 'No two-body density matrix in file'
end if

! size_max receives the total number of 2-RDM elements stored in the file
rc = trexio_read_rdm_2e_size (f, size_max)
if (rc /= TREXIO_SUCCESS) then
  call trexio_string_of_error(rc, err_msg)
  print *, 'Error reading number of 2-RDM elements: '//trim(err_msg)
  stop
end if

! offset is shared between the threads: it is the position of the next unread element
offset = 0_8
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
!$OMP buffer_index, buffer_values, m)
icount = BUFSIZE
! The loop condition tests the thread-private icount, exactly as in the
! ERI loop. Testing the shared offset here would read it outside of the
! critical section while another thread may be updating it.
do while (icount == BUFSIZE)
  ! Only one thread at a time reads from the file and advances the shared offset
  !$OMP CRITICAL
  if (offset < size_max) then
    rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values)
    offset = offset + icount
  else
    ! Nothing left to read: an empty buffer also terminates the while loop
    icount = 0
  end if
  !$OMP END CRITICAL
  ! Outside of the critical section: scatter the private buffer into the dense array G
  do m=1,icount
    i = buffer_index(1,m)
    j = buffer_index(2,m)
    k = buffer_index(3,m)
    l = buffer_index(4,m)
    G(i,j,k,l) = buffer_values(m)
  end do
end do
!$OMP END PARALLEL
#+end_src
*** Compute the energy
As $(n,m)$ 2D arrays are stored in memory as $(n \times m)$ 1D
arrays, we could pass the matrices to the ~ddot~ BLAS function to
perform the summations in a single call for the 1-electron quantities.
Instead, we prefer to interleave the 1-electron (negative) and
2-electron (positive) summations to have a better cancellation of
numerical errors.
Here $n^4$ can be larger than the largest possible 32-bit integer,
so it is not safe to pass $n^4$ to the ~ddot~ BLAS
function. Hence, we perform $n^2$ loops, using vectors of size $n^2$.
#+begin_src f90
E = 0.d0
do l=1,n
E = E + ddot( n, D(1,l), 1, h0(1,l), 1 )
do k=1,n
E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l), 1 )
end do
end do
E = E + E_nn
print *, 'Energy: ', E
#+end_src
*** Terminate
#+begin_src f90
deallocate( D, h0, G, W )
end program
#+end_src

File diff suppressed because it is too large Load Diff

View File

@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c
cat populated/pop_write_*.c >> trexio_hdf5.c
cat populated/pop_hrw_*.h >> trexio_hdf5.h
cat helpers_hdf5.c >> trexio_hdf5.c
cat suffix_hdf5.h >> trexio_hdf5.h

View File

@ -201,15 +201,15 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
/* Write the dimensioning variables */
const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
const hid_t dspace = H5Screate(H5S_SCALAR);
const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
dtype, dspace, H5P_DEFAULT, H5P_DEFAULT);
if (num_id <= 0) {
H5Sclose(dspace);
H5Tclose(dtype);
return TREXIO_INVALID_ID;
}
const herr_t status = H5Awrite(num_id, dtype, &(num));
if (status < 0) {
H5Aclose(num_id);
@ -217,7 +217,7 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
H5Tclose(dtype);
return TREXIO_FAILURE;
}
H5Sclose(dspace);
H5Aclose(num_id);
H5Tclose(dtype);
@ -262,7 +262,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
@ -317,7 +317,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$*
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
trexio_hdf5_t* f = (trexio_hdf5_t*) file;
@ -372,6 +372,207 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src
** Template for HDF5 has/read/write the dataset of sparse data
Sparse data is stored using extensible datasets of HDF5. Extensibility is required
due to the fact that the sparse data will be written in chunks of user-defined size.
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src
#+begin_src c :tangle write_dset_sparse_hdf5.c
/* Write one chunk of `size` sparse elements of $group_dset$ to the HDF5 file.
   Indices and values go to two separate 1D datasets named
   <name>_indices and <name>_values. Each element contributes
   $group_dset_rank$ indices and one double precision value.

   file         : TREXIO file handle (HDF5 back end)
   offset_file  : number of elements to skip in the file before writing
   size         : number of elements in this chunk
   size_max     : upper bound used to choose the narrowest integer type for the indices
   index_sparse : size * $group_dset_rank$ indices
   value_sparse : size values

   Returns TREXIO_SUCCESS, or the error code of the first step that failed. */
trexio_exit_code
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
                                const int64_t offset_file,
                                const int64_t size,
                                const int64_t size_max,
                                const int32_t* index_sparse,
                                const double* value_sparse)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  /* Negative values would be converted into huge unsigned sizes below,
     and HDF5 does not accept a chunk of zero elements. */
  if (offset_file < 0) return TREXIO_INVALID_ARG_2;
  if (size <= 0) return TREXIO_INVALID_ARG_3;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  hid_t index_dtype;
  void* index_p;
  const uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;

  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {

    uint8_t* index = CALLOC(size_ranked, uint8_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    /* unsigned counter: same signedness as size_ranked */
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint8_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT8;

  } else if (size_max < UINT16_MAX) {

    uint16_t* index = CALLOC(size_ranked, uint16_t);
    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
    for (uint64_t i=0; i<size_ranked; ++i){
      index[i] = (uint16_t) index_sparse[i];
    }
    index_p = index;
    index_dtype = H5T_NATIVE_UINT16;

  } else {

    /* No conversion needed: the user buffer is written directly (never freed here) */
    index_p = (int32_t*) index_sparse;
    index_dtype = H5T_NATIVE_INT32;

  }

  /* Store float values in double precision */
  hid_t value_dtype = H5T_NATIVE_DOUBLE;

  /* Arrays of chunk dims that will be used for chunking the dataset */
  const hsize_t chunk_i_dims[1] = {size_ranked};
  const hsize_t chunk_v_dims[1] = {size};

  /* Indices and values are stored as 2 independent datasets in the HDF5 file.
     The names are compile-time string literals, so no copy into a
     fixed-size buffer is needed. */
  const char* dset_index_name = $GROUP_DSET$_NAME "_indices";
  const char* dset_value_name = $GROUP_DSET$_NAME "_values";

  trexio_exit_code rc_write = TREXIO_FAILURE;

  /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
    /* The dataset does not exist yet -> create it and write */

    /* Create chunked dataset with index_dtype datatype and write indices into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
    /* The temporary narrow-type copy (if any) is no longer needed */
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

    /* Create chunked dataset with value_dtype datatype and write values into it */
    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  } else {
    /* The dataset already exists -> open it, extend it and write */
    hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
    hsize_t offset_v[1] = {(hsize_t) offset_file};

    /* Open the existing index dataset and write the indices at offset_i */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
    if (index_p != index_sparse) FREE(index_p);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

    /* Open the existing value dataset and write the values at offset_v */
    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
    if (rc_write != TREXIO_SUCCESS) return rc_write;

  }

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
/* Read one chunk of at most `size` sparse elements of $group_dset$ from the HDF5 file.
   Indices are read from <name>_indices and values from <name>_values.

   file          : TREXIO file handle (HDF5 back end)
   offset_file   : number of elements to skip in the file before reading
   size          : number of elements requested
   size_max      : not used by this back end
   eof_read_size : set to the number of values actually read when the
                   end of the dataset is reached before `size` elements
   index_read    : output buffer for size * $group_dset_rank$ indices
   value_read    : output buffer for size values

   Returns TREXIO_SUCCESS, TREXIO_END when the end of the dataset was reached,
   or the error code of the read that failed. */
trexio_exit_code
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
                               const int64_t offset_file,
                               const int64_t size,
                               const int64_t size_max,
                               int64_t* const eof_read_size,
                               int32_t* const index_read,
                               double* const value_read)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  /* Negative values would be converted into huge unsigned offsets/counts below */
  if (offset_file < 0) return TREXIO_INVALID_ARG_2;
  if (size <= 0) return TREXIO_INVALID_ARG_3;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;

  (void) size_max; /* kept in the signature for a uniform back-end interface */

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  /* Indices and values are stored as 2 independent datasets in the HDF5 file.
     The names are compile-time string literals. */
  const char* dset_index_name = $GROUP_DSET$_NAME "_indices";
  const char* dset_value_name = $GROUP_DSET$_NAME "_values";

  hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
  hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$};

  hsize_t offset_v[1] = {(hsize_t) offset_file};
  hsize_t count_v[1] = {(hsize_t) size};

  const int is_index = 1, is_value = 0;
  trexio_exit_code rc_read;

  // attempt to read indices (reaching the end of the dataset is not an error)
  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, offset_i, count_i, NULL, is_index, index_read);
  if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;

  // attempt to read values
  // when EOF is encountered - the count_v[0] is modified and contains the number of elements being read
  rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, offset_v, count_v, eof_read_size, is_value, value_read);

  // TREXIO_SUCCESS, TREXIO_END and genuine errors are all reported as-is
  return rc_read;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_hdf5.c
/* Get the number of sparse elements of $group_dset$ stored in the file,
   i.e. the current length of the <name>_values dataset.

   file     : TREXIO file handle (HDF5 back end)
   size_max : output, number of stored values

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_1/2 for NULL arguments,
   TREXIO_INVALID_ID when the dataset or its dataspace cannot be opened,
   TREXIO_FAILURE when the dimensions cannot be queried. */
trexio_exit_code
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;

  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // allocate space for the dimensions to be read
  hsize_t ddims[1] = {0};

  // get the rank and dimensions of the dataset
  const int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);

  H5Dclose(dset_id);
  H5Sclose(fspace_id);

  // a negative rank signals that the query failed: do not report a bogus size
  if (rrank < 0) return TREXIO_FAILURE;

  *size_max = (int64_t) ddims[0];

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_hdf5.c
/* Check whether the sparse dataset $group_dset$ is present in the file.
   Only the <name>_values dataset is looked up.

   Returns TREXIO_SUCCESS if found, TREXIO_HAS_NOT if absent,
   TREXIO_INVALID_ARG_1 for a NULL file, TREXIO_FAILURE otherwise. */
trexio_exit_code
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
{

  if (file == NULL) return TREXIO_INVALID_ARG_1;

  trexio_hdf5_t* f = (trexio_hdf5_t*) file;

  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
  const herr_t found = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values");

  switch (found) {
    case 1:
      return TREXIO_SUCCESS;
    case 0:
      return TREXIO_HAS_NOT;
    default:
      return TREXIO_FAILURE;
  }

}
#+end_src
** Template for HDF5 has/read/write the dataset of strings
#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none
@ -403,10 +604,10 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
return TREXIO_ALLOCATION_FAILED;
}
hid_t dspace = H5Dget_space(dset_id);
hid_t dspace = H5Dget_space(dset_id);
if (dset_id <= 0) {
FREE(ddims);
H5Dclose(dset_id);
H5Dclose(dset_id);
return TREXIO_INVALID_ID;
}
@ -442,7 +643,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
if (rdata == NULL) {
H5Dclose(dset_id);
H5Sclose(dspace);
H5Tclose(memtype);
H5Tclose(memtype);
return TREXIO_ALLOCATION_FAILED;
}
@ -451,7 +652,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
FREE(rdata);
H5Dclose(dset_id);
H5Sclose(dspace);
H5Tclose(memtype);
H5Tclose(memtype);
return TREXIO_FAILURE;
}
@ -474,11 +675,11 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
FREE(rdata);
H5Dclose(dset_id);
H5Sclose(dspace);
H5Tclose(memtype);
H5Tclose(memtype);
return TREXIO_FAILURE;
}
FREE(rdata);
FREE(rdata);
H5Dclose(dset_id);
H5Sclose(dspace);
H5Tclose(memtype);
@ -509,7 +710,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$,
if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {
/* code to create dataset */
/* code to create dataset */
hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
if (filetype <= 0) return TREXIO_INVALID_ID;
@ -577,7 +778,7 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
}
#+end_src
** Template for HDF5 has/read/write the string attribute
#+begin_src c :tangle hrw_attr_str_hdf5.h :exports none
@ -655,7 +856,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
const hid_t dspace_id = H5Screate(H5S_SCALAR);
if (dspace_id <= 0) return TREXIO_INVALID_ID;
/* Create the $group_str$ attribute of $group$ group */
const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id,
H5P_DEFAULT, H5P_DEFAULT);
@ -665,7 +866,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
H5Tclose(dtype_id);
return TREXIO_INVALID_ID;
}
status = H5Awrite(str_id, dtype_id, str);
if (status < 0) {
H5Aclose(str_id);
@ -673,7 +874,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
H5Tclose(dtype_id);
return TREXIO_FAILURE;
}
H5Aclose(str_id);
H5Sclose(dspace_id);
H5Tclose(dtype_id);
@ -703,11 +904,256 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
}
#+end_src
** Helper functions
#+begin_src c :tangle helpers_hdf5.c
/* Create a 1D chunked dataset with unlimited maximum size and write the first chunk into it.

   group_id    : HDF5 group in which the dataset is created
   dset_name   : name of the new dataset
   dtype_id    : HDF5 datatype used both in the file and for the memory buffer
   chunk_dims  : 1-element array, initial size of the dataset and its chunk size
   data_sparse : buffer holding chunk_dims[0] elements of type dtype_id

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if any HDF5 object could not be
   created or configured, TREXIO_FAILURE if the write itself failed. */
trexio_exit_code
trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
                                      const char* dset_name,
                                      const hid_t dtype_id,
                                      const hsize_t* chunk_dims,
                                      const void* data_sparse)
{
  const int h5_rank = 1;
  const hsize_t maxdims[1] = {H5S_UNLIMITED};

  /* Every failure before the actual write is reported as an invalid HDF5 id */
  trexio_exit_code rc = TREXIO_INVALID_ID;

  /* Extensible dataspace: starts with one chunk, can grow without bound */
  const hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
  if (dspace < 0) return rc;

  /* Dataset creation property list carrying the chunk layout */
  const hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
  if (prop >= 0) {

    if (H5Pset_chunk(prop, h5_rank, chunk_dims) >= 0) {

      const hid_t dset_id = H5Dcreate(group_id,
                                      dset_name,
                                      dtype_id,
                                      dspace,
                                      H5P_DEFAULT,
                                      prop,
                                      H5P_DEFAULT);
      if (dset_id >= 0) {

        /* The whole (freshly created) dataset is written in one go */
        const herr_t write_status = H5Dwrite(dset_id,
                                             dtype_id,
                                             H5S_ALL, H5S_ALL, H5P_DEFAULT,
                                             data_sparse);
        H5Dclose(dset_id);

        rc = (write_status < 0) ? TREXIO_FAILURE : TREXIO_SUCCESS;
      }
    }

    H5Pclose(prop);
  }

  H5Sclose(dspace);

  return rc;
}
/* Open an existing 1D extensible dataset, grow it by one chunk and write the chunk at a given offset.

   group_id    : HDF5 group containing the dataset
   dset_name   : name of the existing dataset
   dtype_id    : HDF5 datatype of the memory buffer
   chunk_dims  : 1-element array, number of elements to write
   offset_file : 1-element array, position in the dataset where writing starts
   data_sparse : buffer holding chunk_dims[0] elements of type dtype_id

   The dataset is extended by chunk_dims[0] elements on every call,
   whatever the value of offset_file.

   Returns TREXIO_SUCCESS, TREXIO_INVALID_ID if an HDF5 object could not be
   opened/created or the selection failed, TREXIO_FAILURE if the extent
   query or the write itself failed. */
trexio_exit_code
trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
                                    const char* dset_name,
                                    const hid_t dtype_id,
                                    const hsize_t* chunk_dims,
                                    const hsize_t* offset_file,
                                    const void* data_sparse)
{
  const int h5_rank = 1;

  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  hid_t fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // allocate space for the dimensions to be read
  hsize_t ddims[1] = {0};

  // get the rank and dimensions of the dataset
  const int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
  if (rrank < 0) {
    // without the current size the new extent cannot be computed
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  ddims[0] += chunk_dims[0];

  // extend the dset size
  herr_t status = H5Dset_extent(dset_id, ddims);
  if (status < 0) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // close and reopen the file dataspace to take into account the extension
  H5Sclose(fspace);
  fspace = H5Dget_space(dset_id);
  if (fspace < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // select hyperslab to be written using chunk_dims and offset values
  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
  if (status < 0) {
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // create memory dataspace to write from
  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
  if (dspace < 0) {
    // dspace is not a valid handle here, so only the other two are closed
    H5Sclose(fspace);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  status = H5Dwrite(dset_id,
                    dtype_id,
                    dspace, fspace, H5P_DEFAULT,
                    data_sparse);
  H5Dclose(dset_id);
  H5Sclose(dspace);
  H5Sclose(fspace);
  if (status < 0) return TREXIO_FAILURE;

  return TREXIO_SUCCESS;
}
/* Read a contiguous slice of a 1D dataset, shrinking the request if it runs past the end.

   group_id      : HDF5 group containing the dataset
   dset_name     : name of the dataset
   offset_file   : 1-element array, position of the first element to read
   size_read     : 1-element array, number of elements requested; lowered in place
                   to the number actually read when the end of the dataset is reached
   eof_read_size : if not NULL, receives the lowered count when the end is reached
   is_index      : 1 when reading indices (output is int32_t, the file may hold
                   uint8/uint16/int32), any other value reads directly into data_sparse
   data_sparse   : output buffer

   Returns TREXIO_SUCCESS, TREXIO_END when the end of the dataset was reached,
   TREXIO_INVALID_ID / TREXIO_ALLOCATION_FAILED / TREXIO_FAILURE on errors.
   All HDF5 handles and temporary buffers are released on every path. */
trexio_exit_code
trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
                                   const char* dset_name,
                                   const hsize_t* offset_file,
                                   hsize_t* const size_read,
                                   int64_t* const eof_read_size,
                                   const int is_index,
                                   void* const data_sparse
                                   )
{
  const int h5_rank = 1;

  // get the dataset handle
  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
  if (dset_id <= 0) return TREXIO_INVALID_ID;

  // get the dataspace of the dataset
  hid_t fspace_id = H5Dget_space(dset_id);
  if (fspace_id < 0) {
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  /* get dims of the dset stored in the file to check whether reading with user-provided chunk size
     will reach end of the dataset (i.e. EOF in TEXT back end) */
  hsize_t ddims[1] = {0};
  const int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
  if (rrank < 0) {
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    return TREXIO_FAILURE;
  }

  int is_EOF = 0;

  if (offset_file[0] >= ddims[0]) {
    /* The offset is at or beyond the end of the dataset: there is nothing to read.
       Handled separately because hsize_t is unsigned and the
       subtraction below would wrap around. */
    size_read[0] = 0;
    if (eof_read_size != NULL) *eof_read_size = 0;
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    return TREXIO_END;
  }

  // if offset + requested size exceeds the current dim of the dset => EOF
  if (size_read[0] > ddims[0] - offset_file[0]) {
    is_EOF = 1;
    // lower the value of count to reduce the number of elements which will be read
    size_read[0] = ddims[0] - offset_file[0];
    // modify the value of eof_read_size passed by address
    if (eof_read_size != NULL) *eof_read_size = (int64_t) size_read[0];
  }

  // read the datatype from the dataset; it is also used as memory type for H5Dread
  hid_t dtype = H5Dget_type(dset_id);
  if (dtype < 0) {
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    return TREXIO_INVALID_ID;
  }

  // special case when reading int indices stored with a narrower type
  const int64_t size_ranked = (int64_t) size_read[0];
  int index_width = 0;      // 1 -> uint8_t, 2 -> uint16_t, 0 -> no conversion needed
  void* index_tmp = NULL;   // temporary buffer, allocated only when index_width != 0

  if (is_index == 1) {
    if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
      index_width = 1;
      index_tmp = CALLOC(size_ranked, uint8_t);
    } else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
      index_width = 2;
      index_tmp = CALLOC(size_ranked, uint16_t);
    }
    if (index_width != 0 && index_tmp == NULL) {
      H5Tclose(dtype);
      H5Sclose(fspace_id);
      H5Dclose(dset_id);
      return TREXIO_ALLOCATION_FAILED;
    }
  }

  // destination of the raw read: the temporary buffer if any, else the user buffer
  void* const read_buf = (index_tmp != NULL) ? index_tmp : data_sparse;

  herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL);
  if (status < 0) {
    H5Tclose(dtype);
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    if (index_tmp != NULL) FREE(index_tmp);
    return TREXIO_INVALID_ID;
  }

  hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
  if (memspace_id < 0) {
    H5Tclose(dtype);
    H5Sclose(fspace_id);
    H5Dclose(dset_id);
    if (index_tmp != NULL) FREE(index_tmp);
    return TREXIO_INVALID_ID;
  }

  status = H5Dread(dset_id,
                   dtype,
                   memspace_id, fspace_id, H5P_DEFAULT,
                   read_buf);

  H5Tclose(dtype);
  H5Sclose(fspace_id);
  H5Sclose(memspace_id);
  H5Dclose(dset_id);

  if (status < 0) {
    if (index_tmp != NULL) FREE(index_tmp);
    return TREXIO_FAILURE;
  }

  // widen narrow indices into the int32_t buffer provided by the caller
  if (index_width == 1) {
    const uint8_t* index = (const uint8_t*) index_tmp;
    for (int64_t i=0; i<size_ranked; ++i){
      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
    }
    FREE(index_tmp);
  } else if (index_width == 2) {
    const uint16_t* index = (const uint16_t*) index_tmp;
    for (int64_t i=0; i<size_ranked; ++i){
      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
    }
    FREE(index_tmp);
  }

  if (is_EOF == 1) return TREXIO_END;

  return TREXIO_SUCCESS;
}
#+end_src
* Constant file suffixes (not used by the generator) :noexport:
#+begin_src c :tangle suffix_hdf5.h
trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
trexio_exit_code trexio_hdf5_open_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
trexio_exit_code trexio_hdf5_open_read_dset_sparse (const hid_t group_id, const char* dset_name, const hsize_t* offset_file, hsize_t* const size_read, int64_t* const eof_read_size, const int is_index, void* const data_sparse);
#endif
#+end_src

View File

@ -19,23 +19,26 @@ cat populated/pop_flush_group_text.h >> trexio_text.h
cat populated/pop_has_dset_data_text.c >> trexio_text.c
cat populated/pop_has_dset_str_text.c >> trexio_text.c
cat populated/pop_has_dset_sparse_text.c >> trexio_text.c
cat populated/pop_has_attr_num_text.c >> trexio_text.c
cat populated/pop_has_attr_str_text.c >> trexio_text.c
cat populated/pop_read_dset_data_text.c >> trexio_text.c
cat populated/pop_read_dset_str_text.c >> trexio_text.c
cat populated/pop_read_dset_sparse_text.c >> trexio_text.c
cat populated/pop_read_attr_str_text.c >> trexio_text.c
cat populated/pop_read_attr_num_text.c >> trexio_text.c
cat populated/pop_write_dset_data_text.c >> trexio_text.c
cat populated/pop_write_dset_str_text.c >> trexio_text.c
cat populated/pop_write_dset_sparse_text.c >> trexio_text.c
cat populated/pop_write_attr_str_text.c >> trexio_text.c
cat populated/pop_write_attr_num_text.c >> trexio_text.c
cat populated/pop_hrw_dset_data_text.h >> trexio_text.h
cat populated/pop_hrw_dset_str_text.h >> trexio_text.h
cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h
cat populated/pop_hrw_attr_num_text.h >> trexio_text.h
cat populated/pop_hrw_attr_str_text.h >> trexio_text.h
cat rdm_text.c >> trexio_text.c
cat rdm_text.h >> trexio_text.h
cat suffix_text.h >> trexio_text.h

View File

@ -93,22 +93,10 @@ typedef struct $group$_s {
** Template for general structure in text back end
#+begin_src c :tangle struct_text_group.h
typedef struct rdm_s {
uint64_t dim_one_e;
uint32_t to_flush;
uint32_t padding;
double* one_e;
char file_name[TREXIO_MAX_FILENAME_LENGTH];
char two_e_file_name[TREXIO_MAX_FILENAME_LENGTH];
} rdm_t;
#+end_src
#+begin_src c :tangle struct_text_group.h
typedef struct trexio_text_s {
trexio_t parent ;
$group$_t* $group$;
rdm_t* rdm;
int lock_file;
} trexio_text_t;
#+end_src
@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file)
/* Error handling for this call is added by the generator */
rc = trexio_text_free_$group$( (trexio_text_t*) file);
rc = trexio_text_free_rdm( (trexio_text_t*) file);
if (rc != TREXIO_SUCCESS) return rc;
return TREXIO_SUCCESS;
}
@ -411,7 +396,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
return NULL;
}
rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$));
rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
assert(!(rc != 1));
if (rc != 1) {
FREE(buffer);
@ -499,7 +484,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
}
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
rc = fscanf(f, "%$group_dset_std_dtype_in$", &($group$->$group_dset$[i]));
rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
assert(!(rc != 1));
if (rc != 1) {
FREE(buffer);
@ -535,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file)
}
/* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
, BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
,*/
BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
*/
char* tmp_$group_dset$;
tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
$group$->$group_dset$[i] = tmp_$group_dset$;
/* conventional fcanf with "%s" only return the string before the first space character
,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
,* Q: depending on what ? */
* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
* Q: depending on what ? */
rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$);
assert(!(rc != 1));
if (rc != 1) {
@ -613,7 +598,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
// START REPEAT GROUP_NUM
fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$);
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
// END REPEAT GROUP_NUM
// START REPEAT GROUP_ATTR_STR
@ -627,7 +612,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
fprintf(f, "$group_dset$\n");
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
fprintf(f, "%$group_dset_std_dtype_out$\n", $group$->$group_dset$[i]);
fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
}
// END REPEAT GROUP_DSET_ALL
@ -1016,327 +1001,280 @@ trexio_text_has_$group_str$ (trexio_t* const file)
}
#+end_src
** RDM struct (hard-coded)
*** Read the complete struct
** Template for has/read/write the dataset of sparse data
#+begin_src c :tangle rdm_text.h
rdm_t* trexio_text_read_rdm(trexio_text_t* const file);
#+end_src
Each sparse array is stored in a separate =.txt= file due to the fact that sparse I/O has to be decoupled
from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data
to prevent memory overflow. Chunks have a given ~int64_t size~
(size specifies the number of sparse data items, e.g. integrals).
#+begin_src c :tangle rdm_text.c
rdm_t* trexio_text_read_rdm(trexio_text_t* const file) {
if (file == NULL) return NULL;
User provides indices and values of the sparse array as two separate variables.
if (file->rdm != NULL) return file->rdm;
/* Allocate the data structure */
rdm_t* rdm = MALLOC(rdm_t);
assert (rdm != NULL);
#+begin_src c :tangle hrw_dset_sparse_text.h :exports none
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int64_t size_start, const int32_t* index_sparse, const double* value_sparse);
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
#+end_src
rdm->one_e = NULL;
rdm->two_e_file_name[0] = '\0';
rdm->to_flush = 0;
/* Try to open the file. If the file does not exist, return */
const char* rdm_file_name = "/rdm.txt";
#+begin_src c :tangle write_dset_sparse_text.c
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
const int64_t offset_file,
const int64_t size,
const int64_t size_max,
const int64_t size_start,
const int32_t* index_sparse,
const double* value_sparse)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Build the name of the file with sparse data*/
/* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
strncat (rdm->file_name, rdm_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name));
/* Copy directory name in file_full_path */
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
/* Append name of the file with sparse data */
strncat (file_full_path, $group_dset$_file_name,
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
FREE(rdm);
return NULL;
/* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
FILE* f = fopen(file_full_path, "a");
if(f == NULL) return TREXIO_FILE_ERROR;
/* Specify the line length in order to offset properly. For example, for 4-index quantities
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
CURRENTLY NO OFFSET IS USED WHEN WRITING !
,*/
int64_t line_length = 0L;
char format_str[256] = "\0";
/* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
if (size_max < UINT8_MAX) {
line_length = $sparse_line_length_8$; // 41 for 4 indices
strncpy(format_str, $sparse_format_printf_8$, 256);
} else if (size_max < UINT16_MAX) {
line_length = $sparse_line_length_16$; // 49 for 4 indices
strncpy(format_str, $sparse_format_printf_16$, 256);
} else {
line_length = $sparse_line_length_32$; //69 for 4 indices
strncpy(format_str, $sparse_format_printf_32$, 256);
}
/* If the file exists, read it */
FILE* f = fopen(rdm->file_name,"r");
if (f != NULL) {
strncat(format_str, "\n", 2);
/* Find size of file to allocate the max size of the string buffer */
fseek(f, 0L, SEEK_END);
size_t sz = ftell(f);
fseek(f, 0L, SEEK_SET);
sz = (sz < 1024) ? (1024) : (sz);
char* buffer = CALLOC(sz, char);
/* Get the starting position of the IO stream to be written in the .size file.
This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
One can use ftello function which is adapted for large files.
For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
*/
int64_t io_start_pos = size_start * line_length;
/* Read the dimensioning variables */
int rc;
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "dim_one_e") == 0);
rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e));
assert (rc == 1);
/* Allocate arrays */
rdm->one_e = CALLOC(rdm->dim_one_e, double);
assert (rdm->one_e != NULL);
/* Read one_e */
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "one_e") == 0);
for (uint64_t i=0 ; i<rdm->dim_one_e; ++i) {
rc = fscanf(f, "%lf", &(rdm->one_e[i]));
assert (rc == 1);
}
/* Read two_e */
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
assert (strcmp(buffer, "two_e_file_name") == 0);
rc = fscanf(f, "%1023s", buffer);
assert (rc == 1);
strncpy(rdm->two_e_file_name, buffer, 1024);
if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
FREE(buffer);
FREE(rdm->one_e);
FREE(rdm);
/* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
int rc;
for (uint64_t i=0UL; i<size; ++i) {
rc = fprintf(f, format_str,
$group_dset_sparse_indices_printf$,
*(value_sparse + i));
if(rc <= 0) {
fclose(f);
return NULL;
}
FREE(buffer);
fclose(f);
f = NULL;
}
file->rdm = rdm ;
return rdm;
}
#+end_src
*** Flush the complete struct
#+begin_src c :tangle rdm_text.h
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) {
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (file->parent.mode == 'r') return TREXIO_READONLY;
rdm_t* const rdm = file->rdm;
if (rdm == NULL) return TREXIO_SUCCESS;
if (rdm->to_flush == 0) return TREXIO_SUCCESS;
FILE* f = fopen(rdm->file_name,"w");
assert (f != NULL);
/* Write the dimensioning variables */
fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e);
/* Write arrays */
fprintf(f, "one_e\n");
for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) {
fprintf(f, "%lf\n", rdm->one_e[i]);
}
fprintf(f, "two_e_file_name\n");
fprintf(f, "%s\n", rdm->two_e_file_name);
fclose(f);
rdm->to_flush = 0;
return TREXIO_SUCCESS;
}
#+end_src
*** Free memory
Memory is allocated when reading. The following function frees memory.
#+begin_src c :tangle rdm_text.h
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) {
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (file->parent.mode != 'r') {
trexio_exit_code rc = trexio_text_flush_rdm(file);
if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
}
rdm_t* const rdm = file->rdm;
if (rdm == NULL) return TREXIO_SUCCESS;
if (rdm->one_e != NULL) {
FREE (rdm->one_e);
}
free (rdm);
file->rdm = NULL;
return TREXIO_SUCCESS;
}
#+end_src
*** Read/Write the one_e attribute
The ~one_e~ array is assumed allocated with the appropriate size.
#+begin_src c :tangle rdm_text.h
trexio_exit_code
trexio_text_read_rdm_one_e(trexio_t* const file,
double* const one_e,
const uint64_t dim_one_e);
trexio_exit_code
trexio_text_write_rdm_one_e(trexio_t* const file,
const double* one_e,
const uint64_t dim_one_e);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code
trexio_text_read_rdm_one_e(trexio_t* const file,
double* const one_e,
const uint64_t dim_one_e)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3;
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
one_e[i] = rdm->one_e[i];
}
return TREXIO_SUCCESS;
}
trexio_exit_code
trexio_text_write_rdm_one_e(trexio_t* const file,
const double* one_e,
const uint64_t dim_one_e)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
if (file->mode != 'r') return TREXIO_READONLY;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
rdm->dim_one_e = dim_one_e;
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
rdm->one_e[i] = one_e[i];
}
rdm->to_flush = 1;
return TREXIO_SUCCESS;
}
#+end_src
*** Read/Write the two_e attribute
~two_e~ is a sparse data structure, which can be too large to fit
in memory. So we provide functions to read and write it by
chunks.
In the text back end, the easiest way to do it is to create a
file for each sparse float structure.
#+begin_src c :tangle rdm_text.h
trexio_exit_code
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
int64_t* const index,
double* const value);
trexio_exit_code
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
const int64_t* index,
const double* value);
#+end_src
#+begin_src c :tangle rdm_text.c
trexio_exit_code
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
int64_t* const index,
double* const value)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (index == NULL) return TREXIO_INVALID_ARG_4;
if (value == NULL) return TREXIO_INVALID_ARG_5;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
FILE* f = fopen(rdm->two_e_file_name, "r");
if (f == NULL) return TREXIO_END;
const uint64_t line_length = 64L;
fseek(f, (long) offset * line_length, SEEK_SET);
for (uint64_t i=0 ; i<size ; ++i) {
int rc = fscanf(f, "%9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %24le\n",
&index[4*i],
&index[4*i+1],
&index[4*i+2],
&index[4*i+3],
&value[i]);
if (rc == 5) {
/* Do nothing */
} else if (rc == EOF) {
return TREXIO_END;
return TREXIO_FAILURE;
}
}
return TREXIO_SUCCESS;
}
/* Close the TXT file */
rc = fclose(f);
if (rc != 0) return TREXIO_FILE_ERROR;
/* Append .size to the file_full_path in order to write additional info about the written buffer of data */
strncat(file_full_path, ".size", 6);
trexio_exit_code
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
const uint64_t offset,
const uint64_t size,
const int64_t* index,
const double* value)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (index == NULL) return TREXIO_INVALID_ARG_4;
if (value == NULL) return TREXIO_INVALID_ARG_5;
if (file->mode != 'r') return TREXIO_READONLY;
/* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
FILE *f_wSize = fopen(file_full_path, "a");
if (f_wSize == NULL) return TREXIO_FILE_ERROR;
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
if (rdm == NULL) return TREXIO_FAILURE;
FILE* f = fopen(rdm->two_e_file_name, "w");
if (f == NULL) return TREXIO_FAILURE;
const uint64_t line_length = 64L;
fseek(f, (long) offset * line_length, SEEK_SET);
for (uint64_t i=0 ; i<size ; ++i) {
int rc = fprintf(f, "%9" PRId64 " %9" PRId64 " %9" PRId64 " %9" PRId64 " %24le\n",
index[4*i],
index[4*i+1],
index[4*i+2],
index[4*i+3],
value[i]);
if (rc != 5) return TREXIO_FAILURE;
/* Write the buffer_size */
rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
if (rc <= 0) {
fclose(f_wSize);
return TREXIO_FAILURE;
}
/* Close the TXT file */
rc = fclose(f_wSize);
if (rc != 0) return TREXIO_FILE_ERROR;
/* Exit upon success */
return TREXIO_SUCCESS;
}
#+end_src
#+end_src
#+begin_src c :tangle read_dset_sparse_text.c
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
                                               const int64_t offset_file,
                                               const int64_t size,
                                               const int64_t size_max,
                                               int64_t* const eof_read_size,
                                               int32_t* const index_sparse,
                                               double* const value_sparse)
{
  /* Read up to `size` lines of sparse data (indices + value) from the $group_dset$.txt file,
     starting at line number `offset_file`.

     Returns:
       TREXIO_SUCCESS       all `size` lines have been read;
       TREXIO_END           end of file reached first; the number of lines actually read
                            is stored in *eof_read_size;
       TREXIO_FAILURE       a line could not be parsed;
       TREXIO_FILE_ERROR    the file could not be opened, positioned or closed;
       TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_5 for NULL `file` / `eof_read_size`.
   */
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment.
   */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate the string when the
     source fills the whole buffer, so terminate it explicitly */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Append name of the file with sparse data. The third argument of strncat bounds the number
     of appended characters, hence it has to be the space LEFT in the destination buffer
     (minus one for the terminating null character) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);

  /* Open the file in "r" (read) mode so that its content is never modified upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly. For example, for 4-index quantities
     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char
   */
  uint64_t line_length = 0UL;
  /* Determine the line length depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    line_length = $sparse_line_length_8$; // 41 for 4 indices
  } else if (size_max < UINT16_MAX) {
    line_length = $sparse_line_length_16$; // 49 for 4 indices
  } else {
    line_length = $sparse_line_length_32$; //69 for 4 indices
  }

  /* Offset in the file according to the provided value of offset_file and optimal line_length.
     A failed seek would make all subsequent reads return data from a wrong position. */
  if (fseek(f, (long) offset_file * line_length, SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* Read the data line by line and check the return code of sscanf to verify that
     at least one item has been converted; stop early when EOF is reached */
  int rc;
  char buffer[1024];
  uint64_t count = 0UL;
  /* NOTE: the loop variable has to be called `i` because the generated
     list of index addresses refers to it */
  for (uint64_t i=0UL; i<size; ++i) {

    memset(buffer,0,sizeof(buffer));

    if (fgets(buffer, 1023, f) == NULL) {

      /* End of file: report how many lines have been read so far */
      fclose(f);
      *eof_read_size = count;
      return TREXIO_END;

    } else {

      rc = sscanf(buffer, "$group_dset_format_scanf$",
                  $group_dset_sparse_indices_scanf$,
                  value_sparse + i);
      if (rc <= 0) {
        fclose(f);
        return TREXIO_FAILURE;
      }
      count += 1UL;

    }
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle read_dset_sparse_text.c
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  /* Compute the total number of sparse items stored so far by summing up the chunk sizes
     recorded in the $group_dset$.txt.size file (one "size offset" pair per written chunk).

     Returns:
       TREXIO_SUCCESS            the sum is stored in *size_max;
       TREXIO_INT_SIZE_OVERFLOW  the sum does not fit in int64_t (*size_max is set to -1);
       TREXIO_FAILURE            the .size file contains something else than pairs of integers;
       TREXIO_FILE_ERROR         the file could not be opened or closed;
       TREXIO_INVALID_ARG_1 / TREXIO_INVALID_ARG_2 for NULL `file` / `size_max`.
   */
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  /* Build the name of the file with the sizes of the written chunks.
     The $group_dset$.txt.size is limited to 256 symbols for the moment.
   */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
  /* The full path to the TXT file with chunk sizes. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate the string when the
     source fills the whole buffer, so terminate it explicitly */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Append name of the file. The third argument of strncat has to be the space LEFT
     in the destination buffer (minus one for the terminating null character) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);

  /* Open the file in "r" (read) mode so that its content is never modified */
  FILE* f = fopen(file_full_path, "r");
  if (f == NULL) return TREXIO_FILE_ERROR;

  int rc;
  int64_t size_item, offset_item, size_accum=0L;

  /* Read the pairs of values from the file. Looping while exactly 2 items are converted
     (instead of "until EOF") prevents an infinite loop on malformed input: upon a matching
     failure fscanf returns 0 without consuming the offending characters. */
  while ((rc = fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item)) == 2) {

    /* Check that summation will not overflow the int64_t value */
    if (INT64_MAX - size_accum > size_item) {
      size_accum += size_item;
    } else {
      fclose(f);
      *size_max = -1L;
      return TREXIO_INT_SIZE_OVERFLOW;
    }

  }

  /* Anything but EOF at this point means that the file content is corrupted */
  if (rc != EOF) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  rc = fclose(f);
  if (rc != 0) return TREXIO_FILE_ERROR;

  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
  *size_max = size_accum;

  return TREXIO_SUCCESS;
}
#+end_src
#+begin_src c :tangle has_dset_sparse_text.c
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
{
  /* Check whether the $group_dset$.txt file with sparse data exists in the TREXIO directory.
     Returns TREXIO_SUCCESS if it does, TREXIO_HAS_NOT otherwise
     (TREXIO_INVALID_ARG_1 for a NULL `file`). */
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment.
   */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate the string when the
     source fills the whole buffer, so terminate it explicitly */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Append name of the file with sparse data. The third argument of strncat has to be the
     space LEFT in the destination buffer (minus one for the terminating null character) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH-strlen(file_full_path)-1);

  /* Check the return code of access function to determine whether the file with sparse data exists or not */
  if (access(file_full_path, F_OK) == 0) {
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}
#+end_src
* Constant file suffixes (not used by the generator) :noexport:
#+begin_src c :tangle suffix_text.h
#endif
#+end_src

View File

@ -1,11 +1,12 @@
# ================= TESTING =================
# ================= TESTING =================
# Create a list of tests for TEXT back end.
set(Tests_text
open_text
io_dset_float_text
io_dset_str_text
io_dset_sparse_text
io_safe_dset_float_text
io_dset_int_text
io_num_text
@ -19,6 +20,7 @@ if(ENABLE_HDF5)
open_hdf5
io_dset_float_hdf5
io_dset_str_hdf5
io_dset_sparse_hdf5
io_safe_dset_float_hdf5
io_dset_int_hdf5
io_num_hdf5
@ -43,4 +45,3 @@ endforeach()
add_executable(test_f test_f.f90)
target_link_libraries(test_f PRIVATE trexio_f)
add_test(NAME test_f COMMAND $<TARGET_FILE:test_f>)

235
tests/io_dset_sparse_hdf5.c Normal file
View File

@ -0,0 +1,235 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_HDF5
#define TREXIO_FILE "test_dset_sparse.h5"
#define RM_COMMAND "rm -f -- " TREXIO_FILE
#define SIZE 100
#define N_CHUNKS 5
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  /* Write SIZE sparse items (4 indices + 1 value each) into the TREXIO file,
     split into N_CHUNKS consecutive chunks; `offset` is the position in the file
     at which the first chunk is written. */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for writing
  trexio_t* file = trexio_open(file_name, 'w', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // prepare the data: item i carries indices (4i, 4i+1, 4i+2, 4i+3) and value 3.14+i
  int32_t* index = calloc(4L*SIZE, sizeof(int32_t));
  double*  value = calloc(SIZE, sizeof(double));

  for (int i = 0; i < SIZE; i++) {
    for (int k = 0; k < 4; k++) {
      index[4*i+k] = 4*i+k;
    }
    value[i] = 3.14 + (double) i;
  }

  // mo_num has to be present: it is used to determine the optimal size of int indices
  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
    rc = trexio_write_mo_num(file, 1000);
    assert(rc == TREXIO_SUCCESS);
  }

  // every chunk has the same number of items
  const uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
  // position in the file where the first chunk goes
  const uint64_t offset_start = (uint64_t) offset;

  // write the chunks one after another
  for (int chunk = 0; chunk < N_CHUNKS; ++chunk) {
    // position of the current chunk in the data arrays
    const uint64_t offset_d = chunk * chunk_size;
    // position of the current chunk in the file
    const uint64_t offset_f = offset_start + offset_d;

    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, index + 4*offset_d, value + offset_d);
    assert(rc == TREXIO_SUCCESS);
  }

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // release the buffers
  free(index);
  free(value);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {

  /* Verify which datasets of sparse data are reported as present in the TREXIO file */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // mo_2e_int_eri_lr has to be absent (only the non-lr component is written in this unit test)
  rc = trexio_has_mo_2e_int_eri_lr(file);
  assert (rc == TREXIO_HAS_NOT);

  // ask once more: the previous has_sparse call must not have created a file/dset
  rc = trexio_has_mo_2e_int_eri_lr(file);
  assert (rc == TREXIO_HAS_NOT);

  // the previously written mo_2e_int_eri has to be found
  rc = trexio_has_mo_2e_int_eri(file);
  assert (rc == TREXIO_SUCCESS);

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  /* Read chunks of the sparse dataset back from the TREXIO file: first a chunk that lies
     entirely inside the data, then a chunk that runs into the end of the data.
     `offset` is the position in the file of the data written by the latest write test. */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // zero-initialized destination arrays with room for 40 items
  uint64_t size_r = 40L;
  int32_t* index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
  double*  value_read = (double*)  calloc(size_r,sizeof(double));

  // first read: 10 items starting from item No. 40 of the written data,
  // stored in the destination arrays starting from element No. 5
  int64_t chunk_read = 10L;
  const int64_t read_size_check = chunk_read;
  int offset_data_read = 5;
  int64_t offset_file_read = 40L + offset;

  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, index_read + 4*offset_data_read, value_read + offset_data_read);
  assert(rc == TREXIO_SUCCESS);
  // the whole chunk has been read
  assert(chunk_read == read_size_check);
  // elements before offset_data_read are untouched
  assert(index_read[0] == 0);
  // the first index of item No. k equals 4*k
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // second read: start so close to the end that EOF is encountered
  // (i.e. offset_file_read + chunk_read > size_max)
  offset_data_read = 1;
  offset_file_read = 97;
  // starting from item No. 97 only 3 integrals can be read out of total of 100
  const int64_t eof_read_size_check = SIZE - offset_file_read;
  offset_file_read += offset;

  // this read reaches EOF and has to return TREXIO_END code
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, index_read + 4*offset_data_read, value_read + offset_data_read);
  assert(rc == TREXIO_END);
  // chunk_read is overwritten with the number of items actually read
  assert(chunk_read == eof_read_size_check);
  // the tail of the destination array is untouched
  assert(index_read[4*size_r-1] == 0);
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // release the buffers
  free(index_read);
  free(value_read);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {

  /* Query the number of items stored in the sparse dataset and compare it with `size_check` */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // receives the stored size of the dataset
  int64_t size_written;

  rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
  assert(rc == TREXIO_SUCCESS);
  assert(size_written == size_check);

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
int main(){

  /*============== Test launcher ================*/

  // start from a clean state
  int rc = system(RM_COMMAND);
  assert (rc == 0);

  // two write passes of SIZE elements each (written in N_CHUNKS chunks);
  // the second pass places its data right after the data of the first one
  for (int pass = 0; pass < 2; ++pass) {
    const int64_t offset = (int64_t) pass * SIZE;

    test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, offset);
    // the existence check is performed only after the first write
    if (pass == 0) test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
    test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, offset);
    // the total stored size grows by SIZE with every pass
    test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, offset + SIZE);
  }

  // remove the test file
  rc = system(RM_COMMAND);
  assert (rc == 0);

  return 0;
}

230
tests/io_dset_sparse_text.c Normal file
View File

@ -0,0 +1,230 @@
#include "trexio.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TEST_BACKEND TREXIO_TEXT
#define TREXIO_FILE "test_dset_sparse.dir"
#define RM_COMMAND "rm -rf " TREXIO_FILE
#define SIZE 100
#define N_CHUNKS 5
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  /* Write SIZE sparse items (4 indices + 1 value each) into the TREXIO file,
     split into N_CHUNKS consecutive chunks; `offset` is the position in the file
     at which the first chunk is written. */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for writing
  trexio_t* file = trexio_open(file_name, 'w', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // prepare the data: item i carries indices (4i, 4i+1, 4i+2, 4i+3) and value 3.14+i
  int32_t* index = calloc(4L*SIZE, sizeof(int32_t));
  double*  value = calloc(SIZE, sizeof(double));

  for (int i = 0; i < SIZE; i++) {
    for (int k = 0; k < 4; k++) {
      index[4*i+k] = 4*i+k;
    }
    value[i] = 3.14 + (double) i;
  }

  // mo_num has to be present: it is used to determine the optimal size of int indices
  if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
    rc = trexio_write_mo_num(file, 1000);
    assert(rc == TREXIO_SUCCESS);
  }

  // every chunk has the same number of items
  const uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
  // position in the file where the first chunk goes
  const uint64_t offset_start = (uint64_t) offset;

  // write the chunks one after another
  for (int chunk = 0; chunk < N_CHUNKS; ++chunk) {
    // position of the current chunk in the data arrays
    const uint64_t offset_d = chunk * chunk_size;
    // position of the current chunk in the file
    const uint64_t offset_f = offset_start + offset_d;

    rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, index + 4*offset_d, value + offset_d);
    assert(rc == TREXIO_SUCCESS);
  }

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // release the buffers
  free(index);
  free(value);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {

  /* Verify which datasets of sparse data are reported as present in the TREXIO file */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // mo_2e_int_eri_lr has to be absent (only the non-lr component is written in this unit test)
  rc = trexio_has_mo_2e_int_eri_lr(file);
  assert (rc == TREXIO_HAS_NOT);

  // ask once more: the previous has_sparse call must not have created a file/dset
  rc = trexio_has_mo_2e_int_eri_lr(file);
  assert (rc == TREXIO_HAS_NOT);

  // the previously written mo_2e_int_eri has to be found
  rc = trexio_has_mo_2e_int_eri(file);
  assert (rc == TREXIO_SUCCESS);

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {

  /* Read chunks of the sparse dataset back from the TREXIO file: first a chunk that lies
     entirely inside the data, then a chunk that runs into the end of the data.
     `offset` is the position in the file of the data written by the latest write test. */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // zero-initialized destination arrays with room for 40 items
  uint64_t size_r = 40L;
  int32_t* index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
  double*  value_read = (double*)  calloc(size_r,sizeof(double));

  // first read: 10 items starting from item No. 40 of the written data,
  // stored in the destination arrays starting from element No. 5
  int64_t chunk_read = 10L;
  const int64_t read_size_check = chunk_read;
  int offset_data_read = 5;
  int64_t offset_file_read = 40L + offset;

  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, index_read + 4*offset_data_read, value_read + offset_data_read);
  assert(rc == TREXIO_SUCCESS);
  // the whole chunk has been read
  assert(chunk_read == read_size_check);
  // elements before offset_data_read are untouched
  assert(index_read[0] == 0);
  // the first index of item No. k equals 4*k
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // second read: start so close to the end that EOF is encountered
  // (i.e. offset_file_read + chunk_read > size_max)
  offset_data_read = 1;
  offset_file_read = 97L;
  // starting from item No. 97 only 3 integrals can be read out of total of 100
  const int64_t eof_read_size_check = SIZE - offset_file_read;
  offset_file_read += offset;

  // this read reaches EOF and has to return TREXIO_END code
  rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, index_read + 4*offset_data_read, value_read + offset_data_read);
  assert(rc == TREXIO_END);
  // chunk_read is overwritten with the number of items actually read
  assert(chunk_read == eof_read_size_check);
  // the tail of the destination array is untouched
  assert(index_read[4*size_r-1] == 0);
  assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  // release the buffers
  free(index_read);
  free(value_read);

  /*================= END OF TEST ==================*/

  return 0;
}
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {

  /* Query the number of items stored in the sparse dataset and compare it with `size_check` */

  trexio_exit_code rc;

  /*================= START OF TEST ==================*/

  // the file is opened for reading
  trexio_t* file = trexio_open(file_name, 'r', backend, &rc);
  assert (file != NULL);
  assert (rc == TREXIO_SUCCESS);

  // receives the stored size of the dataset
  int64_t size_written;

  rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
  assert(rc == TREXIO_SUCCESS);
  assert(size_written == size_check);

  // end of the session
  rc = trexio_close(file);
  assert (rc == TREXIO_SUCCESS);

  /*================= END OF TEST ==================*/

  return 0;
}
int main(){

  /*============== Test launcher ================*/

  // start from a clean state
  int rc = system(RM_COMMAND);
  assert (rc == 0);

  // two write passes of SIZE elements each (written in N_CHUNKS chunks);
  // the second pass places its data right after the data of the first one
  for (int pass = 0; pass < 2; ++pass) {
    const int64_t offset = (int64_t) pass * SIZE;

    test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, offset);
    // the existence check is performed only after the first write
    if (pass == 0) test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
    test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, offset);
    // the total stored size grows by SIZE with every pass
    test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, offset + SIZE);
  }

  // remove the test directory
  rc = system(RM_COMMAND);
  assert (rc == 0);

  return 0;
}

View File

@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_num(file, num);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
assert (rc == TREXIO_SUCCESS);
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
rc = trexio_write_mo_num(file, 0);
assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
rc = trexio_has_nucleus_num(file);
assert (rc == TREXIO_SUCCESS);
rc = trexio_has_nucleus_repulsion(file);
assert (rc == TREXIO_SUCCESS);
// check that the num variable does not exist
rc = trexio_has_mo_num(file);
assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
// parameters to be read
int num;
int cartesian;
float repulsion_32;
double repulsion_64, d;
/*================= START OF TEST ==================*/
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
assert (rc == TREXIO_SUCCESS);
assert (num == 12);
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
assert (rc == TREXIO_SUCCESS);
d = repulsion_32 - 2.14171677;
assert( d*d < 1.e-8 );
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
assert (rc == TREXIO_SUCCESS);
d = repulsion_64 - 2.14171677;
assert( d*d < 1.e-14 );
// read non-existing numerical attribute from the file
rc = trexio_read_mo_num(file, &num);
assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {
return 0;
}

View File

@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
rc = trexio_write_nucleus_num(file, num);
assert (rc == TREXIO_SUCCESS);
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
assert (rc == TREXIO_SUCCESS);
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
rc = trexio_write_mo_num(file, 0);
assert (rc == TREXIO_INVALID_NUM);
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
rc = trexio_has_nucleus_num(file);
assert (rc == TREXIO_SUCCESS);
rc = trexio_has_nucleus_repulsion(file);
assert (rc == TREXIO_SUCCESS);
// check that the num variable does not exist
rc = trexio_has_mo_num(file);
assert (rc == TREXIO_HAS_NOT);
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
// parameters to be read
int num;
int cartesian;
float repulsion_32;
double repulsion_64, d;
/*================= START OF TEST ==================*/
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
assert (rc == TREXIO_SUCCESS);
assert (num == 12);
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
assert (rc == TREXIO_SUCCESS);
d = repulsion_32 - 2.14171677;
assert( d*d < 1.e-8 );
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
assert (rc == TREXIO_SUCCESS);
d = repulsion_64 - 2.14171677;
assert( d*d < 1.e-14 );
// read non-existing numerical attribute from the file
rc = trexio_read_mo_num(file, &num);
assert (rc == TREXIO_ATTR_MISSING);
@ -134,5 +152,3 @@ int main(void) {
return 0;
}

View File

@ -2,25 +2,25 @@ program test_trexio
use trexio
use, intrinsic :: iso_c_binding
implicit none
logical :: have_hdf5
print * , "============================================"
print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION
print * , "============================================"
print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION
print'(a,i3)', " TREXIO MAJOR VERSION : ", TREXIO_VERSION_MAJOR
print'(a,i3)', " TREXIO MINOR VERSION : ", TREXIO_VERSION_MINOR
print * , "============================================"
print * , "============================================"
call system('rm -rf test_write_f.dir')
call system('rm -rf -- test_write_f.dir')
print *, 'call test_write(''test_write_f.dir'', TREXIO_TEXT)'
call test_write('test_write_f.dir', TREXIO_TEXT)
print *, 'call test_read(''test_write_f.dir'', TREXIO_TEXT)'
call test_read('test_write_f.dir', TREXIO_TEXT)
call system('rm -rf test_write_f.dir')
call system('rm -rf -- test_write_f.dir')
call test_read_void('test_write_f.dir', TREXIO_TEXT)
! No way to conditionally check whether compilation was done with HDF5
! No way to conditionally check whether compilation was done with HDF5
! So temporarily disable the test for HDF5 back end at the moment
have_hdf5 = trexio_has_backend(TREXIO_HDF5)
if (have_hdf5) then
@ -30,7 +30,7 @@ program test_trexio
print *, 'call test_read(''test_write_f.h5'', TREXIO_HDF5)'
call test_read('test_write_f.h5', TREXIO_HDF5)
call system('rm -f -- test_write_f.h5')
call test_read_void('test_write_f.h5', TREXIO_HDF5)
endif
@ -61,6 +61,22 @@ subroutine test_write(file_name, back_end)
character(len=:), allocatable :: sym_str
character(len=:), allocatable :: label(:)
! sparse data
integer(4) :: index_sparse_mo_2e_int_eri(4,100)
double precision :: value_sparse_mo_2e_int_eri(100)
integer :: i, n_buffers = 5
integer(8) :: buf_size, offset
buf_size = 100/n_buffers
do i = 1, 100
index_sparse_mo_2e_int_eri(1,i) = 4*i - 3
index_sparse_mo_2e_int_eri(2,i) = 4*i+1 - 3
index_sparse_mo_2e_int_eri(3,i) = 4*i+2 - 3
index_sparse_mo_2e_int_eri(4,i) = 4*i+3 - 3
value_sparse_mo_2e_int_eri(i) = 3.14 + float(i)
enddo
! parameters to be written
num = 12
charge = (/ 6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1. /)
@ -96,6 +112,9 @@ subroutine test_write(file_name, back_end)
rc = trexio_has_nucleus_charge(trex_file)
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 2')
rc = trexio_has_mo_2e_int_eri(trex_file)
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 3')
rc = trexio_write_nucleus_num(trex_file, num)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE NUM')
@ -106,8 +125,8 @@ subroutine test_write(file_name, back_end)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE COORD')
rc = trexio_write_nucleus_label(trex_file, label, 5)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
deallocate(label)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
rc = trexio_write_nucleus_point_group(trex_file, sym_str, 32)
deallocate(sym_str)
@ -119,6 +138,20 @@ subroutine test_write(file_name, back_end)
rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX')
! write mo_num which will be used to determine the optimal size of int indices
if (trexio_has_mo_num(trex_file) == TREXIO_HAS_NOT) then
rc = trexio_write_mo_num(trex_file, 1000)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE MO NUM')
endif
offset = 0
do i = 1,n_buffers
rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, &
index_sparse_mo_2e_int_eri(1,offset+1), &
value_sparse_mo_2e_int_eri(offset+1))
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE SPARSE')
offset = offset + buf_size
enddo
rc = trexio_has_nucleus_num(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 1')
@ -126,6 +159,9 @@ subroutine test_write(file_name, back_end)
rc = trexio_has_nucleus_coord(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 2')
rc = trexio_has_mo_2e_int_eri(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 3')
rc = trexio_close(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS CLOSE')
@ -160,11 +196,25 @@ subroutine test_read(file_name, back_end)
character(len=32) :: sym_str
! sparse data
integer(4) :: index_sparse_mo_2e_int_eri(4,20)
double precision :: value_sparse_mo_2e_int_eri(20)
integer(8) :: read_buf_size = 10
integer(8) :: read_buf_size_save = 10
integer(8) :: offset_read = 40
integer(8) :: offset_data_read = 5
integer(8) :: offset_eof = 97
integer(8) :: offset_data_eof = 1
integer(8) :: size_toread = 0
character*(128) :: str
num = 12
basis_shell_num = 24
index_sparse_mo_2e_int_eri = 0
value_sparse_mo_2e_int_eri = 0.0d0
! ================= START OF TEST ===================== !
trex_file = trexio_open(file_name, 'r', back_end, rc)
@ -199,7 +249,7 @@ subroutine test_read(file_name, back_end)
call exit(-1)
endif
rc = trexio_read_nucleus_label(trex_file, label, 2)
call trexio_assert(rc, TREXIO_SUCCESS)
if (trim(label(2)) == 'Na') then
@ -230,6 +280,52 @@ subroutine test_read(file_name, back_end)
endif
rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1), &
value_sparse_mo_2e_int_eri(offset_data_read + 1))
!do i = 1,20
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
!enddo
call trexio_assert(rc, TREXIO_SUCCESS)
if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then
write(*,*) 'SUCCESS READ SPARSE DATA'
else
print *, 'FAILURE SPARSE DATA CHECK'
call exit(-1)
endif
! attempt to read reaching EOF: should return TREXIO_END and
! NOT increment the existing values in the buffer (only upd with what has been read)
rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, &
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), &
value_sparse_mo_2e_int_eri(offset_data_eof + 1))
!do i = 1,20
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
!enddo
call trexio_assert(rc, TREXIO_END)
if (read_buf_size == 3 .and. &
index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. &
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then
write(*,*) 'SUCCESS READ SPARSE DATA EOF'
read_buf_size = read_buf_size_save
else
print *, 'FAILURE SPARSE DATA EOF CHECK'
call exit(-1)
endif
rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread)
call trexio_assert(rc, TREXIO_SUCCESS)
if (size_toread == 100) then
write(*,*) 'SUCCESS READ SPARSE SIZE'
else
print *, 'FAILURE SPARSE SIZE CHECK'
call exit(-1)
endif
rc = trexio_close(trex_file)
call trexio_assert(rc, TREXIO_SUCCESS)
@ -254,6 +350,9 @@ subroutine test_read_void(file_name, back_end)
! ================= START OF TEST ===================== !
trex_file = trexio_open(file_name, 'r', back_end, rc)
if (rc /= TREXIO_OPEN_ERROR) then
rc = trexio_close(trex_file)
endif
call trexio_assert(rc, TREXIO_OPEN_ERROR)
call trexio_string_of_error(rc, str)
@ -262,4 +361,3 @@ subroutine test_read_void(file_name, back_end)
! ================= END OF TEST ===================== !
end subroutine test_read_void

View File

@ -6,29 +6,31 @@ config_file = 'trex.json'
trex_config = read_json(config_file)
# --------------------------------------------------------------------------- #
# -------------------------------- [WIP] ------------------------------------ #
# for now remove rdm from config because its functions are hardcoded
del trex_config['rdm']
# --------------------------------------------------------------------------- #
# -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- #
group_dict = get_group_dict(trex_config)
detailed_nums = get_detailed_num_dict(trex_config)
detailed_strs = get_detailed_str_dict(trex_config)
# helper dictionaries that contain names of groups, nums or dsets as keys
dsets = get_dset_dict(trex_config)
detailed_dsets_nostr, detailed_dsets_str = split_dset_dict_detailed(dsets)
detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
detailed_dsets = detailed_dsets_nostr.copy()
detailed_dsets.update(detailed_dsets_str)
# build a big dictionary with all pre-processed data
detailed_all = {
'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse),
'groups' : group_dict,
'numbers' : detailed_nums,
'strings' : detailed_strs
}
# consistency check for dimensioning variables
check_dim_consistency(detailed_nums, dsets)
# --------------------------------------------------------------------------- #
# -------------------- GET TEMPLATED FILES TO BE POPULATED ------------------ #
source = ['front', 'text', 'hdf5']
# build helper dictionaries with paths per source directory
# build helper dictionaries with paths per source directory
template_paths = get_template_paths(source)
# build helper dictionaries with source files per source directory
# build helper dictionaries with source files per source directory
source_files = get_source_files(template_paths)
# build helper dictionaries with templated files
files_todo = get_files_todo(source_files)
@ -38,7 +40,7 @@ files_todo = get_files_todo(source_files)
# populate files with iterative scheme, i.e. for unique functions
for fname in files_todo['auxiliary']:
iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
iterative_populate_file(fname, template_paths, detailed_all)
# populate has/read/write_num functions with recursive scheme
for fname in files_todo['attr_num']:
@ -48,14 +50,18 @@ for fname in files_todo['attr_num']:
for fname in files_todo['attr_str']:
recursive_populate_file(fname, template_paths, detailed_strs)
# populate has/read/write_dset (numerical) functions with recursive scheme
# populate has/read/write_dset (numerical) functions with recursive scheme
for fname in files_todo['dset_data']:
recursive_populate_file(fname, template_paths, detailed_dsets_nostr)
# populate has/read/write_dset (strings) functions with recursive scheme
# populate has/read/write_dset (strings) functions with recursive scheme
for fname in files_todo['dset_str']:
recursive_populate_file(fname, template_paths, detailed_dsets_str)
# populate has/read/write_dset (sparse) functions with recursive scheme
for fname in files_todo['dset_sparse']:
recursive_populate_file(fname, template_paths, detailed_dsets_sparse)
# populate group-related functions with mixed (iterative+recursive) scheme [text backend]
for fname in files_todo['group']:
special_populate_text_group(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)

View File

@ -4,7 +4,7 @@ from json import load as json_load
def read_json(fname: str) -> dict:
"""
"""
Read configuration from the input `fname` JSON file.
Parameters:
@ -23,7 +23,7 @@ def read_json(fname: str) -> dict:
def get_files_todo(source_files: dict) -> dict:
"""
"""
Build dictionaries of templated files per objective.
Parameters:
@ -36,21 +36,21 @@ def get_files_todo(source_files: dict) -> dict:
for key in source_files.keys():
all_files += source_files[key]
files_todo = {}
files_todo = {}
#files_todo['all'] = list(filter(lambda x: 'read' in x or 'write' in x or 'has' in x or 'hrw' in x or 'flush' in x or 'free' in x, all_files))
files_todo['all'] = [f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f]
for key in ['dset_data', 'dset_str', 'attr_num', 'attr_str', 'group']:
for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']:
files_todo[key] = list(filter(lambda x: key in x, files_todo['all']))
files_todo['group'].append('struct_text_group_dset.h')
# files that correspond to iterative population (e.g. the code is repeated within the function body but the function itself is unique)
files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h']
files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h']
return files_todo
def get_source_files(paths: dict) -> dict:
"""
"""
Build dictionaries of all files per source directory.
Parameters:
@ -67,7 +67,7 @@ def get_source_files(paths: dict) -> dict:
def get_template_paths(source: list) -> dict:
"""
"""
Build dictionary of the absolute paths to directory with templates per source.
Parameters:
@ -86,7 +86,7 @@ def get_template_paths(source: list) -> dict:
def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> None:
"""
"""
Populate files containing basic read/write/has functions.
Parameters:
@ -107,6 +107,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
'group_num_h5_dtype', 'group_num_py_dtype',
'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
'group_dset', 'group_num', 'group_str', 'group']
for item in detailed_source.keys():
@ -133,9 +137,9 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
elif 'uncommented by the generator for dimensioning' in line:
# only uncomment and write the line if `num` is in the name
if 'dim' in detailed_source[item]['trex_json_int_type']:
templine = line.replace('//', '')
templine = line.replace('//', '')
f_out.write(templine)
# general case of recursive replacement of inline triggers
# general case of recursive replacement of inline triggers
else:
populated_line = recursive_replace_line(line, triggers, detailed_source[item])
f_out.write(populated_line)
@ -144,8 +148,8 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
def recursive_replace_line (input_line: str, triggers: list, source: dict) -> str:
"""
Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`.
"""
Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`.
Parameters:
input_line (str) : input line
@ -154,10 +158,10 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
Returns:
output_line (str) : processed (replaced) line
"""
"""
is_triggered = False
output_line = input_line
if '$' in input_line:
for case in triggers:
test_case = f'${case}$'
@ -175,21 +179,22 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
else:
print(output_line)
raise ValueError('Recursion went wrong, not all cases considered')
return output_line
def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
"""
def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
"""
Iteratively populate files with unique functions that contain templated variables.
Parameters:
filename (str) : template file to be populated
paths (dict) : dictionary of paths per source directory
groups (dict) : dictionary of groups
datasets (dict) : dictionary of datasets with substitution details
numbers (dict) : dictionary of numbers with substitution details
strings (dict) : dictionary of strings with substitution details
detailed_all(dict) : dictionary with substitution details with the following keys:
'groups' : dictionary of groups with substitution details
'datasets' : dictionary of datasets with substitution details
'numbers' : dictionary of numbers with substitution details
'strings' : dictionary of strings with substitution details
Returns:
None
@ -200,7 +205,7 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
templ_path = get_template_path(filename, paths)
filename_out = join('populated',f'pop_{filename}')
# Note: it is important that special conditions like add_trigger above will be checked before standard triggers
# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed
# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed
# from the template and the special condition will never be met.
with open(join(templ_path,filename), 'r') as f_in :
with open(join(templ_path,filename_out), 'a') as f_out :
@ -209,29 +214,29 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
if id == 0:
# special case for proper error handling when deallocating text groups
error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
f_out.write(populated_line)
elif id == 1:
populated_line = iterative_replace_line(line, triggers[id], datasets, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
f_out.write(populated_line)
elif id == 2:
populated_line = iterative_replace_line(line, triggers[id], numbers, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
f_out.write(populated_line)
elif id == 3:
populated_line = iterative_replace_line(line, triggers[id], strings, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
f_out.write(populated_line)
elif id == 4:
populated_line = iterative_replace_line(line, triggers[id], groups, None)
populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
f_out.write(populated_line)
else:
f_out.write(line)
f_out.write("\n")
def iterative_replace_line (input_line: str, case: str, source: dict, add_line: str) -> str:
"""
Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`.
"""
Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`.
Parameters:
input_line (str) : input line
@ -241,7 +246,7 @@ def iterative_replace_line (input_line: str, case: str, source: dict, add_line:
Returns:
output_block (str) : processed (replaced) block of text
"""
"""
output_block = ""
for item in source.keys():
templine1 = input_line.replace(case.upper(), item.upper())
@ -270,12 +275,12 @@ def check_triggers (input_line: str, triggers: list) -> int:
if trig in input_line or trig.upper() in input_line:
out_id = id
return out_id
return out_id
def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detailed_dset: dict, detailed_numbers: dict, detailed_strings: dict) -> None:
"""
"""
Special population for group-related functions in the TEXT back end.
Parameters:
@ -292,8 +297,8 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
fname_new = join('populated',f'pop_{fname}')
templ_path = get_template_path(fname, paths)
triggers = ['group_dset_dtype', 'group_dset_std_dtype_out', 'group_dset_std_dtype_in',
'group_num_dtype_double', 'group_num_std_dtype_out', 'group_num_std_dtype_in',
triggers = ['group_dset_dtype', 'group_dset_format_printf', 'group_dset_format_scanf',
'group_num_dtype_double', 'group_num_format_printf', 'group_num_format_scanf',
'group_dset', 'group_num', 'group_str', 'group']
for group in group_dict.keys():
@ -316,16 +321,16 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
elif 'START REPEAT GROUP_NUM' in line or 'START REPEAT GROUP_ATTR_STR' in line:
subloop_num = True
continue
if 'END REPEAT GROUP_DSET' in line:
for dset in detailed_dset.keys():
if group != detailed_dset[dset]['group']:
if group != detailed_dset[dset]['group']:
continue
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['dtype'] != 'char*'):
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['group_dset_dtype'] != 'char*'):
continue
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['dtype'] == 'char*'):
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['group_dset_dtype'] == 'char*'):
continue
dset_allocated.append(dset)
@ -351,7 +356,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
elif 'END REPEAT GROUP_NUM' in line:
for dim in detailed_numbers.keys():
if group != detailed_numbers[dim]['group']:
if group != detailed_numbers[dim]['group']:
continue
save_body = loop_body
@ -364,7 +369,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
elif 'END REPEAT GROUP_ATTR_STR' in line:
for str in detailed_strings.keys():
if group != detailed_strings[str]['group']:
if group != detailed_strings[str]['group']:
continue
str_allocated.append(str)
@ -390,22 +395,22 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
continue
if not subloop_num and not subloop_dset:
# NORMAL CASE WITHOUT SUBLOOPS
# NORMAL CASE WITHOUT SUBLOOPS
if '$group_dset' in line:
for dset in detailed_dset.keys():
if group != detailed_dset[dset]['group']:
if group != detailed_dset[dset]['group']:
continue
populated_line = recursive_replace_line(line, triggers, detailed_dset[dset])
f_out.write(populated_line)
elif '$group_str' in line:
for str in detailed_strings.keys():
if group != detailed_strings[str]['group']:
if group != detailed_strings[str]['group']:
continue
populated_line = recursive_replace_line(line, triggers, detailed_strings[str])
f_out.write(populated_line)
elif '$group_num$' in line:
for dim in detailed_numbers.keys():
if group != detailed_numbers[dim]['group']:
if group != detailed_numbers[dim]['group']:
continue
populated_line = recursive_replace_line(line, triggers, detailed_numbers[dim])
f_out.write(populated_line)
@ -421,7 +426,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
def get_template_path (filename: str, path_dict: dict) -> str:
"""
"""
Returns the absolute path to the directory with indicated `filename` template.
Parameters:
@ -435,12 +440,12 @@ def get_template_path (filename: str, path_dict: dict) -> str:
if dir_type in filename:
path = path_dict[dir_type]
return path
raise ValueError('Filename should contain one of the keywords')
def get_group_dict (configuration: dict) -> dict:
"""
"""
Returns the dictionary of all groups.
Parameters:
@ -456,10 +461,126 @@ def get_group_dict (configuration: dict) -> dict:
return group_dict
def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) -> dict:
    """
    Returns the dictionary of dtype-related templated variables set for a given `dtype`.
    Keys are names of templated variables, values are strings to be used by the generator.

            Parameters:
                dtype (str)          : dtype corresponding to the trex.json (i.e. int/dim/float/float sparse/str)
                target (str)         : `num` or `dset`
                rank (int)           : [optional] value of n in n-index (sparse) dset; needed to build the printf/scanf format string
                int_len_printf(dict) : [optional]
                                        keys: precision (e.g. 32 for int32_t)
                                        values: lengths reserved for one index when printing n-index (sparse) dset (e.g. 10 for int32_t)

            Returns:
                dtype_dict (dict)    : dictionary of dtype-related substitutions;
                                       empty if `dtype` matches none of the supported cases

            Raises:
                Exception            : if `target` is not `num`/`dset`, if `rank` or `int_len_printf` is missing
                                       for a sparse `dtype`, if `rank` is lower than 2 or if `int_len_printf`
                                       is not a dictionary
    """
    # ---------------------------- argument validation ---------------------------- #
    if target not in ['num', 'dset']:
        raise Exception('Only num or dset target can be set.')
    if 'sparse' in dtype:
        if rank is None or int_len_printf is None:
            raise Exception("Both rank and int_len_printf arguments has to be provided to build the dtype_dict for sparse data.")
    if rank is not None and rank <= 1:
        raise Exception('Rank of sparse quantity cannot be lower than 2.')
    if int_len_printf is not None and not isinstance(int_len_printf, dict):
        raise Exception('int_len_printf has to be a dictionary of lengths for different precisions.')

    dtype_dict = {}
    # set up the key-value pairs depending on the dtype
    if dtype == 'float':
        dtype_dict.update({
            'default_prec'                    : '64',
            f'group_{target}_dtype'           : 'double',
            f'group_{target}_h5_dtype'        : 'native_double',
            f'group_{target}_f_dtype_default' : 'real(8)',
            f'group_{target}_f_dtype_double'  : 'real(8)',
            f'group_{target}_f_dtype_single'  : 'real(4)',
            f'group_{target}_dtype_default'   : 'double',
            f'group_{target}_dtype_double'    : 'double',
            f'group_{target}_dtype_single'    : 'float',
            f'group_{target}_format_printf'   : '24.16e',
            f'group_{target}_format_scanf'    : 'lf',
            f'group_{target}_py_dtype'        : 'float'
        })
    elif dtype in ['int', 'dim', 'index']:
        dtype_dict.update({
            'default_prec'                    : '32',
            f'group_{target}_dtype'           : 'int64_t',
            f'group_{target}_h5_dtype'        : 'native_int64',
            f'group_{target}_f_dtype_default' : 'integer(4)',
            f'group_{target}_f_dtype_double'  : 'integer(8)',
            f'group_{target}_f_dtype_single'  : 'integer(4)',
            f'group_{target}_dtype_default'   : 'int32_t',
            f'group_{target}_dtype_double'    : 'int64_t',
            f'group_{target}_dtype_single'    : 'int32_t',
            f'group_{target}_format_printf'   : '" PRId64 "',
            f'group_{target}_format_scanf'    : '" SCNd64 "',
            f'group_{target}_py_dtype'        : 'int'
        })
    elif dtype == 'str':
        dtype_dict.update({
            'default_prec'                    : '',
            f'group_{target}_dtype'           : 'char*',
            f'group_{target}_h5_dtype'        : '',
            f'group_{target}_f_dtype_default' : '',
            f'group_{target}_f_dtype_double'  : '',
            f'group_{target}_f_dtype_single'  : '',
            f'group_{target}_dtype_default'   : 'char*',
            f'group_{target}_dtype_double'    : '',
            f'group_{target}_dtype_single'    : '',
            f'group_{target}_format_printf'   : 's',
            f'group_{target}_format_scanf'    : 's',
            f'group_{target}_py_dtype'        : 'str'
        })
    elif 'sparse' in dtype:
        # build format strings for n-index sparse quantity:
        # one printf string per supported index precision (the C format macro differs),
        # each made of `rank` index items followed by the format of the float value
        index_macros = {8: 'PRIu8', 16: 'PRIu16', 32: 'PRId32'}
        sparse_format_printf = {}
        for precision, macro in index_macros.items():
            item_printf = f'%{int_len_printf[precision]}" {macro} " '
            sparse_format_printf[precision] = '"' + item_printf * rank + '%24.16e" '
        # the scanf string always reads the indices with the 32-bit macro
        group_dset_format_scanf = '%" SCNd32 " ' * rank + '%lf'

        # set up the dictionary for sparse
        dtype_dict.update({
            'default_prec'                    : '',
            f'group_{target}_dtype'           : 'double',
            f'group_{target}_h5_dtype'        : '',
            f'group_{target}_f_dtype_default' : '',
            f'group_{target}_f_dtype_double'  : '',
            f'group_{target}_f_dtype_single'  : '',
            f'group_{target}_dtype_default'   : '',
            f'group_{target}_dtype_double'    : '',
            f'group_{target}_dtype_single'    : '',
            'sparse_format_printf_8'          : sparse_format_printf[8],
            'sparse_format_printf_16'         : sparse_format_printf[16],
            'sparse_format_printf_32'         : sparse_format_printf[32],
            f'group_{target}_format_scanf'    : group_dset_format_scanf,
            f'group_{target}_py_dtype'        : ''
        })

    return dtype_dict
def get_detailed_num_dict (configuration: dict) -> dict:
"""
"""
Returns the dictionary of all `num`-suffixed variables.
Keys are names, values are subdictionaries containing corresponding group and group_num names.
Keys are names, values are subdictionaries containing corresponding group and group_num names.
Parameters:
configuration (dict) : configuration from `trex.json`
@ -472,40 +593,17 @@ def get_detailed_num_dict (configuration: dict) -> dict:
for k2,v2 in v1.items():
if len(v2[1]) == 0:
tmp_num = f'{k1}_{k2}'
if 'str' not in v2[0]:
if not 'str' in v2[0]:
tmp_dict = {}
tmp_dict['group'] = k1
tmp_dict['group_num'] = tmp_num
num_dict[tmp_num] = tmp_dict
# TODO the arguments below are almost the same as for group_dset (except for trex_json_int_type) and can be exported from somewhere
if v2[0] == 'float':
tmp_dict['datatype'] = 'double'
tmp_dict['group_num_h5_dtype'] = 'native_double'
tmp_dict['group_num_f_dtype_default']= 'real(8)'
tmp_dict['group_num_f_dtype_double'] = 'real(8)'
tmp_dict['group_num_f_dtype_single'] = 'real(4)'
tmp_dict['group_num_dtype_default']= 'double'
tmp_dict['group_num_dtype_double'] = 'double'
tmp_dict['group_num_dtype_single'] = 'float'
tmp_dict['default_prec'] = '64'
tmp_dict['group_num_std_dtype_out'] = '24.16e'
tmp_dict['group_num_std_dtype_in'] = 'lf'
tmp_dict['group_num_py_dtype'] = 'float'
elif v2[0] in ['int', 'dim']:
tmp_dict['datatype'] = 'int64_t'
tmp_dict['group_num_h5_dtype'] = 'native_int64'
tmp_dict['group_num_f_dtype_default']= 'integer(4)'
tmp_dict['group_num_f_dtype_double'] = 'integer(8)'
tmp_dict['group_num_f_dtype_single'] = 'integer(4)'
tmp_dict['group_num_dtype_default']= 'int32_t'
tmp_dict['group_num_dtype_double'] = 'int64_t'
tmp_dict['group_num_dtype_single'] = 'int32_t'
tmp_dict['default_prec'] = '32'
tmp_dict['group_num_std_dtype_out'] = '" PRId64 "'
tmp_dict['group_num_std_dtype_in'] = '" SCNd64 "'
tmp_dict['group_num_py_dtype'] = 'int'
tmp_dict.update(get_dtype_dict(v2[0], 'num'))
if v2[0] in ['int', 'dim']:
tmp_dict['trex_json_int_type'] = v2[0]
else:
tmp_dict['trex_json_int_type'] = ''
return num_dict
@ -536,8 +634,8 @@ def get_detailed_str_dict (configuration: dict) -> dict:
def get_dset_dict (configuration: dict) -> dict:
"""
Returns the dictionary of datasets.
"""
Returns the dictionary of datasets.
Keys are names, values are lists containing datatype, list of dimensions and group name
Parameters:
@ -559,8 +657,8 @@ def get_dset_dict (configuration: dict) -> dict:
def split_dset_dict_detailed (datasets: dict) -> tuple:
"""
Returns the detailed dictionary of datasets.
"""
Returns the detailed dictionary of datasets.
Keys are names, values are subdictionaries containing substitutes for templated variables
Parameters:
@ -571,106 +669,106 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
"""
dset_numeric_dict = {}
dset_string_dict = {}
dset_sparse_dict = {}
for k,v in datasets.items():
# create a temp dictionary
tmp_dict = {}
# specify details required to replace templated variables later
if v[0] == 'float':
datatype = 'double'
group_dset_h5_dtype = 'native_double'
group_dset_f_dtype_default= 'real(8)'
group_dset_f_dtype_double = 'real(8)'
group_dset_f_dtype_single = 'real(4)'
group_dset_dtype_default= 'double'
group_dset_dtype_double = 'double'
group_dset_dtype_single = 'float'
default_prec = '64'
group_dset_std_dtype_out = '24.16e'
group_dset_std_dtype_in = 'lf'
group_dset_py_dtype = 'float'
elif v[0] in ['int', 'index']:
datatype = 'int64_t'
group_dset_h5_dtype = 'native_int64'
group_dset_f_dtype_default= 'integer(4)'
group_dset_f_dtype_double = 'integer(8)'
group_dset_f_dtype_single = 'integer(4)'
group_dset_dtype_default= 'int32_t'
group_dset_dtype_double = 'int64_t'
group_dset_dtype_single = 'int32_t'
default_prec = '32'
group_dset_std_dtype_out = '" PRId64 "'
group_dset_std_dtype_in = '" SCNd64 "'
group_dset_py_dtype = 'int'
elif v[0] == 'str':
datatype = 'char*'
group_dset_h5_dtype = ''
group_dset_f_dtype_default = ''
group_dset_f_dtype_double = ''
group_dset_f_dtype_single = ''
group_dset_dtype_default = 'char*'
group_dset_dtype_double = ''
group_dset_dtype_single = ''
default_prec = ''
group_dset_std_dtype_out = 's'
group_dset_std_dtype_in = 's'
group_dset_py_dtype = 'str'
# add the dset name for templates
rank = len(v[1])
datatype = v[0]
# define whether the dset is sparse
is_sparse = False
int_len_printf = {}
if 'sparse' in datatype:
is_sparse = True
int_len_printf[32] = 10
int_len_printf[16] = 5
int_len_printf[8] = 3
# get the dtype-related substitutions required to replace templated variables later
if not is_sparse:
dtype_dict = get_dtype_dict(datatype, 'dset')
else:
dtype_dict = get_dtype_dict(datatype, 'dset', rank, int_len_printf)
tmp_dict.update(dtype_dict)
# set the group_dset key to the full name of the dset
tmp_dict['group_dset'] = k
# add flag to detect index types
if 'index' == v[0]:
if 'index' in datatype:
tmp_dict['is_index'] = 'file->one_based'
else:
tmp_dict['is_index'] = 'false'
# add the datatypes for templates
tmp_dict['dtype'] = datatype
tmp_dict['group_dset_dtype'] = datatype
tmp_dict['group_dset_h5_dtype'] = group_dset_h5_dtype
tmp_dict['group_dset_f_dtype_default'] = group_dset_f_dtype_default
tmp_dict['group_dset_f_dtype_double'] = group_dset_f_dtype_double
tmp_dict['group_dset_f_dtype_single'] = group_dset_f_dtype_single
tmp_dict['group_dset_dtype_default'] = group_dset_dtype_default
tmp_dict['group_dset_dtype_double'] = group_dset_dtype_double
tmp_dict['group_dset_dtype_single'] = group_dset_dtype_single
tmp_dict['default_prec'] = default_prec
tmp_dict['group_dset_std_dtype_in'] = group_dset_std_dtype_in
tmp_dict['group_dset_std_dtype_out'] = group_dset_std_dtype_out
tmp_dict['group_dset_py_dtype'] = group_dset_py_dtype
# add the rank
tmp_dict['rank'] = len(v[1])
tmp_dict['group_dset_rank'] = str(tmp_dict['rank'])
tmp_dict['rank'] = rank
tmp_dict['group_dset_rank'] = str(rank)
# add the list of dimensions
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
# build a list of dimensions to be inserted in the dims array initialization, e.g. {ao_num, ao_num}
dim_list = tmp_dict['dims'][0]
if tmp_dict['rank'] > 1:
for i in range(1, tmp_dict['rank']):
if rank > 1:
for i in range(1, rank):
dim_toadd = tmp_dict['dims'][i]
dim_list += f', {dim_toadd}'
tmp_dict['group_dset_dim_list'] = dim_list
if tmp_dict['rank'] == 0:
if rank == 0:
dim_f_list = ""
else:
dim_f_list = "(*)"
tmp_dict['group_dset_f_dims'] = dim_f_list
if is_sparse:
# store the max possible dim of the sparse dset (e.g. mo_num)
tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
# build printf/scanf sequence and compute line length for n-index sparse quantity
index_printf = f'*(index_sparse + {str(rank)}*i'
index_scanf = f'index_sparse + {str(rank)}*i'
# one index item consumes up to index_length characters (int32_len_printf for int32 + 1 for space)
group_dset_sparse_indices_printf = index_printf + ')'
group_dset_sparse_indices_scanf = index_scanf
sparse_line_length_32 = int_len_printf[32] + 1
sparse_line_length_16 = int_len_printf[16] + 1
sparse_line_length_8 = int_len_printf[8] + 1
# loop from 1 because we already have stored one index
for index_count in range(1,rank):
group_dset_sparse_indices_printf += f', {index_printf} + {index_count})'
group_dset_sparse_indices_scanf += f', {index_scanf} + {index_count}'
sparse_line_length_32 += int_len_printf[32] + 1
sparse_line_length_16 += int_len_printf[16] + 1
sparse_line_length_8 += int_len_printf[8] + 1
# add 24 chars occupied by the floating point value of sparse dataset + 1 char for "\n"
sparse_line_length_32 += 24 + 1
sparse_line_length_16 += 24 + 1
sparse_line_length_8 += 24 + 1
tmp_dict['sparse_line_length_32'] = str(sparse_line_length_32)
tmp_dict['sparse_line_length_16'] = str(sparse_line_length_16)
tmp_dict['sparse_line_length_8'] = str(sparse_line_length_8)
tmp_dict['group_dset_sparse_indices_printf'] = group_dset_sparse_indices_printf
tmp_dict['group_dset_sparse_indices_scanf'] = group_dset_sparse_indices_scanf
# add group name as a key-value pair to the dset dict
tmp_dict['group'] = v[2]
# split datasets in numeric- and string- based
if (datatype == 'char*'):
if 'str' in datatype:
dset_string_dict[k] = tmp_dict
elif is_sparse:
dset_sparse_dict[k] = tmp_dict
else:
dset_numeric_dict[k] = tmp_dict
return (dset_numeric_dict, dset_string_dict)
return (dset_numeric_dict, dset_string_dict, dset_sparse_dict)
def check_dim_consistency(num: dict, dset: dict) -> None:
"""
Consistency check to make sure that each dimensioning variable exists as a num attribute of some group.
"""
Consistency check to make sure that each dimensioning variable exists as a num attribute of some group.
Parameters:
num (dict) : dictionary of numerical attributes

View File

@ -2,32 +2,40 @@
#+STARTUP: latexpreview
#+SETUPFILE: docs/theme.setup
This page contains information about the general structure of the
TREXIO library. The source code of the library can be automatically
generated based on the contents of the ~trex.json~ configuration file,
which itself is compiled from different sections (groups) presented below.
This page contains information about the general structure of the
TREXIO library. The source code of the library can be automatically
generated based on the contents of the ~trex.json~ configuration file,
which itself is compiled from different sections (groups) presented
below.
For more information about the automatic generation on the source code
or regarding possible modifications, please contact the TREXIO developers.
For more information about the automatic generation of the source code
or regarding possible modifications, please contact the TREXIO
developers.
All quantities are saved in TREXIO file in atomic units.
The dimensions of the arrays in the tables below are given in
column-major order (as in Fortran), and the ordering of the dimensions
is reversed in the produced ~trex.json~ configuration file as the library is
All quantities are saved in TREXIO file in atomic units. The
dimensions of the arrays in the tables below are given in column-major
order (as in Fortran), and the ordering of the dimensions is reversed
in the produced ~trex.json~ configuration file as the library is
written in C.
TREXIO currently supports ~int~, ~float~ and ~str~ types for both single attributes and arrays.
Note, that some attributes might have ~dim~ type (e.g. ~num~ of the ~nucleus~ group).
This type is treated exactly the same as ~int~ with the only difference that ~dim~ variables
cannot be negative or zero. This additional constraint is required because ~dim~ attributes
are used internally to allocate memory and to check array boundaries in the memory-safe API.
Most of the times, the ~dim~ variables contain ~num~ suffix.
TREXIO currently supports ~int~, ~float~ and ~str~ types for both
single attributes and arrays. Note that some attributes might have
~dim~ type (e.g. ~num~ of the ~nucleus~ group). This type is treated
exactly the same as ~int~ with the only difference that ~dim~
variables cannot be negative. This additional constraint is required
because ~dim~ attributes are used internally to allocate memory and to
check array boundaries in the memory-safe API. Most of the time, the
~dim~ variables contain the ~num~ suffix.
In Fortran, the arrays are 1-based and in most other languages the
arrays are 0-based. Hence, we introduce the ~index~ type which is a
1-based ~int~ in the Fortran interface and 0-based otherwise.
For sparse data structures such as electron repulsion integrals,
the data can be too large to fit in memory and the data needs to be
fetched using multiple function calls to perform I/O on buffers.
#+begin_src python :tangle trex.json :exports none
{
#+end_src
@ -78,14 +86,14 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
#+CALL: json(data=electron, title="electron")
#+RESULTS:
:RESULTS:
:results:
#+begin_src python :tangle trex.json
"electron": {
"up_num" : [ "int", [] ]
, "dn_num" : [ "int", [] ]
"up_num" : [ "int", [] ]
, "dn_num" : [ "int", [] ]
} ,
#+end_src
:END:
:end:
* Nucleus (nucleus group)
@ -100,20 +108,22 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
| ~coord~ | ~float~ | ~(3,nucleus.num)~ | Coordinates of the atoms |
| ~label~ | ~str~ | ~(nucleus.num)~ | Atom labels |
| ~point_group~ | ~str~ | | Symmetry point group |
| ~repulsion~ | ~float~ | | Nuclear repulsion energy |
#+CALL: json(data=nucleus, title="nucleus")
#+RESULTS:
:RESULTS:
:results:
#+begin_src python :tangle trex.json
"nucleus": {
"num" : [ "dim" , [] ]
, "charge" : [ "float", [ "nucleus.num" ] ]
, "coord" : [ "float", [ "nucleus.num", "3" ] ]
, "label" : [ "str" , [ "nucleus.num" ] ]
, "point_group" : [ "str" , [] ]
"num" : [ "dim" , [] ]
, "charge" : [ "float", [ "nucleus.num" ] ]
, "coord" : [ "float", [ "nucleus.num", "3" ] ]
, "label" : [ "str" , [ "nucleus.num" ] ]
, "point_group" : [ "str" , [] ]
, "repulsion" : [ "float", [] ]
} ,
#+end_src
:END:
:end:
* Effective core potentials (ecp group)
@ -617,15 +627,18 @@ prim_factor =
:end:
* TODO Slater determinants
* TODO Reduced density matrices (rdm group)
* Reduced density matrices (rdm group)
#+NAME: rdm
| Variable | Type | Dimensions | Description |
|------------+----------------+------------------------------------+-------------|
| ~one_e~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~one_e_up~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~one_e_dn~ | ~float~ | ~(mo.num, mo.num)~ | |
| ~two_e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | |
| Variable | Type | Dimensions | Description |
|-----------+----------------+------------------------------------+-----------------------------------------------------------------------|
| ~1e~ | ~float~ | ~(mo.num, mo.num)~ | One body density matrix |
| ~1e_up~ | ~float~ | ~(mo.num, mo.num)~ | \uparrow-spin component of the one body density matrix |
| ~1e_dn~ | ~float~ | ~(mo.num, mo.num)~ | \downarrow-spin component of the one body density matrix |
| ~2e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | Two-body reduced density matrix (spin trace) |
| ~2e_upup~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\uparrow component of the two-body reduced density matrix |
| ~2e_dndn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \downarrow\downarrow component of the two-body reduced density matrix |
| ~2e_updn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\downarrow component of the two-body reduced density matrix |
#+CALL: json(data=rdm, title="rdm", last=1)
@ -633,10 +646,13 @@ prim_factor =
:results:
#+begin_src python :tangle trex.json
"rdm": {
"one_e" : [ "float" , [ "mo.num", "mo.num" ] ]
, "one_e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
, "one_e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
, "two_e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
"1e" : [ "float" , [ "mo.num", "mo.num" ] ]
, "1e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
, "1e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
, "2e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_upup" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_dndn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
, "2e_updn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
}
#+end_src
:end: