mirror of
https://github.com/TREX-CoE/trexio.git
synced 2024-11-03 20:54:07 +01:00
Merge pull request #70 from TREX-CoE/add-sparse-datasets
- Add sparse datasets - Modularize generator_tools.py
This commit is contained in:
commit
8ca74ffef1
4
.gitignore
vendored
4
.gitignore
vendored
@ -11,6 +11,8 @@ m4/ltsugar.m4
|
||||
m4/ltversion.m4
|
||||
m4/lt~obsolete.m4
|
||||
autom4te.cache/
|
||||
build-config/
|
||||
ar-lib
|
||||
compile
|
||||
config.guess
|
||||
config.log
|
||||
@ -38,5 +40,3 @@ test-suite.log
|
||||
*.h5
|
||||
trexio-*.tar.gz
|
||||
trex.json
|
||||
|
||||
|
||||
|
14
Makefile.am
14
Makefile.am
@ -90,6 +90,7 @@ TESTS_C = \
|
||||
tests/io_num_text \
|
||||
tests/io_dset_float_text \
|
||||
tests/io_dset_int_text \
|
||||
tests/io_dset_sparse_text \
|
||||
tests/io_safe_dset_float_text \
|
||||
tests/io_str_text \
|
||||
tests/io_dset_str_text \
|
||||
@ -102,6 +103,7 @@ TESTS_C += \
|
||||
tests/io_num_hdf5 \
|
||||
tests/io_dset_float_hdf5 \
|
||||
tests/io_dset_int_hdf5 \
|
||||
tests/io_dset_sparse_hdf5 \
|
||||
tests/io_safe_dset_float_hdf5 \
|
||||
tests/io_str_hdf5 \
|
||||
tests/io_dset_str_hdf5 \
|
||||
@ -117,8 +119,8 @@ check_PROGRAMS = $(TESTS)
|
||||
# specify common LDADD options for all tests
|
||||
LDADD = src/libtrexio.la
|
||||
|
||||
|
||||
test_trexio_f = $(srcdir)/tests/trexio_f.f90
|
||||
CLEANFILES += $(test_trexio_f)
|
||||
|
||||
$(test_trexio_f): $(trexio_f)
|
||||
cp $(trexio_f) $(test_trexio_f)
|
||||
@ -126,7 +128,6 @@ $(test_trexio_f): $(trexio_f)
|
||||
trexio.mod: tests/trexio_f.o
|
||||
|
||||
tests_test_f_SOURCES = $(test_trexio_f) tests/test_f.f90
|
||||
tests_test_f_LDFLAGS = -no-install
|
||||
|
||||
clean-local:
|
||||
-rm -rf -- *.dir/ *.h5 __pycache__/
|
||||
@ -134,7 +135,7 @@ clean-local:
|
||||
# =============== DOCUMENTATION =============== #
|
||||
|
||||
HTML_TANGLED = docs/index.html \
|
||||
docs/Sparse.html \
|
||||
docs/examples.html \
|
||||
docs/templator_hdf5.html \
|
||||
docs/trex.html \
|
||||
docs/README.html \
|
||||
@ -179,11 +180,13 @@ BUILT_SOURCES += $(SOURCES) $(trexio_f) $(test_trexio_f)
|
||||
|
||||
all: .git_hash
|
||||
|
||||
GENERATOR_FILES = $(srcdir)/tools/generator.py \
|
||||
$(srcdir)/tools/generator_tools.py
|
||||
|
||||
$(SOURCES): $(trexio_f)
|
||||
src/trexio.c: $(trexio_h)
|
||||
|
||||
$(trexio_f): $(ORG_FILES)
|
||||
$(trexio_f): $(ORG_FILES) $(GENERATOR_FILES)
|
||||
cd $(srcdir)/tools && ./build_trexio.sh
|
||||
|
||||
$(htmlizer): $(ORG_FILES) $(srcdir)/src/README.org
|
||||
@ -227,7 +230,7 @@ $(pytrexio_py): $(pytrexio_c)
|
||||
|
||||
# Build Python module and C wrapper code for TREXIO using SWIG
|
||||
# [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface
|
||||
$(pytrexio_c): $(ORG_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
|
||||
$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
|
||||
cp $(trexio_h) src/
|
||||
cd src/ && \
|
||||
$(SWIG) -python -py3 -o pytrexio_wrap.c pytrexio.i
|
||||
@ -248,4 +251,3 @@ CLEANFILES += $(pytrexio_c) \
|
||||
.PHONY: cppcheck python-test python-install python-sdist check-numpy FORCE
|
||||
|
||||
endif
|
||||
|
||||
|
22
Sparse.org
22
Sparse.org
@ -1,22 +0,0 @@
|
||||
See templator_front.org
|
||||
|
||||
* Text back end
|
||||
As the size of the dataset should be extensible, the simplest
|
||||
solution is to use one file for each sparse data set, and store a
|
||||
the name of this file in the group.
|
||||
Each integral can be a line in the file:
|
||||
i j k l x
|
||||
which can be read with "%10ld %10ld %10ld %10ld %24.16e".
|
||||
The offset can be used with ~fseek(69L*offset, SEEK_SET)~
|
||||
|
||||
* HDF5 Back end
|
||||
|
||||
We need to declare the number of rows of the dataset as
|
||||
~UNLIMITED~. This requires to use the ~Chunked~ storage, and the
|
||||
chunks should absolutely not be larger than 1MB.
|
||||
|
||||
To extend the storage, see :
|
||||
https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html
|
||||
(figure 17)
|
||||
|
||||
If the offset+num > nmax, we need to extend the dataset.
|
262
examples.org
Normal file
262
examples.org
Normal file
@ -0,0 +1,262 @@
|
||||
#+TITLE: Examples
|
||||
#+STARTUP: latexpreview
|
||||
#+SETUPFILE: docs/theme.setup
|
||||
|
||||
|
||||
* Accessing sparse quantities
|
||||
** Fortran
|
||||
:PROPERTIES:
|
||||
:header-args: :tangle print_energy.f90
|
||||
:END:
|
||||
|
||||
#+begin_src f90
|
||||
program print_energy
|
||||
use trexio
|
||||
implicit none
|
||||
|
||||
character*(128) :: filename ! Name of the input file
|
||||
integer :: rc ! Return code for error checking
|
||||
integer(8) :: f ! TREXIO file handle
|
||||
character*(128) :: err_msg ! Error message
|
||||
#+end_src
|
||||
|
||||
This program computes the energy as:
|
||||
|
||||
\[
|
||||
E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\,
|
||||
+\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l
|
||||
\rangle\; \textrm{ with } \; 0 < i,j,k,l \le n
|
||||
\]
|
||||
|
||||
One needs to read from the TREXIO file:
|
||||
|
||||
- $n$ :: The number of molecular orbitals
|
||||
- $E_{\text{NN}}$ :: The nuclear repulsion energy
|
||||
- $D_{ij}$ :: The one-body reduced density matrix
|
||||
- $\langle i |h| j \rangle$ :: The one-electron Hamiltonian integrals
|
||||
- $\Gamma_{ijkl}$ :: The two-body reduced density matrix
|
||||
- $\langle i j | k l \rangle$ :: The electron repulsion integrals
|
||||
|
||||
#+begin_src f90
|
||||
integer :: n
|
||||
double precision :: E, E_nn
|
||||
double precision, allocatable :: D(:,:), h0(:,:)
|
||||
double precision, allocatable :: G(:,:,:,:), W(:,:,:,:)
|
||||
#+end_src
|
||||
|
||||
*** Declare Temporary variables
|
||||
|
||||
#+begin_src f90
|
||||
integer :: i, j, k, l, m
|
||||
integer(8), parameter :: BUFSIZE = 100000_8
|
||||
integer(8) :: offset, icount, size_max
|
||||
integer :: buffer_index(4,BUFSIZE)
|
||||
double precision :: buffer_values(BUFSIZE)
|
||||
|
||||
double precision, external :: ddot ! BLAS dot product
|
||||
#+end_src
|
||||
|
||||
*** Obtain the name of the TREXIO file from the command line, and open it for reading
|
||||
|
||||
#+begin_src f90
|
||||
call getarg(1, filename)
|
||||
|
||||
f = trexio_open (filename, 'r', TREXIO_HDF5, rc)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error opening TREXIO file: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
#+end_src
|
||||
|
||||
*** Read the nuclear repulsion energy
|
||||
|
||||
#+begin_src f90
|
||||
rc = trexio_read_nucleus_repulsion(f, E_nn)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error reading nuclear repulsion: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
#+end_src
|
||||
|
||||
*** Read the number of molecular orbitals
|
||||
|
||||
#+begin_src f90
|
||||
rc = trexio_read_mo_num(f, n)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error reading number of MOs: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
#+end_src
|
||||
|
||||
*** Allocate memory
|
||||
|
||||
#+begin_src f90
|
||||
allocate( D(n,n), h0(n,n) )
|
||||
allocate( G(n,n,n,n), W(n,n,n,n) )
|
||||
G(:,:,:,:) = 0.d0
|
||||
W(:,:,:,:) = 0.d0
|
||||
#+end_src
|
||||
|
||||
*** Read one-electron quantities
|
||||
|
||||
#+begin_src f90
|
||||
rc = trexio_has_mo_1e_int_core_hamiltonian(f)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
stop 'No core hamiltonian in file'
|
||||
end if
|
||||
|
||||
rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error reading core Hamiltonian: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
|
||||
|
||||
rc = trexio_has_rdm_1e(f)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
stop 'No 1e RDM in file'
|
||||
end if
|
||||
|
||||
rc = trexio_read_rdm_1e(f, D)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error reading one-body RDM: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
#+end_src
|
||||
|
||||
*** Read two-electron quantities
|
||||
|
||||
Reading is done with OpenMP. Each thread reads its own buffer, and
|
||||
the buffers are then processed in parallel.
|
||||
|
||||
Reading the file requires a lock, so it is done in a critical
|
||||
section. The ~offset~ variable is shared, and it is incremented in
|
||||
the critical section. For each read, the function returns in
|
||||
~icount~ the number of read integrals, so this variable needs also
|
||||
to be protected in the critical section when modified.
|
||||
|
||||
**** Electron repulsion integrals
|
||||
|
||||
#+begin_src f90
|
||||
rc = trexio_has_mo_2e_int_eri(f)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
stop 'No electron repulsion integrals in file'
|
||||
end if
|
||||
|
||||
rc = trexio_read_mo_2e_int_eri_size (f, size_max)
|
||||
if (rc /= TREXIO_SUCCESS) then
|
||||
call trexio_string_of_error(rc, err_msg)
|
||||
print *, 'Error reading number of ERIs: '//trim(err_msg)
|
||||
stop
|
||||
end if
|
||||
|
||||
offset = 0_8
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
|
||||
!$OMP buffer_index, buffer_values, m)
|
||||
icount = BUFSIZE
|
||||
do while (icount == BUFSIZE)
|
||||
!$OMP CRITICAL
|
||||
if (offset < size_max) then
|
||||
rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values)
|
||||
offset = offset + icount
|
||||
else
|
||||
icount = 0
|
||||
end if
|
||||
!$OMP END CRITICAL
|
||||
do m=1,icount
|
||||
i = buffer_index(1,m)
|
||||
j = buffer_index(2,m)
|
||||
k = buffer_index(3,m)
|
||||
l = buffer_index(4,m)
|
||||
W(i,j,k,l) = buffer_values(m)
|
||||
W(k,j,i,l) = buffer_values(m)
|
||||
W(i,l,k,j) = buffer_values(m)
|
||||
W(k,l,i,j) = buffer_values(m)
|
||||
W(j,i,l,k) = buffer_values(m)
|
||||
W(j,k,l,i) = buffer_values(m)
|
||||
W(l,i,j,k) = buffer_values(m)
|
||||
W(l,k,j,i) = buffer_values(m)
|
||||
end do
|
||||
end do
|
||||
!$OMP END PARALLEL
|
||||
#+end_src
|
||||
|
||||
**** Reduced density matrix
|
||||
|
||||
#+begin_src f90
|
||||
! Make sure the two-body reduced density matrix is present in the file
rc = trexio_has_rdm_2e(f)
if (rc /= TREXIO_SUCCESS) then
  stop 'No two-body density matrix in file'
end if

! Number of sparse elements stored in the file
rc = trexio_read_rdm_2e_size (f, size_max)
if (rc /= TREXIO_SUCCESS) then
  call trexio_string_of_error(rc, err_msg)
  print *, 'Error reading number of 2-RDM elements: '//trim(err_msg)
  stop
end if

! offset is shared between the threads: it is only read and updated
! inside the critical section below.
offset = 0_8
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
!$OMP  buffer_index, buffer_values, m)
icount = BUFSIZE
! The loop test uses the thread-private icount, as in the ERI loop:
! a thread stops as soon as it gets fewer than BUFSIZE elements.
! Testing the shared offset here would read it outside of the critical
! section while other threads may be updating it.
do while (icount == BUFSIZE)
  !$OMP CRITICAL
  if (offset < size_max) then
    ! On return, icount holds the number of elements actually read
    rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values)
    offset = offset + icount
  else
    ! Nothing left to read: skip the processing loop and leave
    icount = 0
  end if
  !$OMP END CRITICAL
  ! Scatter the thread-private buffer into the dense 4-index array
  do m=1,icount
    i = buffer_index(1,m)
    j = buffer_index(2,m)
    k = buffer_index(3,m)
    l = buffer_index(4,m)
    G(i,j,k,l) = buffer_values(m)
  end do
end do
!$OMP END PARALLEL
|
||||
|
||||
#+end_src
|
||||
|
||||
*** Compute the energy
|
||||
|
||||
As $(n,m)$ 2D arrays are stored in memory as $(n \times m)$ 1D
|
||||
arrays, we could pass the matrices to the ~ddot~ BLAS function to
|
||||
perform the summations in a single call for the 1-electron quantities.
|
||||
Instead, we prefer to interleave the 1-electron (negative) and
|
||||
2-electron (positive) summations to have a better cancellation of
|
||||
numerical errors.
|
||||
|
||||
Here $n^4$ can be larger than the largest possible 32-bit integer,
|
||||
so it is not safe to pass $n^4$ to the ~ddot~ BLAS
|
||||
function. Hence, we perform $n^2$ loops, using vectors of size $n^2$.
|
||||
|
||||
#+begin_src f90
|
||||
|
||||
E = 0.d0
|
||||
do l=1,n
|
||||
E = E + ddot( n, D(1,l), 1, h0(1,l), 1 )
|
||||
do k=1,n
|
||||
E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l), 1 )
|
||||
end do
|
||||
end do
|
||||
E = E + E_nn
|
||||
|
||||
print *, 'Energy: ', E
|
||||
#+end_src
|
||||
|
||||
*** Terminate
|
||||
|
||||
#+begin_src f90
|
||||
deallocate( D, h0, G, W )
|
||||
|
||||
end program
|
||||
#+end_src
|
File diff suppressed because it is too large
Load Diff
@ -12,5 +12,5 @@ cat populated/pop_read_*.c >> trexio_hdf5.c
|
||||
cat populated/pop_write_*.c >> trexio_hdf5.c
|
||||
cat populated/pop_hrw_*.h >> trexio_hdf5.h
|
||||
|
||||
cat helpers_hdf5.c >> trexio_hdf5.c
|
||||
cat suffix_hdf5.h >> trexio_hdf5.h
|
||||
|
||||
|
@ -201,15 +201,15 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
|
||||
/* Write the dimensioning variables */
|
||||
const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
|
||||
const hid_t dspace = H5Screate(H5S_SCALAR);
|
||||
|
||||
const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
|
||||
|
||||
const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
|
||||
dtype, dspace, H5P_DEFAULT, H5P_DEFAULT);
|
||||
if (num_id <= 0) {
|
||||
H5Sclose(dspace);
|
||||
H5Tclose(dtype);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
|
||||
const herr_t status = H5Awrite(num_id, dtype, &(num));
|
||||
if (status < 0) {
|
||||
H5Aclose(num_id);
|
||||
@ -217,7 +217,7 @@ trexio_hdf5_write_$group_num$ (trexio_t* const file, const $group_num_dtype_doub
|
||||
H5Tclose(dtype);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
H5Sclose(dspace);
|
||||
H5Aclose(num_id);
|
||||
H5Tclose(dtype);
|
||||
@ -262,7 +262,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, $group_dset_dtype$* const $
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
|
||||
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
|
||||
|
||||
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
|
||||
|
||||
@ -317,7 +317,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const $group_dset_dtype$*
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
|
||||
if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
|
||||
|
||||
trexio_hdf5_t* f = (trexio_hdf5_t*) file;
|
||||
|
||||
@ -372,6 +372,207 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
|
||||
}
|
||||
#+end_src
|
||||
|
||||
** Template for HDF5 has/read/write the dataset of sparse data
|
||||
|
||||
Sparse data is stored using extensible datasets of HDF5. Extensibility is required
|
||||
due to the fact that the sparse data will be written in chunks of user-defined size.
|
||||
|
||||
#+begin_src c :tangle hrw_dset_sparse_hdf5.h :exports none
|
||||
trexio_exit_code trexio_hdf5_has_$group_dset$(trexio_t* const file);
|
||||
trexio_exit_code trexio_hdf5_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_hdf5_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int32_t* index_sparse, const double* value_sparse);
|
||||
trexio_exit_code trexio_hdf5_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle write_dset_sparse_hdf5.c
|
||||
trexio_exit_code
|
||||
trexio_hdf5_write_$group_dset$ (trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
const int32_t* index_sparse,
|
||||
const double* value_sparse)
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
|
||||
trexio_hdf5_t* f = (trexio_hdf5_t*) file;
|
||||
|
||||
hid_t index_dtype;
|
||||
void* index_p;
|
||||
uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;
|
||||
/* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
|
||||
if (size_max < UINT8_MAX) {
|
||||
uint8_t* index = CALLOC(size_ranked, uint8_t);
|
||||
if (index == NULL) return TREXIO_ALLOCATION_FAILED;
|
||||
for (int64_t i=0; i<size_ranked; ++i){
|
||||
index[i] = (uint8_t) index_sparse[i];
|
||||
}
|
||||
index_p = index;
|
||||
index_dtype = H5T_NATIVE_UINT8;
|
||||
} else if (size_max < UINT16_MAX) {
|
||||
uint16_t* index = CALLOC(size_ranked, uint16_t);
|
||||
if (index == NULL) return TREXIO_ALLOCATION_FAILED;
|
||||
for (int64_t i=0; i<size_ranked; ++i){
|
||||
index[i] = (uint16_t) index_sparse[i];
|
||||
}
|
||||
index_p = index;
|
||||
index_dtype = H5T_NATIVE_UINT16;
|
||||
} else {
|
||||
index_p = (int32_t*) index_sparse;
|
||||
index_dtype = H5T_NATIVE_INT32;
|
||||
}
|
||||
|
||||
/* Store float values in double precision */
|
||||
hid_t value_dtype = H5T_NATIVE_DOUBLE;
|
||||
/* Arrays of chunk dims that will be used for chunking the dataset */
|
||||
const hsize_t chunk_i_dims[1] = {size_ranked};
|
||||
const hsize_t chunk_v_dims[1] = {size};
|
||||
|
||||
/* Indices and values are stored as 2 independent datasets in the HDF5 file */
|
||||
char dset_index_name[256] = "\0";
|
||||
char dset_value_name[256] = "\0";
|
||||
/* Build the names of the datasets */
|
||||
strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
|
||||
strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);
|
||||
|
||||
trexio_exit_code rc_write = TREXIO_FAILURE;
|
||||
/* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
|
||||
if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
|
||||
/* If the file does not exist -> create it and write */
|
||||
|
||||
/* Create chunked dataset with index_dtype datatype and write indices into it */
|
||||
rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
|
||||
if (index_p != index_sparse) FREE(index_p);
|
||||
if (rc_write != TREXIO_SUCCESS) return rc_write;
|
||||
|
||||
/* Create chunked dataset with value_dtype datatype and write values into it */
|
||||
rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
|
||||
if (rc_write != TREXIO_SUCCESS) return rc_write;
|
||||
|
||||
} else {
|
||||
/* If the file exists -> open it and write */
|
||||
hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
|
||||
hsize_t offset_v[1] = {(hsize_t) offset_file};
|
||||
|
||||
/* Create chunked dataset with index_dtype datatype and write indices into it */
|
||||
rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
|
||||
if (index_p != index_sparse) FREE(index_p);
|
||||
if (rc_write != TREXIO_SUCCESS) return rc_write;
|
||||
|
||||
/* Create chunked dataset with value_dtype datatype and write values into it */
|
||||
rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
|
||||
if (rc_write != TREXIO_SUCCESS) return rc_write;
|
||||
|
||||
}
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle read_dset_sparse_hdf5.c
|
||||
trexio_exit_code
|
||||
trexio_hdf5_read_$group_dset$ (trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
int64_t* const eof_read_size,
|
||||
int32_t* const index_read,
|
||||
double* const value_read)
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
|
||||
|
||||
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
|
||||
|
||||
/* Indices and values are stored as 2 independent datasets in the HDF5 file */
|
||||
char dset_index_name[256] = "\0";
|
||||
char dset_value_name[256] = "\0";
|
||||
/* Build the names of the datasets */
|
||||
strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
|
||||
strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);
|
||||
|
||||
hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
|
||||
hsize_t count_i[1] = {(hsize_t) size * $group_dset_rank$};
|
||||
|
||||
hsize_t offset_v[1] = {(hsize_t) offset_file};
|
||||
hsize_t count_v[1] = {(hsize_t) size};
|
||||
|
||||
int is_index = 1, is_value = 0;
|
||||
trexio_exit_code rc_read;
|
||||
|
||||
// attempt to read indices
|
||||
rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_index_name, offset_i, count_i, NULL, is_index, index_read);
|
||||
if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;
|
||||
// attempt to read values
|
||||
// when EOF is encountered - the count_v[0] is modified and contains the number of elements being read
|
||||
rc_read = trexio_hdf5_open_read_dset_sparse(f->$group$_group, dset_value_name, offset_v, count_v, eof_read_size, is_value, value_read);
|
||||
if (rc_read != TREXIO_SUCCESS && rc_read != TREXIO_END) return rc_read;
|
||||
|
||||
return rc_read;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle read_dset_sparse_hdf5.c
|
||||
trexio_exit_code
|
||||
trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
|
||||
const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
|
||||
|
||||
hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
|
||||
if (dset_id <= 0) return TREXIO_INVALID_ID;
|
||||
|
||||
hid_t fspace_id = H5Dget_space(dset_id);
|
||||
if (fspace_id < 0) {
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
// allocate space for the dimensions to be read
|
||||
hsize_t ddims[1] = {0};
|
||||
|
||||
// get the rank and dimensions of the dataset
|
||||
int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
|
||||
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(fspace_id);
|
||||
|
||||
*size_max = (int64_t) ddims[0];
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle has_dset_sparse_hdf5.c
|
||||
trexio_exit_code
|
||||
trexio_hdf5_has_$group_dset$ (trexio_t* const file)
|
||||
{
|
||||
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
|
||||
trexio_hdf5_t* f = (trexio_hdf5_t*) file;
|
||||
|
||||
herr_t status = H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME "_values");
|
||||
/* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
|
||||
if (status == 1){
|
||||
return TREXIO_SUCCESS;
|
||||
} else if (status == 0) {
|
||||
return TREXIO_HAS_NOT;
|
||||
} else {
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
}
|
||||
#+end_src
|
||||
|
||||
** Template for HDF5 has/read/write the dataset of strings
|
||||
|
||||
#+begin_src c :tangle hrw_dset_str_hdf5.h :exports none
|
||||
@ -403,10 +604,10 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
|
||||
return TREXIO_ALLOCATION_FAILED;
|
||||
}
|
||||
|
||||
hid_t dspace = H5Dget_space(dset_id);
|
||||
hid_t dspace = H5Dget_space(dset_id);
|
||||
if (dset_id <= 0) {
|
||||
FREE(ddims);
|
||||
H5Dclose(dset_id);
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
@ -442,7 +643,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
|
||||
if (rdata == NULL) {
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(dspace);
|
||||
H5Tclose(memtype);
|
||||
H5Tclose(memtype);
|
||||
return TREXIO_ALLOCATION_FAILED;
|
||||
}
|
||||
|
||||
@ -451,7 +652,7 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
|
||||
FREE(rdata);
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(dspace);
|
||||
H5Tclose(memtype);
|
||||
H5Tclose(memtype);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
@ -474,11 +675,11 @@ trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, c
|
||||
FREE(rdata);
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(dspace);
|
||||
H5Tclose(memtype);
|
||||
H5Tclose(memtype);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
FREE(rdata);
|
||||
FREE(rdata);
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(dspace);
|
||||
H5Tclose(memtype);
|
||||
@ -509,7 +710,7 @@ trexio_hdf5_write_$group_dset$ (trexio_t* const file, const char** $group_dset$,
|
||||
|
||||
if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {
|
||||
|
||||
/* code to create dataset */
|
||||
/* code to create dataset */
|
||||
hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
|
||||
if (filetype <= 0) return TREXIO_INVALID_ID;
|
||||
|
||||
@ -577,7 +778,7 @@ trexio_hdf5_has_$group_dset$ (trexio_t* const file)
|
||||
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
** Template for HDF5 has/read/write the string attribute
|
||||
|
||||
#+begin_src c :tangle hrw_attr_str_hdf5.h :exports none
|
||||
@ -655,7 +856,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
|
||||
|
||||
const hid_t dspace_id = H5Screate(H5S_SCALAR);
|
||||
if (dspace_id <= 0) return TREXIO_INVALID_ID;
|
||||
|
||||
|
||||
/* Create the $group_str$ attribute of $group$ group */
|
||||
const hid_t str_id = H5Acreate(f->$group$_group, $GROUP_STR$_NAME, dtype_id, dspace_id,
|
||||
H5P_DEFAULT, H5P_DEFAULT);
|
||||
@ -665,7 +866,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
|
||||
H5Tclose(dtype_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
|
||||
status = H5Awrite(str_id, dtype_id, str);
|
||||
if (status < 0) {
|
||||
H5Aclose(str_id);
|
||||
@ -673,7 +874,7 @@ trexio_hdf5_write_$group_str$ (trexio_t* const file, const char* str)
|
||||
H5Tclose(dtype_id);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
H5Aclose(str_id);
|
||||
H5Sclose(dspace_id);
|
||||
H5Tclose(dtype_id);
|
||||
@ -703,11 +904,256 @@ trexio_hdf5_has_$group_str$ (trexio_t* const file)
|
||||
|
||||
}
|
||||
#+end_src
|
||||
** Helper functions
|
||||
|
||||
#+begin_src c :tangle helpers_hdf5.c
|
||||
trexio_exit_code
|
||||
trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
|
||||
const char* dset_name,
|
||||
const hid_t dtype_id,
|
||||
const hsize_t* chunk_dims,
|
||||
const void* data_sparse)
|
||||
{
|
||||
const int h5_rank = 1;
|
||||
const hsize_t maxdims[1] = {H5S_UNLIMITED};
|
||||
|
||||
hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
|
||||
if (dspace < 0) return TREXIO_INVALID_ID;
|
||||
|
||||
hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
|
||||
if (prop < 0) {
|
||||
H5Sclose(dspace);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims);
|
||||
if (status < 0) {
|
||||
H5Sclose(dspace);
|
||||
H5Pclose(prop);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
hid_t dset_id = H5Dcreate(group_id,
|
||||
dset_name,
|
||||
dtype_id,
|
||||
dspace,
|
||||
H5P_DEFAULT,
|
||||
prop,
|
||||
H5P_DEFAULT);
|
||||
if (dset_id < 0) {
|
||||
H5Sclose(dspace);
|
||||
H5Pclose(prop);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
status = H5Dwrite(dset_id,
|
||||
dtype_id,
|
||||
H5S_ALL, H5S_ALL, H5P_DEFAULT,
|
||||
data_sparse);
|
||||
H5Sclose(dspace);
|
||||
H5Pclose(prop);
|
||||
H5Dclose(dset_id);
|
||||
if (status < 0) return TREXIO_FAILURE;
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
trexio_exit_code
|
||||
trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
|
||||
const char* dset_name,
|
||||
const hid_t dtype_id,
|
||||
const hsize_t* chunk_dims,
|
||||
const hsize_t* offset_file,
|
||||
const void* data_sparse)
|
||||
{
|
||||
const int h5_rank = 1;
|
||||
|
||||
hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
|
||||
if (dset_id <= 0) return TREXIO_INVALID_ID;
|
||||
|
||||
hid_t fspace = H5Dget_space(dset_id);
|
||||
if (fspace < 0) {
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
// allocate space for the dimensions to be read
|
||||
hsize_t ddims[1] = {0};
|
||||
|
||||
// get the rank and dimensions of the dataset
|
||||
int rrank = H5Sget_simple_extent_dims(fspace, ddims, NULL);
|
||||
ddims[0] += chunk_dims[0];
|
||||
|
||||
// extend the dset size
|
||||
herr_t status = H5Dset_extent(dset_id, ddims);
|
||||
if (status < 0) {
|
||||
H5Sclose(fspace);
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
// close and reopen the file dataspace to take into account the extension
|
||||
H5Sclose(fspace);
|
||||
fspace = H5Dget_space(dset_id);
|
||||
if (fspace < 0) {
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
// select hyperslab to be written using chunk_dims and offset values
|
||||
status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
|
||||
if (status < 0) {
|
||||
H5Sclose(fspace);
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
// create memory dataspace to write from
|
||||
hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
|
||||
if (dspace < 0) {
|
||||
H5Sclose(fspace);
|
||||
H5Sclose(dspace);
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
status = H5Dwrite(dset_id,
|
||||
dtype_id,
|
||||
dspace, fspace, H5P_DEFAULT,
|
||||
data_sparse);
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(dspace);
|
||||
H5Sclose(fspace);
|
||||
if (status < 0) return TREXIO_FAILURE;
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
trexio_exit_code
|
||||
trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
|
||||
const char* dset_name,
|
||||
const hsize_t* offset_file,
|
||||
hsize_t* const size_read,
|
||||
int64_t* const eof_read_size,
|
||||
const int is_index,
|
||||
void* const data_sparse
|
||||
)
|
||||
{
|
||||
const int h5_rank = 1;
|
||||
|
||||
// get the dataset handle
|
||||
hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
|
||||
if (dset_id <= 0) return TREXIO_INVALID_ID;
|
||||
|
||||
// get the dataspace of the dataset
|
||||
hid_t fspace_id = H5Dget_space(dset_id);
|
||||
if (fspace_id < 0) {
|
||||
H5Dclose(dset_id);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
/* get dims of the dset stored in the file to check whether reading with user-provided chunk size
|
||||
will reach end of the dataset (i.e. EOF in TEXT back end)
|
||||
,*/
|
||||
hsize_t ddims[1] = {0};
|
||||
int rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
|
||||
hsize_t max_offset = offset_file[0] + size_read[0];
|
||||
|
||||
int is_EOF = 0;
|
||||
// if max_offset exceed current dim of the dset => EOF
|
||||
if (max_offset > ddims[0]) {
|
||||
is_EOF = 1;
|
||||
// lower the value of count to reduce the number of elements which will be read
|
||||
size_read[0] -= max_offset - ddims[0];
|
||||
// modified the value of eof_read_size passed by address
|
||||
if (eof_read_size != NULL) *eof_read_size = size_read[0];
|
||||
}
|
||||
|
||||
// special case when reading int indices
|
||||
int64_t size_ranked = (int64_t) size_read[0];
|
||||
void* index_p;
|
||||
// read the datatype from the dataset and compare with the pre-defined values
|
||||
hid_t dtype = H5Dget_type(dset_id);
|
||||
if (is_index == 1) {
|
||||
if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
|
||||
uint8_t* index = CALLOC(size_ranked, uint8_t);
|
||||
if (index == NULL) return TREXIO_ALLOCATION_FAILED;
|
||||
index_p = index;
|
||||
} else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
|
||||
uint16_t* index = CALLOC(size_ranked, uint16_t);
|
||||
if (index == NULL) return TREXIO_ALLOCATION_FAILED;
|
||||
index_p = index;
|
||||
} else {
|
||||
index_p = data_sparse;
|
||||
}
|
||||
}
|
||||
|
||||
herr_t status = H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL);
|
||||
if (status < 0) {
|
||||
H5Sclose(fspace_id);
|
||||
H5Dclose(dset_id);
|
||||
if (index_p != data_sparse) FREE(index_p);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
hid_t memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
|
||||
if (memspace_id < 0) {
|
||||
H5Sclose(fspace_id);
|
||||
H5Dclose(dset_id);
|
||||
if (index_p != data_sparse) FREE(index_p);
|
||||
return TREXIO_INVALID_ID;
|
||||
}
|
||||
|
||||
if (is_index == 1) {
|
||||
status = H5Dread(dset_id,
|
||||
dtype,
|
||||
memspace_id, fspace_id, H5P_DEFAULT,
|
||||
index_p);
|
||||
} else {
|
||||
status = H5Dread(dset_id,
|
||||
dtype,
|
||||
memspace_id, fspace_id, H5P_DEFAULT,
|
||||
data_sparse);
|
||||
}
|
||||
|
||||
H5Sclose(fspace_id);
|
||||
H5Sclose(memspace_id);
|
||||
H5Dclose(dset_id);
|
||||
if (status < 0) {
|
||||
if (index_p != data_sparse) FREE(index_p);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
if (is_index == 1) {
|
||||
if (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0) {
|
||||
uint8_t* index = (uint8_t*) index_p;
|
||||
for (int64_t i=0; i<size_ranked; ++i){
|
||||
((int32_t*)data_sparse)[i] = (int32_t) index[i];
|
||||
}
|
||||
FREE(index_p);
|
||||
} else if (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0) {
|
||||
uint16_t* index = (uint16_t*) index_p;
|
||||
for (int64_t i=0; i<size_ranked; ++i){
|
||||
((int32_t*)data_sparse)[i] = (int32_t) index[i];
|
||||
}
|
||||
FREE(index_p);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_EOF == 1) return TREXIO_END;
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
* Constant file suffixes (not used by the generator) :noexport:
|
||||
|
||||
#+begin_src c :tangle suffix_hdf5.h
|
||||
/* Prototypes of the sparse-dataset helpers of the HDF5 back end.
   trexio_hdf5_open_read_dset_sparse may lower size_read[0] and returns
   TREXIO_END when the requested chunk runs past the end of the dataset. */
trexio_exit_code trexio_hdf5_create_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const void* data_sparse);
|
||||
trexio_exit_code trexio_hdf5_open_write_dset_sparse (const hid_t group_id, const char* dset_name, const hid_t dtype_id, const hsize_t* chunk_dims, const hsize_t* offset_file, const void* data_sparse);
|
||||
trexio_exit_code trexio_hdf5_open_read_dset_sparse (const hid_t group_id, const char* dset_name, const hsize_t* offset_file, hsize_t* const size_read, int64_t* const eof_read_size, const int is_index, void* const data_sparse);
|
||||
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
|
||||
|
@ -19,23 +19,26 @@ cat populated/pop_flush_group_text.h >> trexio_text.h
|
||||
|
||||
cat populated/pop_has_dset_data_text.c >> trexio_text.c
|
||||
cat populated/pop_has_dset_str_text.c >> trexio_text.c
|
||||
cat populated/pop_has_dset_sparse_text.c >> trexio_text.c
|
||||
cat populated/pop_has_attr_num_text.c >> trexio_text.c
|
||||
cat populated/pop_has_attr_str_text.c >> trexio_text.c
|
||||
|
||||
cat populated/pop_read_dset_data_text.c >> trexio_text.c
|
||||
cat populated/pop_read_dset_str_text.c >> trexio_text.c
|
||||
cat populated/pop_read_dset_sparse_text.c >> trexio_text.c
|
||||
cat populated/pop_read_attr_str_text.c >> trexio_text.c
|
||||
cat populated/pop_read_attr_num_text.c >> trexio_text.c
|
||||
|
||||
cat populated/pop_write_dset_data_text.c >> trexio_text.c
|
||||
cat populated/pop_write_dset_str_text.c >> trexio_text.c
|
||||
cat populated/pop_write_dset_sparse_text.c >> trexio_text.c
|
||||
cat populated/pop_write_attr_str_text.c >> trexio_text.c
|
||||
cat populated/pop_write_attr_num_text.c >> trexio_text.c
|
||||
|
||||
cat populated/pop_hrw_dset_data_text.h >> trexio_text.h
|
||||
cat populated/pop_hrw_dset_str_text.h >> trexio_text.h
|
||||
cat populated/pop_hrw_dset_sparse_text.h >> trexio_text.h
|
||||
cat populated/pop_hrw_attr_num_text.h >> trexio_text.h
|
||||
cat populated/pop_hrw_attr_str_text.h >> trexio_text.h
|
||||
|
||||
cat rdm_text.c >> trexio_text.c
|
||||
cat rdm_text.h >> trexio_text.h
|
||||
|
||||
cat suffix_text.h >> trexio_text.h
|
||||
|
||||
|
@ -93,22 +93,10 @@ typedef struct $group$_s {
|
||||
|
||||
** Template for general structure in text back end
|
||||
|
||||
#+begin_src c :tangle struct_text_group.h
|
||||
/* In-memory image of the rdm group of the TEXT back end (stored in rdm.txt). */
typedef struct rdm_s {
|
||||
/* number of elements held by one_e */
uint64_t dim_one_e;
|
||||
/* set to 1 when one_e is modified, reset to 0 by trexio_text_flush_rdm;
   the flush is skipped while it is 0 */
uint32_t to_flush;
|
||||
/* NOTE(review): not referenced by the rdm functions; presumably alignment padding - confirm */
uint32_t padding;
|
||||
/* heap array of dim_one_e doubles, released by trexio_text_free_rdm; NULL until allocated */
double* one_e;
|
||||
/* path of the rdm.txt file, built from the name of the parent TREXIO file */
char file_name[TREXIO_MAX_FILENAME_LENGTH];
|
||||
/* name of the file opened by the buffered two_e read/write functions */
char two_e_file_name[TREXIO_MAX_FILENAME_LENGTH];
|
||||
} rdm_t;
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle struct_text_group.h
|
||||
/* Handle of a TREXIO file opened with the TEXT back end. */
typedef struct trexio_text_s {
|
||||
/* generic file data (the rdm functions read its mode and file_name) */
trexio_t parent ;
|
||||
/* template line: $group$ is a placeholder, presumably expanded once per group by the generator */
$group$_t* $group$;
|
||||
/* cached rdm group: set by trexio_text_read_rdm, reset to NULL by trexio_text_free_rdm */
rdm_t* rdm;
|
||||
/* NOTE(review): not used in this part of the file; presumably the descriptor of the lock file - confirm */
int lock_file;
|
||||
} trexio_text_t;
|
||||
#+end_src
|
||||
@ -269,9 +257,6 @@ trexio_text_deinit (trexio_t* const file)
|
||||
/* Error handling for this call is added by the generator */
|
||||
rc = trexio_text_free_$group$( (trexio_text_t*) file);
|
||||
|
||||
rc = trexio_text_free_rdm( (trexio_text_t*) file);
|
||||
if (rc != TREXIO_SUCCESS) return rc;
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
|
||||
}
|
||||
@ -411,7 +396,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$));
|
||||
rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
|
||||
assert(!(rc != 1));
|
||||
if (rc != 1) {
|
||||
FREE(buffer);
|
||||
@ -499,7 +484,7 @@ trexio_text_read_$group$ (trexio_text_t* const file)
|
||||
}
|
||||
|
||||
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
|
||||
rc = fscanf(f, "%$group_dset_std_dtype_in$", &($group$->$group_dset$[i]));
|
||||
rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
|
||||
assert(!(rc != 1));
|
||||
if (rc != 1) {
|
||||
FREE(buffer);
|
||||
@ -535,16 +520,16 @@ trexio_text_read_$group$ (trexio_text_t* const file)
|
||||
}
|
||||
|
||||
/* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
|
||||
, BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
|
||||
,*/
|
||||
BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
|
||||
*/
|
||||
char* tmp_$group_dset$;
|
||||
tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
|
||||
|
||||
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
|
||||
$group$->$group_dset$[i] = tmp_$group_dset$;
|
||||
/* conventional fcanf with "%s" only return the string before the first space character
|
||||
,* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
|
||||
,* Q: depending on what ? */
|
||||
* to read string with spaces use "%[^\n]" possible with space before or after, i.e. " %[^\n]"
|
||||
* Q: depending on what ? */
|
||||
rc = fscanf(f, " %1023[^\n]", tmp_$group_dset$);
|
||||
assert(!(rc != 1));
|
||||
if (rc != 1) {
|
||||
@ -613,7 +598,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
|
||||
|
||||
// START REPEAT GROUP_NUM
|
||||
fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
|
||||
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$);
|
||||
if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
|
||||
// END REPEAT GROUP_NUM
|
||||
|
||||
// START REPEAT GROUP_ATTR_STR
|
||||
@ -627,7 +612,7 @@ trexio_text_flush_$group$ (trexio_text_t* const file)
|
||||
|
||||
fprintf(f, "$group_dset$\n");
|
||||
for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
|
||||
fprintf(f, "%$group_dset_std_dtype_out$\n", $group$->$group_dset$[i]);
|
||||
fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
|
||||
}
|
||||
// END REPEAT GROUP_DSET_ALL
|
||||
|
||||
@ -1016,327 +1001,280 @@ trexio_text_has_$group_str$ (trexio_t* const file)
|
||||
|
||||
}
|
||||
#+end_src
|
||||
** RDM struct (hard-coded)
|
||||
*** Read the complete struct
|
||||
** Template for has/read/write the dataset of sparse data
|
||||
|
||||
#+begin_src c :tangle rdm_text.h
|
||||
rdm_t* trexio_text_read_rdm(trexio_text_t* const file);
|
||||
#+end_src
|
||||
Each sparse array is stored in a separate =.txt= file due to the fact that sparse I/O has to be decoupled
|
||||
from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data
|
||||
to prevent memory overflow. Chunks have a given ~int64_t size~
|
||||
(size specifies the number of sparse data items, e.g. integrals).
|
||||
|
||||
#+begin_src c :tangle rdm_text.c
|
||||
rdm_t* trexio_text_read_rdm(trexio_text_t* const file) {
|
||||
if (file == NULL) return NULL;
|
||||
User provides indices and values of the sparse array as two separate variables.
|
||||
|
||||
if (file->rdm != NULL) return file->rdm;
|
||||
|
||||
/* Allocate the data structure */
|
||||
rdm_t* rdm = MALLOC(rdm_t);
|
||||
assert (rdm != NULL);
|
||||
#+begin_src c :tangle hrw_dset_sparse_text.h :exports none
|
||||
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file);
|
||||
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, int64_t* const eof_read_size, int32_t* const index_sparse, double* const value_sparse);
|
||||
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file, const int64_t offset_file, const int64_t size, const int64_t size_max, const int64_t size_start, const int32_t* index_sparse, const double* value_sparse);
|
||||
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max);
|
||||
#+end_src
|
||||
|
||||
rdm->one_e = NULL;
|
||||
rdm->two_e_file_name[0] = '\0';
|
||||
rdm->to_flush = 0;
|
||||
|
||||
/* Try to open the file. If the file does not exist, return */
|
||||
const char* rdm_file_name = "/rdm.txt";
|
||||
#+begin_src c :tangle write_dset_sparse_text.c
|
||||
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
|
||||
const int64_t offset_file,
|
||||
const int64_t size,
|
||||
const int64_t size_max,
|
||||
const int64_t size_start,
|
||||
const int32_t* index_sparse,
|
||||
const double* value_sparse)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
|
||||
strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH);
|
||||
/* Build the name of the file with sparse data*/
|
||||
/* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
|
||||
const char $group_dset$_file_name[256] = "/$group_dset$.txt";
|
||||
/* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
|
||||
char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
|
||||
|
||||
strncat (rdm->file_name, rdm_file_name,
|
||||
TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name));
|
||||
/* Copy directory name in file_full_path */
|
||||
strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
|
||||
/* Append name of the file with sparse data */
|
||||
strncat (file_full_path, $group_dset$_file_name,
|
||||
TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
|
||||
|
||||
if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
|
||||
FREE(rdm);
|
||||
return NULL;
|
||||
/* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
|
||||
FILE* f = fopen(file_full_path, "a");
|
||||
if(f == NULL) return TREXIO_FILE_ERROR;
|
||||
|
||||
|
||||
/* Specify the line length in order to offset properly. For example, for 4-index quantities
|
||||
the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
|
||||
CURRENTLY NO OFFSET IS USED WHEN WRITING !
|
||||
,*/
|
||||
int64_t line_length = 0L;
|
||||
char format_str[256] = "\0";
|
||||
|
||||
/* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
|
||||
if (size_max < UINT8_MAX) {
|
||||
line_length = $sparse_line_length_8$; // 41 for 4 indices
|
||||
strncpy(format_str, $sparse_format_printf_8$, 256);
|
||||
} else if (size_max < UINT16_MAX) {
|
||||
line_length = $sparse_line_length_16$; // 49 for 4 indices
|
||||
strncpy(format_str, $sparse_format_printf_16$, 256);
|
||||
} else {
|
||||
line_length = $sparse_line_length_32$; //69 for 4 indices
|
||||
strncpy(format_str, $sparse_format_printf_32$, 256);
|
||||
}
|
||||
/* If the file exists, read it */
|
||||
FILE* f = fopen(rdm->file_name,"r");
|
||||
if (f != NULL) {
|
||||
strncat(format_str, "\n", 2);
|
||||
|
||||
/* Find size of file to allocate the max size of the string buffer */
|
||||
fseek(f, 0L, SEEK_END);
|
||||
size_t sz = ftell(f);
|
||||
fseek(f, 0L, SEEK_SET);
|
||||
sz = (sz < 1024) ? (1024) : (sz);
|
||||
char* buffer = CALLOC(sz, char);
|
||||
/* Get the starting position of the IO stream to be written in the .size file.
|
||||
This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
|
||||
One can use ftello function which is adapted for large files.
|
||||
For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
|
||||
*/
|
||||
int64_t io_start_pos = size_start * line_length;
|
||||
|
||||
/* Read the dimensioning variables */
|
||||
int rc;
|
||||
rc = fscanf(f, "%1023s", buffer);
|
||||
assert (rc == 1);
|
||||
assert (strcmp(buffer, "dim_one_e") == 0);
|
||||
|
||||
rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e));
|
||||
assert (rc == 1);
|
||||
|
||||
/* Allocate arrays */
|
||||
rdm->one_e = CALLOC(rdm->dim_one_e, double);
|
||||
assert (rdm->one_e != NULL);
|
||||
|
||||
/* Read one_e */
|
||||
rc = fscanf(f, "%1023s", buffer);
|
||||
assert (rc == 1);
|
||||
assert (strcmp(buffer, "one_e") == 0);
|
||||
|
||||
for (uint64_t i=0 ; i<rdm->dim_one_e; ++i) {
|
||||
rc = fscanf(f, "%lf", &(rdm->one_e[i]));
|
||||
assert (rc == 1);
|
||||
}
|
||||
|
||||
/* Read two_e */
|
||||
rc = fscanf(f, "%1023s", buffer);
|
||||
assert (rc == 1);
|
||||
assert (strcmp(buffer, "two_e_file_name") == 0);
|
||||
|
||||
rc = fscanf(f, "%1023s", buffer);
|
||||
assert (rc == 1);
|
||||
strncpy(rdm->two_e_file_name, buffer, 1024);
|
||||
if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
|
||||
FREE(buffer);
|
||||
FREE(rdm->one_e);
|
||||
FREE(rdm);
|
||||
/* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
|
||||
int rc;
|
||||
for (uint64_t i=0UL; i<size; ++i) {
|
||||
rc = fprintf(f, format_str,
|
||||
$group_dset_sparse_indices_printf$,
|
||||
*(value_sparse + i));
|
||||
if(rc <= 0) {
|
||||
fclose(f);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
FREE(buffer);
|
||||
fclose(f);
|
||||
f = NULL;
|
||||
}
|
||||
file->rdm = rdm ;
|
||||
return rdm;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
*** Flush the complete struct
|
||||
|
||||
#+begin_src c :tangle rdm_text.h
|
||||
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file);
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle rdm_text.c
|
||||
/* Write the cached rdm group back to its text file.

   Nothing is written when the group was never loaded or has no pending
   modification (to_flush == 0).

   Returns TREXIO_INVALID_ARG_1 for a NULL file, TREXIO_READONLY when the file
   is opened in 'r' mode, TREXIO_FAILURE when the text file cannot be
   opened or closed, TREXIO_SUCCESS otherwise. */
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) {
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  if (file->parent.mode == 'r') return TREXIO_READONLY;

  rdm_t* const rdm = file->rdm;
  if (rdm == NULL) return TREXIO_SUCCESS;

  if (rdm->to_flush == 0) return TREXIO_SUCCESS;

  /* Report the failure to the caller: an assert vanishes with NDEBUG
     and the fprintf calls below would then dereference NULL. */
  FILE* f = fopen(rdm->file_name,"w");
  if (f == NULL) return TREXIO_FAILURE;

  /* Write the dimensioning variables.
     The label has to be "dim_one_e": this is what trexio_text_read_rdm expects. */
  fprintf(f, "dim_one_e %" PRIu64 "\n", rdm->dim_one_e);

  /* Write arrays with 17 significant digits so that doubles survive a
     write/read round trip; the reader's "%lf" accepts this notation. */
  fprintf(f, "one_e\n");
  for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) {
    fprintf(f, "%24.16e\n", rdm->one_e[i]);
  }

  fprintf(f, "two_e_file_name\n");
  fprintf(f, "%s\n", rdm->two_e_file_name);

  /* A failing fclose means buffered data could not be written:
     keep to_flush set so that the flush can be retried. */
  if (fclose(f) != 0) return TREXIO_FAILURE;

  rdm->to_flush = 0;
  return TREXIO_SUCCESS;
}
|
||||
#+end_src
|
||||
|
||||
*** Free memory
|
||||
|
||||
Memory is allocated when reading. The following function frees memory.
|
||||
|
||||
#+begin_src c :tangle rdm_text.h
|
||||
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file);
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle rdm_text.c
|
||||
/* Release the cached rdm group of a TEXT file.

   Unless the file is opened in 'r' mode, the group is flushed first and
   TREXIO_FAILURE is returned if that flush does not succeed.
   Returns TREXIO_INVALID_ARG_1 for a NULL file, TREXIO_SUCCESS otherwise
   (also when no rdm group is cached). */
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) {
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* pending modifications are written out before the memory is dropped */
  const int writable = (file->parent.mode != 'r');
  if (writable && trexio_text_flush_rdm(file) != TREXIO_SUCCESS) {
    return TREXIO_FAILURE;
  }

  rdm_t* const cached = file->rdm;
  if (cached != NULL) {
    if (cached->one_e != NULL) {
      FREE (cached->one_e);
    }
    free (cached);
    file->rdm = NULL;
  }

  return TREXIO_SUCCESS;
}
|
||||
#+end_src
|
||||
|
||||
*** Read/Write the one_e attribute
|
||||
|
||||
The ~one_e~ array is assumed allocated with the appropriate size.
|
||||
|
||||
#+begin_src c :tangle rdm_text.h
|
||||
trexio_exit_code
|
||||
trexio_text_read_rdm_one_e(trexio_t* const file,
|
||||
double* const one_e,
|
||||
const uint64_t dim_one_e);
|
||||
|
||||
trexio_exit_code
|
||||
trexio_text_write_rdm_one_e(trexio_t* const file,
|
||||
const double* one_e,
|
||||
const uint64_t dim_one_e);
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle rdm_text.c
|
||||
trexio_exit_code
|
||||
trexio_text_read_rdm_one_e(trexio_t* const file,
|
||||
double* const one_e,
|
||||
const uint64_t dim_one_e)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
|
||||
|
||||
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
|
||||
if (rdm == NULL) return TREXIO_FAILURE;
|
||||
|
||||
if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3;
|
||||
|
||||
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
|
||||
one_e[i] = rdm->one_e[i];
|
||||
}
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
trexio_exit_code
|
||||
trexio_text_write_rdm_one_e(trexio_t* const file,
|
||||
const double* one_e,
|
||||
const uint64_t dim_one_e)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (one_e == NULL) return TREXIO_INVALID_ARG_2;
|
||||
if (file->mode != 'r') return TREXIO_READONLY;
|
||||
|
||||
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
|
||||
if (rdm == NULL) return TREXIO_FAILURE;
|
||||
|
||||
rdm->dim_one_e = dim_one_e;
|
||||
for (uint64_t i=0 ; i<dim_one_e ; ++i) {
|
||||
rdm->one_e[i] = one_e[i];
|
||||
}
|
||||
|
||||
rdm->to_flush = 1;
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
*** Read/Write the two_e attribute
|
||||
|
||||
~two_e~ is a sparse data structure, which can be too large to fit
|
||||
in memory. So we provide functions to read and write it by
|
||||
chunks.
|
||||
In the text back end, the easiest way to do it is to create a
|
||||
file for each sparse float structure.
|
||||
|
||||
#+begin_src c :tangle rdm_text.h
|
||||
trexio_exit_code
|
||||
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
|
||||
const uint64_t offset,
|
||||
const uint64_t size,
|
||||
int64_t* const index,
|
||||
double* const value);
|
||||
|
||||
trexio_exit_code
|
||||
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
|
||||
const uint64_t offset,
|
||||
const uint64_t size,
|
||||
const int64_t* index,
|
||||
const double* value);
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle rdm_text.c
|
||||
trexio_exit_code
|
||||
trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
|
||||
const uint64_t offset,
|
||||
const uint64_t size,
|
||||
int64_t* const index,
|
||||
double* const value)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (index == NULL) return TREXIO_INVALID_ARG_4;
|
||||
if (value == NULL) return TREXIO_INVALID_ARG_5;
|
||||
|
||||
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
|
||||
if (rdm == NULL) return TREXIO_FAILURE;
|
||||
|
||||
FILE* f = fopen(rdm->two_e_file_name, "r");
|
||||
if (f == NULL) return TREXIO_END;
|
||||
|
||||
const uint64_t line_length = 64L;
|
||||
fseek(f, (long) offset * line_length, SEEK_SET);
|
||||
|
||||
for (uint64_t i=0 ; i<size ; ++i) {
|
||||
int rc = fscanf(f, "%9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %24le\n",
|
||||
&index[4*i],
|
||||
&index[4*i+1],
|
||||
&index[4*i+2],
|
||||
&index[4*i+3],
|
||||
&value[i]);
|
||||
if (rc == 5) {
|
||||
/* Do nothing */
|
||||
} else if (rc == EOF) {
|
||||
return TREXIO_END;
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
/* Close the TXT file */
|
||||
rc = fclose(f);
|
||||
if (rc != 0) return TREXIO_FILE_ERROR;
|
||||
|
||||
/* Append .size to the file_full_path in order to write additional info about the written buffer of data */
|
||||
strncat(file_full_path, ".size", 6);
|
||||
|
||||
trexio_exit_code
|
||||
trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
|
||||
const uint64_t offset,
|
||||
const uint64_t size,
|
||||
const int64_t* index,
|
||||
const double* value)
|
||||
{
|
||||
if (file == NULL) return TREXIO_INVALID_ARG_1;
|
||||
if (index == NULL) return TREXIO_INVALID_ARG_4;
|
||||
if (value == NULL) return TREXIO_INVALID_ARG_5;
|
||||
if (file->mode != 'r') return TREXIO_READONLY;
|
||||
/* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
|
||||
FILE *f_wSize = fopen(file_full_path, "a");
|
||||
if (f_wSize == NULL) return TREXIO_FILE_ERROR;
|
||||
|
||||
rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
|
||||
if (rdm == NULL) return TREXIO_FAILURE;
|
||||
|
||||
FILE* f = fopen(rdm->two_e_file_name, "w");
|
||||
if (f == NULL) return TREXIO_FAILURE;
|
||||
|
||||
const uint64_t line_length = 64L;
|
||||
fseek(f, (long) offset * line_length, SEEK_SET);
|
||||
|
||||
for (uint64_t i=0 ; i<size ; ++i) {
|
||||
int rc = fprintf(f, "%9" PRId64 " %9" PRId64 " %9" PRId64 " %9" PRId64 " %24le\n",
|
||||
index[4*i],
|
||||
index[4*i+1],
|
||||
index[4*i+2],
|
||||
index[4*i+3],
|
||||
value[i]);
|
||||
if (rc != 5) return TREXIO_FAILURE;
|
||||
/* Write the buffer_size */
|
||||
rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
|
||||
if (rc <= 0) {
|
||||
fclose(f_wSize);
|
||||
return TREXIO_FAILURE;
|
||||
}
|
||||
|
||||
/* Close the TXT file */
|
||||
rc = fclose(f_wSize);
|
||||
if (rc != 0) return TREXIO_FILE_ERROR;
|
||||
|
||||
/* Exit upon success */
|
||||
return TREXIO_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle read_dset_sparse_text.c
|
||||
/* Read a chunk of at most `size` sparse items from the $group_dset$.txt file.

   offset_file    number of items (lines) to skip from the beginning of the file
   size           number of items requested
   size_max       selects the fixed line length of the file (8-, 16- or 32-bit layout)
   eof_read_size  receives the number of items actually read when the end of
                  the file is reached before `size` items (required, not NULL)
   index_sparse   destination of the indices
   value_sparse   destination of the values, one per item

   Returns TREXIO_INVALID_ARG_1/5 for NULL arguments, TREXIO_FILE_ERROR when
   the path is too long or the file cannot be opened, positioned or closed,
   TREXIO_END when the end of the file is reached, TREXIO_FAILURE when a line
   cannot be parsed, TREXIO_SUCCESS otherwise. */
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
                                               const int64_t offset_file,
                                               const int64_t size,
                                               const int64_t size_max,
                                               int64_t* const eof_read_size,
                                               int32_t* const index_sparse,
                                               double* const value_sparse)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment. */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate a truncated copy */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Refuse paths that do not fit instead of writing past the end of the buffer */
  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= TREXIO_MAX_FILENAME_LENGTH) {
    return TREXIO_FILE_ERROR;
  }
  /* Append name of the file with sparse data (third argument: room left in the buffer) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH - strlen(file_full_path) - 1);

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if(f == NULL) return TREXIO_FILE_ERROR;

  /* Specify the line length in order to offset properly. For example, for 4-index quantities
     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value
     + 1 for the new line char */
  uint64_t line_length = 0UL;
  /* Determine the line length depending on the size_max (usually mo_num or ao_num) */
  if (size_max < UINT8_MAX) {
    line_length = $sparse_line_length_8$; // 41 for 4 indices
  } else if (size_max < UINT16_MAX) {
    line_length = $sparse_line_length_16$; // 49 for 4 indices
  } else {
    line_length = $sparse_line_length_32$; //69 for 4 indices
  }

  /* Offset in the file according to the provided value of offset_file and optimal line_length */
  if (fseek(f, (long) offset_file * line_length, SEEK_SET) != 0) {
    fclose(f);
    return TREXIO_FILE_ERROR;
  }

  /* Read the data line by line and check the return code of sscanf to verify
     that at least one field has been converted; stop at EOF */
  int rc;
  char buffer[1024];
  uint64_t count = 0UL;
  for (uint64_t i=0UL; i<size; ++i) {

    /* fgets always terminates the buffer on success, no need to clear it beforehand */
    if(fgets(buffer, 1023, f) == NULL){

      /* end of file: report how many items have been read so far */
      fclose(f);
      *eof_read_size = count;
      return TREXIO_END;

    }

    rc = sscanf(buffer, "$group_dset_format_scanf$",
                $group_dset_sparse_indices_scanf$,
                value_sparse + i);
    if(rc <= 0) {
      fclose(f);
      return TREXIO_FAILURE;
    }
    count += 1UL;

  }

  /* Close the TXT file */
  rc = fclose(f);
  if(rc != 0) return TREXIO_FILE_ERROR;

  return TREXIO_SUCCESS;
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle read_dset_sparse_text.c
|
||||
/* Compute the total number of sparse items stored for $group_dset$.

   The $group_dset$.txt.size file holds one "size offset" pair per written
   chunk; the sizes are summed into *size_max.

   Returns TREXIO_INVALID_ARG_1/2 for NULL arguments, TREXIO_FILE_ERROR when
   the path is too long or the file cannot be opened or closed,
   TREXIO_INT_SIZE_OVERFLOW (with *size_max set to -1) when the sum does not
   fit in int64_t, TREXIO_FAILURE when the file is malformed,
   TREXIO_SUCCESS otherwise. */
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;
  if (size_max == NULL) return TREXIO_INVALID_ARG_2;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment. */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate a truncated copy */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Refuse paths that do not fit instead of writing past the end of the buffer */
  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= TREXIO_MAX_FILENAME_LENGTH) {
    return TREXIO_FILE_ERROR;
  }
  /* Append name of the file with sparse data (third argument: room left in the buffer) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH - strlen(file_full_path) - 1);

  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
  FILE* f = fopen(file_full_path, "r");
  if(f == NULL) return TREXIO_FILE_ERROR;

  int rc;
  int64_t size_item, offset_item, size_accum=0L;

  /* Read the pairs until fscanf stops converting both values. Looping on
     "!= EOF" would never terminate on a malformed file: a matching failure
     returns 0 without consuming any input. */
  while((rc = fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item)) == 2) {
    /* Check that summation will not overflow the int64_t value */
    if (INT64_MAX - size_accum > size_item) {
      size_accum += size_item;
    } else {
      fclose(f);
      *size_max = -1L;
      return TREXIO_INT_SIZE_OVERFLOW;
    }
  }

  /* Anything but a clean end of file means an incomplete or malformed pair */
  if (rc != EOF) {
    fclose(f);
    return TREXIO_FAILURE;
  }

  /* Close the TXT file */
  rc = fclose(f);
  if(rc != 0) return TREXIO_FILE_ERROR;

  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
  *size_max = size_accum;
  return TREXIO_SUCCESS;

}
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle has_dset_sparse_text.c
|
||||
/* Check whether the $group_dset$.txt file with sparse data exists.

   Returns TREXIO_INVALID_ARG_1 for a NULL file, TREXIO_FILE_ERROR when the
   path of the file does not fit in TREXIO_MAX_FILENAME_LENGTH characters,
   TREXIO_SUCCESS when the file exists and TREXIO_HAS_NOT otherwise.
   Nothing is created by this call. */
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
{
  if (file == NULL) return TREXIO_INVALID_ARG_1;

  /* Build the name of the file with sparse data.
     The $group_dset$.txt is limited to 256 symbols for the moment. */
  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];

  /* Copy directory name in file_full_path; strncpy does not terminate a truncated copy */
  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
  file_full_path[TREXIO_MAX_FILENAME_LENGTH-1] = '\0';
  /* Refuse paths that do not fit instead of writing past the end of the buffer */
  if (strlen(file_full_path) + strlen($group_dset$_file_name) >= TREXIO_MAX_FILENAME_LENGTH) {
    return TREXIO_FILE_ERROR;
  }
  /* Append name of the file with sparse data (third argument: room left in the buffer) */
  strncat (file_full_path, $group_dset$_file_name,
           TREXIO_MAX_FILENAME_LENGTH - strlen(file_full_path) - 1);

  /* Check the return code of access function to determine whether the file with sparse data exists or not */
  if (access(file_full_path, F_OK) == 0){
    return TREXIO_SUCCESS;
  } else {
    return TREXIO_HAS_NOT;
  }
}
|
||||
#+end_src
|
||||
|
||||
* Constant file suffixes (not used by the generator) :noexport:
|
||||
|
||||
#+begin_src c :tangle suffix_text.h
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
|
@ -1,11 +1,12 @@
|
||||
|
||||
# ================= TESTING =================
|
||||
# ================= TESTING =================
|
||||
|
||||
# Create a list of tests for TEXT back end.
|
||||
set(Tests_text
|
||||
open_text
|
||||
io_dset_float_text
|
||||
io_dset_str_text
|
||||
io_dset_sparse_text
|
||||
io_safe_dset_float_text
|
||||
io_dset_int_text
|
||||
io_num_text
|
||||
@ -19,6 +20,7 @@ if(ENABLE_HDF5)
|
||||
open_hdf5
|
||||
io_dset_float_hdf5
|
||||
io_dset_str_hdf5
|
||||
io_dset_sparse_hdf5
|
||||
io_safe_dset_float_hdf5
|
||||
io_dset_int_hdf5
|
||||
io_num_hdf5
|
||||
@ -43,4 +45,3 @@ endforeach()
|
||||
add_executable(test_f test_f.f90)
|
||||
target_link_libraries(test_f PRIVATE trexio_f)
|
||||
add_test(NAME test_f COMMAND $<TARGET_FILE:test_f>)
|
||||
|
||||
|
235
tests/io_dset_sparse_hdf5.c
Normal file
235
tests/io_dset_sparse_hdf5.c
Normal file
@ -0,0 +1,235 @@
|
||||
#include "trexio.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define TEST_BACKEND TREXIO_HDF5
|
||||
#define TREXIO_FILE "test_dset_sparse.h5"
|
||||
#define RM_COMMAND "rm -f -- " TREXIO_FILE
|
||||
#define SIZE 100
|
||||
#define N_CHUNKS 5
|
||||
|
||||
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
|
||||
|
||||
/* Try to write an array of sparse data into the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file in 'write' mode
|
||||
file = trexio_open(file_name, 'w', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// parameters to be written
|
||||
int32_t* index;
|
||||
double* value;
|
||||
|
||||
index = calloc(4L*SIZE, sizeof(int32_t));
|
||||
value = calloc(SIZE, sizeof(double));
|
||||
|
||||
for(int i=0; i<SIZE; i++){
|
||||
index[4*i] = 4*i;
|
||||
index[4*i+1] = 4*i+1;
|
||||
index[4*i+2] = 4*i+2;
|
||||
index[4*i+3] = 4*i+3;
|
||||
value[i] = 3.14 + (double) i;
|
||||
}
|
||||
|
||||
// write mo_num which will be used to determine the optimal size of int indices
|
||||
if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
|
||||
rc = trexio_write_mo_num(file, 1000);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
}
|
||||
|
||||
// write dataset chunks of sparse data in the file (including FAKE statements)
|
||||
uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
|
||||
uint64_t offset_f = 0UL;
|
||||
uint64_t offset_d = 0UL;
|
||||
if (offset != 0L) offset_f += offset;
|
||||
|
||||
// write n_chunks times using write_sparse
|
||||
for(int i=0; i<N_CHUNKS; ++i){
|
||||
rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
offset_d += chunk_size;
|
||||
offset_f += chunk_size;
|
||||
}
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// free the allocated memeory
|
||||
free(index);
|
||||
free(value);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {
|
||||
|
||||
/* Try to check the existence of a dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// first check that mo_2e_int_eri_lr (we only write non-lr component in this unit test)
|
||||
rc = trexio_has_mo_2e_int_eri_lr(file);
|
||||
assert(rc==TREXIO_HAS_NOT);
|
||||
|
||||
// check that previous call to has_sparse did not create a file/dset
|
||||
rc = trexio_has_mo_2e_int_eri_lr(file);
|
||||
assert(rc==TREXIO_HAS_NOT);
|
||||
|
||||
// now check that previously written mo_2e_int_eri exists
|
||||
rc = trexio_has_mo_2e_int_eri(file);
|
||||
assert(rc==TREXIO_SUCCESS);
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
|
||||
|
||||
/* Try to read one chunk of dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// define arrays to read into
|
||||
int32_t* index_read;
|
||||
double* value_read;
|
||||
uint64_t size_r = 40L;
|
||||
|
||||
index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
|
||||
value_read = (double*) calloc(size_r,sizeof(double));
|
||||
|
||||
// specify the read parameters, here:
|
||||
// 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--59) into elements of the array starting from 5
|
||||
int64_t chunk_read = 10L;
|
||||
int64_t offset_file_read = 40L;
|
||||
int offset_data_read = 5;
|
||||
int64_t read_size_check;
|
||||
read_size_check = chunk_read;
|
||||
|
||||
if (offset != 0L) offset_file_read += offset;
|
||||
|
||||
// read one chunk using the aforementioned parameters
|
||||
rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
assert(chunk_read == read_size_check);
|
||||
assert(index_read[0] == 0);
|
||||
assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
|
||||
|
||||
// now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
|
||||
offset_file_read = 97;
|
||||
offset_data_read = 1;
|
||||
int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100
|
||||
|
||||
if (offset != 0L) offset_file_read += offset;
|
||||
|
||||
// read one chunk that will reach EOF and return TREXIO_END code
|
||||
rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
|
||||
assert(rc == TREXIO_END);
|
||||
assert(chunk_read == eof_read_size_check);
|
||||
assert(index_read[4*size_r-1] == 0);
|
||||
assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
|
||||
/*
|
||||
for(int i=0; i<size_r; ++i){
|
||||
printf("%d %lf\n", index_read[4*i], value_read[i]);
|
||||
}
|
||||
*/
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// free the memory
|
||||
free(index_read);
|
||||
free(value_read);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {
|
||||
|
||||
/* Try to read a size of the dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// define the variable to read into
|
||||
int64_t size_written;
|
||||
|
||||
// read one chunk using the aforementioned parameters
|
||||
rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
assert(size_written == size_check);
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
/*============== Test launcher ================*/
|
||||
|
||||
int rc;
|
||||
rc = system(RM_COMMAND);
|
||||
assert (rc == 0);
|
||||
|
||||
// check the first write attempt (SIZE elements written in N_CHUNKS chunks)
|
||||
test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0);
|
||||
test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
|
||||
test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0);
|
||||
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
|
||||
// check the second write attempt (SIZE elements written in N_CHUNKS chunks)
|
||||
test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE*2);
|
||||
|
||||
rc = system(RM_COMMAND);
|
||||
assert (rc == 0);
|
||||
|
||||
return 0;
|
||||
}
|
230
tests/io_dset_sparse_text.c
Normal file
230
tests/io_dset_sparse_text.c
Normal file
@ -0,0 +1,230 @@
|
||||
#include "trexio.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Back end exercised by this test program. */
#define TEST_BACKEND   TREXIO_TEXT
/* Scratch directory created and removed by the test. */
#define TREXIO_FILE    "test_dset_sparse.dir"
/* Shell command that removes the scratch directory; "--" ends option parsing
   so the path can never be taken for an option. */
#define RM_COMMAND     "rm -rf -- " TREXIO_FILE
/* Total number of sparse entries written per write test. */
#define SIZE           100
/* Number of equal chunks the SIZE entries are written in. */
#define N_CHUNKS       5
|
||||
|
||||
static int test_write_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
|
||||
|
||||
/* Try to write an array of sparse data into the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file in 'write' mode
|
||||
file = trexio_open(file_name, 'w', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// parameters to be written
|
||||
int32_t* index;
|
||||
double* value;
|
||||
|
||||
index = calloc(4L*SIZE, sizeof(int32_t));
|
||||
value = calloc(SIZE, sizeof(double));
|
||||
|
||||
for(int i=0; i<SIZE; i++){
|
||||
index[4*i] = 4*i;
|
||||
index[4*i+1] = 4*i+1;
|
||||
index[4*i+2] = 4*i+2;
|
||||
index[4*i+3] = 4*i+3;
|
||||
value[i] = 3.14 + (double) i;
|
||||
}
|
||||
|
||||
// write mo_num which will be used to determine the optimal size of int indices
|
||||
if (trexio_has_mo_num(file) == TREXIO_HAS_NOT) {
|
||||
rc = trexio_write_mo_num(file, 1000);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
}
|
||||
|
||||
// write dataset chunks of sparse data in the file (including FAKE statements)
|
||||
uint64_t chunk_size = (uint64_t) SIZE/N_CHUNKS;
|
||||
uint64_t offset_f = 0UL;
|
||||
uint64_t offset_d = 0UL;
|
||||
if (offset != 0L) offset_f += offset;
|
||||
|
||||
// write n_chunks times using write_sparse
|
||||
for(int i=0; i<N_CHUNKS; ++i){
|
||||
rc = trexio_write_mo_2e_int_eri(file, offset_f, chunk_size, &index[4*offset_d], &value[offset_d]);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
offset_d += chunk_size;
|
||||
offset_f += chunk_size;
|
||||
}
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// free the allocated memeory
|
||||
free(index);
|
||||
free(value);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_has_dset_sparse (const char* file_name, const back_end_t backend) {
|
||||
|
||||
/* Try to check the existence of a dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// first check that mo_2e_int_eri_lr (we only write non-lr component in this unit test)
|
||||
rc = trexio_has_mo_2e_int_eri_lr(file);
|
||||
assert(rc==TREXIO_HAS_NOT);
|
||||
|
||||
// check that previous call to has_sparse did not create a file/dset
|
||||
rc = trexio_has_mo_2e_int_eri_lr(file);
|
||||
assert(rc==TREXIO_HAS_NOT);
|
||||
|
||||
// now check that previously written mo_2e_int_eri exists
|
||||
rc = trexio_has_mo_2e_int_eri(file);
|
||||
assert(rc==TREXIO_SUCCESS);
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_read_dset_sparse (const char* file_name, const back_end_t backend, const int64_t offset) {
|
||||
|
||||
/* Try to read one chunk of dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// define arrays to read into
|
||||
int32_t* index_read;
|
||||
double* value_read;
|
||||
uint64_t size_r = 40L;
|
||||
|
||||
index_read = (int32_t*) calloc(4L*size_r,sizeof(int32_t));
|
||||
value_read = (double*) calloc(size_r,sizeof(double));
|
||||
|
||||
// specify the read parameters, here:
|
||||
// 1 chunk of 10 elements using offset of 40 (i.e. lines No. 40--59) into elements of the array starting from 5
|
||||
int64_t chunk_read = 10L;
|
||||
int64_t offset_file_read = 40L;
|
||||
int offset_data_read = 5;
|
||||
int64_t read_size_check;
|
||||
read_size_check = chunk_read;
|
||||
|
||||
if (offset != 0L) offset_file_read += offset;
|
||||
|
||||
// read one chunk using the aforementioned parameters
|
||||
rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
assert(chunk_read == read_size_check);
|
||||
assert(index_read[0] == 0);
|
||||
assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
|
||||
|
||||
// now attempt to read so that one encounters end of file during reading (i.e. offset_file_read + chunk_read > size_max)
|
||||
offset_file_read = 97L;
|
||||
offset_data_read = 1;
|
||||
int64_t eof_read_size_check = SIZE - offset_file_read; // if offset_file_read=97 => only 3 integrals will be read out of total of 100
|
||||
|
||||
if (offset != 0L) offset_file_read += offset;
|
||||
|
||||
// read one chunk that will reach EOF and return TREXIO_END code
|
||||
rc = trexio_read_mo_2e_int_eri(file, offset_file_read, &chunk_read, &index_read[4*offset_data_read], &value_read[offset_data_read]);
|
||||
assert(rc == TREXIO_END);
|
||||
assert(chunk_read == eof_read_size_check);
|
||||
assert(index_read[4*size_r-1] == 0);
|
||||
assert(index_read[4*offset_data_read] == 4 * (int32_t) (offset_file_read-offset));
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// free the memory
|
||||
free(index_read);
|
||||
free(value_read);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int test_read_dset_sparse_size (const char* file_name, const back_end_t backend, const int64_t size_check) {
|
||||
|
||||
/* Try to read a size of the dataset of sparse data in the TREXIO file */
|
||||
|
||||
trexio_t* file = NULL;
|
||||
trexio_exit_code rc;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
// open file
|
||||
file = trexio_open(file_name, 'r', backend, &rc);
|
||||
assert (file != NULL);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// define the variable to read into
|
||||
int64_t size_written;
|
||||
|
||||
// read one chunk using the aforementioned parameters
|
||||
rc = trexio_read_mo_2e_int_eri_size(file, &size_written);
|
||||
assert(rc == TREXIO_SUCCESS);
|
||||
assert(size_written == size_check);
|
||||
|
||||
// close current session
|
||||
rc = trexio_close(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
/*================= END OF TEST ==================*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
/*============== Test launcher ================*/
|
||||
|
||||
int rc;
|
||||
rc = system(RM_COMMAND);
|
||||
assert (rc == 0);
|
||||
|
||||
// check the first write attempt (SIZE elements written in N_CHUNKS chunks)
|
||||
test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0);
|
||||
test_has_dset_sparse (TREXIO_FILE, TEST_BACKEND);
|
||||
test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, 0);
|
||||
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
|
||||
// check the second write attempt (SIZE elements written in N_CHUNKS chunks)
|
||||
test_write_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
test_read_dset_sparse (TREXIO_FILE, TEST_BACKEND, SIZE);
|
||||
test_read_dset_sparse_size(TREXIO_FILE, TEST_BACKEND, SIZE*2);
|
||||
|
||||
rc = system(RM_COMMAND);
|
||||
assert (rc == 0);
|
||||
|
||||
return 0;
|
||||
}
|
@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
|
||||
rc = trexio_write_nucleus_num(file, num);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
|
||||
rc = trexio_write_mo_num(file, 0);
|
||||
assert (rc == TREXIO_INVALID_NUM);
|
||||
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
|
||||
rc = trexio_has_nucleus_num(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
rc = trexio_has_nucleus_repulsion(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// check that the num variable does not exist
|
||||
rc = trexio_has_mo_num(file);
|
||||
assert (rc == TREXIO_HAS_NOT);
|
||||
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
|
||||
// parameters to be read
|
||||
int num;
|
||||
int cartesian;
|
||||
float repulsion_32;
|
||||
double repulsion_64, d;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
assert (num == 12);
|
||||
|
||||
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
d = repulsion_32 - 2.14171677;
|
||||
assert( d*d < 1.e-8 );
|
||||
|
||||
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
d = repulsion_64 - 2.14171677;
|
||||
assert( d*d < 1.e-14 );
|
||||
|
||||
// read non-existing numerical attribute from the file
|
||||
rc = trexio_read_mo_num(file, &num);
|
||||
assert (rc == TREXIO_ATTR_MISSING);
|
||||
@ -134,5 +152,3 @@ int main(void) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,6 +27,9 @@ static int test_write_num (const char* file_name, const back_end_t backend) {
|
||||
rc = trexio_write_nucleus_num(file, num);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
rc = trexio_write_nucleus_repulsion(file, 2.14171677);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// attempt to write 0 as dimensioning variable in an empty file; should FAIL and return TREXIO_INVALID_NUM
|
||||
rc = trexio_write_mo_num(file, 0);
|
||||
assert (rc == TREXIO_INVALID_NUM);
|
||||
@ -62,6 +65,9 @@ static int test_has_num (const char* file_name, const back_end_t backend) {
|
||||
rc = trexio_has_nucleus_num(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
rc = trexio_has_nucleus_repulsion(file);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
|
||||
// check that the num variable does not exist
|
||||
rc = trexio_has_mo_num(file);
|
||||
assert (rc == TREXIO_HAS_NOT);
|
||||
@ -86,6 +92,8 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
|
||||
// parameters to be read
|
||||
int num;
|
||||
int cartesian;
|
||||
float repulsion_32;
|
||||
double repulsion_64, d;
|
||||
|
||||
/*================= START OF TEST ==================*/
|
||||
|
||||
@ -98,6 +106,16 @@ static int test_read_num (const char* file_name, const back_end_t backend) {
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
assert (num == 12);
|
||||
|
||||
rc = trexio_read_nucleus_repulsion_32(file, &repulsion_32);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
d = repulsion_32 - 2.14171677;
|
||||
assert( d*d < 1.e-8 );
|
||||
|
||||
rc = trexio_read_nucleus_repulsion_64(file, &repulsion_64);
|
||||
assert (rc == TREXIO_SUCCESS);
|
||||
d = repulsion_64 - 2.14171677;
|
||||
assert( d*d < 1.e-14 );
|
||||
|
||||
// read non-existing numerical attribute from the file
|
||||
rc = trexio_read_mo_num(file, &num);
|
||||
assert (rc == TREXIO_ATTR_MISSING);
|
||||
@ -134,5 +152,3 @@ int main(void) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
120
tests/test_f.f90
120
tests/test_f.f90
@ -2,25 +2,25 @@ program test_trexio
|
||||
use trexio
|
||||
use, intrinsic :: iso_c_binding
|
||||
implicit none
|
||||
|
||||
|
||||
logical :: have_hdf5
|
||||
|
||||
print * , "============================================"
|
||||
print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION
|
||||
print * , "============================================"
|
||||
print'(a,a)' , " TREXIO VERSION STRING : ", TREXIO_PACKAGE_VERSION
|
||||
print'(a,i3)', " TREXIO MAJOR VERSION : ", TREXIO_VERSION_MAJOR
|
||||
print'(a,i3)', " TREXIO MINOR VERSION : ", TREXIO_VERSION_MINOR
|
||||
print * , "============================================"
|
||||
print * , "============================================"
|
||||
|
||||
call system('rm -rf test_write_f.dir')
|
||||
call system('rm -rf -- test_write_f.dir')
|
||||
print *, 'call test_write(''test_write_f.dir'', TREXIO_TEXT)'
|
||||
call test_write('test_write_f.dir', TREXIO_TEXT)
|
||||
print *, 'call test_read(''test_write_f.dir'', TREXIO_TEXT)'
|
||||
call test_read('test_write_f.dir', TREXIO_TEXT)
|
||||
call system('rm -rf test_write_f.dir')
|
||||
call system('rm -rf -- test_write_f.dir')
|
||||
|
||||
call test_read_void('test_write_f.dir', TREXIO_TEXT)
|
||||
|
||||
! No way to conditionally check whether compilation was done with HDF5
|
||||
! No way to conditionally check whether compilation was done with HDF5
|
||||
! So temporarily disable the test for HDF5 back end at the moment
|
||||
have_hdf5 = trexio_has_backend(TREXIO_HDF5)
|
||||
if (have_hdf5) then
|
||||
@ -30,7 +30,7 @@ program test_trexio
|
||||
print *, 'call test_read(''test_write_f.h5'', TREXIO_HDF5)'
|
||||
call test_read('test_write_f.h5', TREXIO_HDF5)
|
||||
call system('rm -f -- test_write_f.h5')
|
||||
|
||||
|
||||
call test_read_void('test_write_f.h5', TREXIO_HDF5)
|
||||
endif
|
||||
|
||||
@ -61,6 +61,22 @@ subroutine test_write(file_name, back_end)
|
||||
character(len=:), allocatable :: sym_str
|
||||
character(len=:), allocatable :: label(:)
|
||||
|
||||
! sparse data
|
||||
integer(4) :: index_sparse_mo_2e_int_eri(4,100)
|
||||
double precision :: value_sparse_mo_2e_int_eri(100)
|
||||
|
||||
integer :: i, n_buffers = 5
|
||||
integer(8) :: buf_size, offset
|
||||
buf_size = 100/n_buffers
|
||||
|
||||
do i = 1, 100
|
||||
index_sparse_mo_2e_int_eri(1,i) = 4*i - 3
|
||||
index_sparse_mo_2e_int_eri(2,i) = 4*i+1 - 3
|
||||
index_sparse_mo_2e_int_eri(3,i) = 4*i+2 - 3
|
||||
index_sparse_mo_2e_int_eri(4,i) = 4*i+3 - 3
|
||||
value_sparse_mo_2e_int_eri(i) = 3.14 + float(i)
|
||||
enddo
|
||||
|
||||
! parameters to be written
|
||||
num = 12
|
||||
charge = (/ 6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1. /)
|
||||
@ -96,6 +112,9 @@ subroutine test_write(file_name, back_end)
|
||||
rc = trexio_has_nucleus_charge(trex_file)
|
||||
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 2')
|
||||
|
||||
rc = trexio_has_mo_2e_int_eri(trex_file)
|
||||
call trexio_assert(rc, TREXIO_HAS_NOT, 'SUCCESS HAS NOT 3')
|
||||
|
||||
rc = trexio_write_nucleus_num(trex_file, num)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE NUM')
|
||||
|
||||
@ -106,8 +125,8 @@ subroutine test_write(file_name, back_end)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE COORD')
|
||||
|
||||
rc = trexio_write_nucleus_label(trex_file, label, 5)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
|
||||
deallocate(label)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE LABEL')
|
||||
|
||||
rc = trexio_write_nucleus_point_group(trex_file, sym_str, 32)
|
||||
deallocate(sym_str)
|
||||
@ -119,6 +138,20 @@ subroutine test_write(file_name, back_end)
|
||||
rc = trexio_write_basis_nucleus_index(trex_file, basis_nucleus_index)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE INDEX')
|
||||
|
||||
! write mo_num which will be used to determine the optimal size of int indices
|
||||
if (trexio_has_mo_num(trex_file) == TREXIO_HAS_NOT) then
|
||||
rc = trexio_write_mo_num(trex_file, 1000)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE MO NUM')
|
||||
endif
|
||||
|
||||
offset = 0
|
||||
do i = 1,n_buffers
|
||||
rc = trexio_write_mo_2e_int_eri(trex_file, offset, buf_size, &
|
||||
index_sparse_mo_2e_int_eri(1,offset+1), &
|
||||
value_sparse_mo_2e_int_eri(offset+1))
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS WRITE SPARSE')
|
||||
offset = offset + buf_size
|
||||
enddo
|
||||
|
||||
rc = trexio_has_nucleus_num(trex_file)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 1')
|
||||
@ -126,6 +159,9 @@ subroutine test_write(file_name, back_end)
|
||||
rc = trexio_has_nucleus_coord(trex_file)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 2')
|
||||
|
||||
rc = trexio_has_mo_2e_int_eri(trex_file)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS HAS 3')
|
||||
|
||||
rc = trexio_close(trex_file)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS, 'SUCCESS CLOSE')
|
||||
|
||||
@ -160,11 +196,25 @@ subroutine test_read(file_name, back_end)
|
||||
|
||||
character(len=32) :: sym_str
|
||||
|
||||
! sparse data
|
||||
integer(4) :: index_sparse_mo_2e_int_eri(4,20)
|
||||
double precision :: value_sparse_mo_2e_int_eri(20)
|
||||
integer(8) :: read_buf_size = 10
|
||||
integer(8) :: read_buf_size_save = 10
|
||||
integer(8) :: offset_read = 40
|
||||
integer(8) :: offset_data_read = 5
|
||||
integer(8) :: offset_eof = 97
|
||||
integer(8) :: offset_data_eof = 1
|
||||
integer(8) :: size_toread = 0
|
||||
|
||||
character*(128) :: str
|
||||
|
||||
num = 12
|
||||
basis_shell_num = 24
|
||||
|
||||
index_sparse_mo_2e_int_eri = 0
|
||||
value_sparse_mo_2e_int_eri = 0.0d0
|
||||
|
||||
! ================= START OF TEST ===================== !
|
||||
|
||||
trex_file = trexio_open(file_name, 'r', back_end, rc)
|
||||
@ -199,7 +249,7 @@ subroutine test_read(file_name, back_end)
|
||||
call exit(-1)
|
||||
endif
|
||||
|
||||
|
||||
|
||||
rc = trexio_read_nucleus_label(trex_file, label, 2)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
if (trim(label(2)) == 'Na') then
|
||||
@ -230,6 +280,52 @@ subroutine test_read(file_name, back_end)
|
||||
endif
|
||||
|
||||
|
||||
rc = trexio_read_mo_2e_int_eri(trex_file, offset_read, read_buf_size, &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1), &
|
||||
value_sparse_mo_2e_int_eri(offset_data_read + 1))
|
||||
!do i = 1,20
|
||||
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
|
||||
!enddo
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
if (index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1) then
|
||||
write(*,*) 'SUCCESS READ SPARSE DATA'
|
||||
else
|
||||
print *, 'FAILURE SPARSE DATA CHECK'
|
||||
call exit(-1)
|
||||
endif
|
||||
|
||||
|
||||
! attempt to read reaching EOF: should return TREXIO_END and
|
||||
! NOT increment the existing values in the buffer (only upd with what has been read)
|
||||
rc = trexio_read_mo_2e_int_eri(trex_file, offset_eof, read_buf_size, &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1), &
|
||||
value_sparse_mo_2e_int_eri(offset_data_eof + 1))
|
||||
!do i = 1,20
|
||||
! write(*,*) index_sparse_mo_2e_int_eri(1,i)
|
||||
!enddo
|
||||
call trexio_assert(rc, TREXIO_END)
|
||||
if (read_buf_size == 3 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, 1) == 0 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_read + 1) == offset_read*4 + 1 .and. &
|
||||
index_sparse_mo_2e_int_eri(1, offset_data_eof + 1) == offset_eof*4 + 1) then
|
||||
write(*,*) 'SUCCESS READ SPARSE DATA EOF'
|
||||
read_buf_size = read_buf_size_save
|
||||
else
|
||||
print *, 'FAILURE SPARSE DATA EOF CHECK'
|
||||
call exit(-1)
|
||||
endif
|
||||
|
||||
rc = trexio_read_mo_2e_int_eri_size(trex_file, size_toread)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
if (size_toread == 100) then
|
||||
write(*,*) 'SUCCESS READ SPARSE SIZE'
|
||||
else
|
||||
print *, 'FAILURE SPARSE SIZE CHECK'
|
||||
call exit(-1)
|
||||
endif
|
||||
|
||||
|
||||
rc = trexio_close(trex_file)
|
||||
call trexio_assert(rc, TREXIO_SUCCESS)
|
||||
|
||||
@ -254,6 +350,9 @@ subroutine test_read_void(file_name, back_end)
|
||||
! ================= START OF TEST ===================== !
|
||||
|
||||
trex_file = trexio_open(file_name, 'r', back_end, rc)
|
||||
if (rc /= TREXIO_OPEN_ERROR) then
|
||||
rc = trexio_close(trex_file)
|
||||
endif
|
||||
call trexio_assert(rc, TREXIO_OPEN_ERROR)
|
||||
|
||||
call trexio_string_of_error(rc, str)
|
||||
@ -262,4 +361,3 @@ subroutine test_read_void(file_name, back_end)
|
||||
! ================= END OF TEST ===================== !
|
||||
|
||||
end subroutine test_read_void
|
||||
|
||||
|
@ -6,29 +6,31 @@ config_file = 'trex.json'
|
||||
trex_config = read_json(config_file)
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# -------------------------------- [WIP] ------------------------------------ #
|
||||
# for now remove rdm from config because its functions are hardcoded
|
||||
del trex_config['rdm']
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# -------------------- GET ATTRIBUTES FROM THE CONFIGURATION ---------------- #
|
||||
group_dict = get_group_dict(trex_config)
|
||||
detailed_nums = get_detailed_num_dict(trex_config)
|
||||
detailed_strs = get_detailed_str_dict(trex_config)
|
||||
# helper dictionaries that contain names of groups, nums or dsets as keys
|
||||
dsets = get_dset_dict(trex_config)
|
||||
detailed_dsets_nostr, detailed_dsets_str = split_dset_dict_detailed(dsets)
|
||||
detailed_dsets_nostr, detailed_dsets_str, detailed_dsets_sparse = split_dset_dict_detailed(dsets)
|
||||
detailed_dsets = detailed_dsets_nostr.copy()
|
||||
detailed_dsets.update(detailed_dsets_str)
|
||||
# build a big dictionary with all pre-processed data
|
||||
detailed_all = {
|
||||
'datasets' : dict(detailed_dsets_nostr, **detailed_dsets_str, **detailed_dsets_sparse),
|
||||
'groups' : group_dict,
|
||||
'numbers' : detailed_nums,
|
||||
'strings' : detailed_strs
|
||||
}
|
||||
# consistency check for dimensioning variables
|
||||
check_dim_consistency(detailed_nums, dsets)
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# -------------------- GET TEMPLATED FILES TO BE POPULATED ------------------ #
|
||||
source = ['front', 'text', 'hdf5']
|
||||
# build helper dictionaries with paths per source directory
|
||||
# build helper dictionaries with paths per source directory
|
||||
template_paths = get_template_paths(source)
|
||||
# build helper dictionaries with source files per source directory
|
||||
# build helper dictionaries with source files per source directory
|
||||
source_files = get_source_files(template_paths)
|
||||
# build helper dictionaries with templated files
|
||||
files_todo = get_files_todo(source_files)
|
||||
@ -38,7 +40,7 @@ files_todo = get_files_todo(source_files)
|
||||
|
||||
# populate files with iterative scheme, i.e. for unique functions
|
||||
for fname in files_todo['auxiliary']:
|
||||
iterative_populate_file(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
|
||||
iterative_populate_file(fname, template_paths, detailed_all)
|
||||
|
||||
# populate has/read/write_num functions with recursive scheme
|
||||
for fname in files_todo['attr_num']:
|
||||
@ -48,14 +50,18 @@ for fname in files_todo['attr_num']:
|
||||
for fname in files_todo['attr_str']:
|
||||
recursive_populate_file(fname, template_paths, detailed_strs)
|
||||
|
||||
# populate has/read/write_dset (numerical) functions with recursive scheme
|
||||
# populate has/read/write_dset (numerical) functions with recursive scheme
|
||||
for fname in files_todo['dset_data']:
|
||||
recursive_populate_file(fname, template_paths, detailed_dsets_nostr)
|
||||
|
||||
# populate has/read/write_dset (strings) functions with recursive scheme
|
||||
# populate has/read/write_dset (strings) functions with recursive scheme
|
||||
for fname in files_todo['dset_str']:
|
||||
recursive_populate_file(fname, template_paths, detailed_dsets_str)
|
||||
|
||||
# populate has/read/write_dset (sparse) functions with recursive scheme
|
||||
for fname in files_todo['dset_sparse']:
|
||||
recursive_populate_file(fname, template_paths, detailed_dsets_sparse)
|
||||
|
||||
# populate group-related functions with mixed (iterative+recursive) scheme [text backend]
|
||||
for fname in files_todo['group']:
|
||||
special_populate_text_group(fname, template_paths, group_dict, detailed_dsets, detailed_nums, detailed_strs)
|
||||
|
@ -4,7 +4,7 @@ from json import load as json_load
|
||||
|
||||
|
||||
def read_json(fname: str) -> dict:
|
||||
"""
|
||||
"""
|
||||
Read configuration from the input `fname` JSON file.
|
||||
|
||||
Parameters:
|
||||
@ -23,7 +23,7 @@ def read_json(fname: str) -> dict:
|
||||
|
||||
|
||||
def get_files_todo(source_files: dict) -> dict:
|
||||
"""
|
||||
"""
|
||||
Build dictionaries of templated files per objective.
|
||||
|
||||
Parameters:
|
||||
@ -36,21 +36,21 @@ def get_files_todo(source_files: dict) -> dict:
|
||||
for key in source_files.keys():
|
||||
all_files += source_files[key]
|
||||
|
||||
files_todo = {}
|
||||
files_todo = {}
|
||||
#files_todo['all'] = list(filter(lambda x: 'read' in x or 'write' in x or 'has' in x or 'hrw' in x or 'flush' in x or 'free' in x, all_files))
|
||||
files_todo['all'] = [f for f in all_files if 'read' in f or 'write' in f or 'has' in f or 'flush' in f or 'free' in f or 'hrw' in f]
|
||||
for key in ['dset_data', 'dset_str', 'attr_num', 'attr_str', 'group']:
|
||||
for key in ['dset_data', 'dset_str', 'dset_sparse', 'attr_num', 'attr_str', 'group']:
|
||||
files_todo[key] = list(filter(lambda x: key in x, files_todo['all']))
|
||||
|
||||
files_todo['group'].append('struct_text_group_dset.h')
|
||||
# files that correspond to iterative population (e.g. the code is repeated within the function body but the function itself is unique)
|
||||
files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h']
|
||||
files_todo['auxiliary'] = ['def_hdf5.c', 'basic_hdf5.c', 'basic_text_group.c', 'struct_hdf5.h', 'struct_text_group.h']
|
||||
|
||||
return files_todo
|
||||
|
||||
|
||||
def get_source_files(paths: dict) -> dict:
|
||||
"""
|
||||
"""
|
||||
Build dictionaries of all files per source directory.
|
||||
|
||||
Parameters:
|
||||
@ -67,7 +67,7 @@ def get_source_files(paths: dict) -> dict:
|
||||
|
||||
|
||||
def get_template_paths(source: list) -> dict:
|
||||
"""
|
||||
"""
|
||||
Build dictionary of the absolute paths to directory with templates per source.
|
||||
|
||||
Parameters:
|
||||
@ -86,7 +86,7 @@ def get_template_paths(source: list) -> dict:
|
||||
|
||||
|
||||
def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> None:
|
||||
"""
|
||||
"""
|
||||
Populate files containing basic read/write/has functions.
|
||||
|
||||
Parameters:
|
||||
@ -107,6 +107,10 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
|
||||
'group_num_f_dtype_default', 'group_num_f_dtype_double', 'group_num_f_dtype_single',
|
||||
'group_num_dtype_default', 'group_num_dtype_double', 'group_num_dtype_single',
|
||||
'group_num_h5_dtype', 'group_num_py_dtype',
|
||||
'group_dset_format_scanf', 'group_dset_format_printf', 'group_dset_sparse_dim',
|
||||
'group_dset_sparse_indices_printf', 'group_dset_sparse_indices_scanf',
|
||||
'sparse_format_printf_8', 'sparse_format_printf_16', 'sparse_format_printf_32',
|
||||
'sparse_line_length_8', 'sparse_line_length_16', 'sparse_line_length_32',
|
||||
'group_dset', 'group_num', 'group_str', 'group']
|
||||
|
||||
for item in detailed_source.keys():
|
||||
@ -133,9 +137,9 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
|
||||
elif 'uncommented by the generator for dimensioning' in line:
|
||||
# only uncomment and write the line if `num` is in the name
|
||||
if 'dim' in detailed_source[item]['trex_json_int_type']:
|
||||
templine = line.replace('//', '')
|
||||
templine = line.replace('//', '')
|
||||
f_out.write(templine)
|
||||
# general case of recursive replacement of inline triggers
|
||||
# general case of recursive replacement of inline triggers
|
||||
else:
|
||||
populated_line = recursive_replace_line(line, triggers, detailed_source[item])
|
||||
f_out.write(populated_line)
|
||||
@ -144,8 +148,8 @@ def recursive_populate_file(fname: str, paths: dict, detailed_source: dict) -> N
|
||||
|
||||
|
||||
def recursive_replace_line (input_line: str, triggers: list, source: dict) -> str:
|
||||
"""
|
||||
Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`.
|
||||
"""
|
||||
Recursive replacer. Recursively calls itself as long as there is at least one "$" present in the `input_line`.
|
||||
|
||||
Parameters:
|
||||
input_line (str) : input line
|
||||
@ -154,10 +158,10 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
|
||||
|
||||
Returns:
|
||||
output_line (str) : processed (replaced) line
|
||||
"""
|
||||
"""
|
||||
is_triggered = False
|
||||
output_line = input_line
|
||||
|
||||
|
||||
if '$' in input_line:
|
||||
for case in triggers:
|
||||
test_case = f'${case}$'
|
||||
@ -175,21 +179,22 @@ def recursive_replace_line (input_line: str, triggers: list, source: dict) -> st
|
||||
else:
|
||||
print(output_line)
|
||||
raise ValueError('Recursion went wrong, not all cases considered')
|
||||
|
||||
|
||||
return output_line
|
||||
|
||||
|
||||
def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets: dict, numbers: dict, strings: dict) -> None:
|
||||
"""
|
||||
def iterative_populate_file (filename: str, paths: dict, detailed_all: dict) -> None:
|
||||
"""
|
||||
Iteratively populate files with unique functions that contain templated variables.
|
||||
|
||||
Parameters:
|
||||
filename (str) : template file to be populated
|
||||
paths (dict) : dictionary of paths per source directory
|
||||
groups (dict) : dictionary of groups
|
||||
datasets (dict) : dictionary of datasets with substitution details
|
||||
numbers (dict) : dictionary of numbers with substitution details
|
||||
strings (dict) : dictionary of strings with substitution details
|
||||
detailed_all(dict) : dictionary with substitution details with the following keys:
|
||||
'groups' : dictionary of groups with substitution details
|
||||
'datasets' : dictionary of datasets with substitution details
|
||||
'numbers' : dictionary of numbers with substitution details
|
||||
'strings' : dictionary of strings with substitution details
|
||||
|
||||
Returns:
|
||||
None
|
||||
@ -200,7 +205,7 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
|
||||
templ_path = get_template_path(filename, paths)
|
||||
filename_out = join('populated',f'pop_{filename}')
|
||||
# Note: it is important that special conditions like add_trigger above will be checked before standard triggers
|
||||
# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed
|
||||
# that contain only basic $-ed variable (like $group$). Otherwise, the standard triggers will be removed
|
||||
# from the template and the special condition will never be met.
|
||||
with open(join(templ_path,filename), 'r') as f_in :
|
||||
with open(join(templ_path,filename_out), 'a') as f_out :
|
||||
@ -209,29 +214,29 @@ def iterative_populate_file (filename: str, paths: dict, groups: dict, datasets:
|
||||
if id == 0:
|
||||
# special case for proper error handling when deallocting text groups
|
||||
error_handler = ' if (rc != TREXIO_SUCCESS) return rc;\n'
|
||||
populated_line = iterative_replace_line(line, '$group$', groups, add_line=error_handler)
|
||||
populated_line = iterative_replace_line(line, '$group$', detailed_all['groups'], add_line=error_handler)
|
||||
f_out.write(populated_line)
|
||||
elif id == 1:
|
||||
populated_line = iterative_replace_line(line, triggers[id], datasets, None)
|
||||
populated_line = iterative_replace_line(line, triggers[id], detailed_all['datasets'], None)
|
||||
f_out.write(populated_line)
|
||||
elif id == 2:
|
||||
populated_line = iterative_replace_line(line, triggers[id], numbers, None)
|
||||
populated_line = iterative_replace_line(line, triggers[id], detailed_all['numbers'], None)
|
||||
f_out.write(populated_line)
|
||||
elif id == 3:
|
||||
populated_line = iterative_replace_line(line, triggers[id], strings, None)
|
||||
populated_line = iterative_replace_line(line, triggers[id], detailed_all['strings'], None)
|
||||
f_out.write(populated_line)
|
||||
elif id == 4:
|
||||
populated_line = iterative_replace_line(line, triggers[id], groups, None)
|
||||
populated_line = iterative_replace_line(line, triggers[id], detailed_all['groups'], None)
|
||||
f_out.write(populated_line)
|
||||
else:
|
||||
f_out.write(line)
|
||||
|
||||
|
||||
f_out.write("\n")
|
||||
|
||||
|
||||
def iterative_replace_line (input_line: str, case: str, source: dict, add_line: str) -> str:
|
||||
"""
|
||||
Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`.
|
||||
"""
|
||||
Iterative replacer. Iteratively copy-pastes `input_line` each time with a new substitution of a templated variable depending on the `case`.
|
||||
|
||||
Parameters:
|
||||
input_line (str) : input line
|
||||
@ -241,7 +246,7 @@ def iterative_replace_line (input_line: str, case: str, source: dict, add_line:
|
||||
|
||||
Returns:
|
||||
output_block (str) : processed (replaced) block of text
|
||||
"""
|
||||
"""
|
||||
output_block = ""
|
||||
for item in source.keys():
|
||||
templine1 = input_line.replace(case.upper(), item.upper())
|
||||
@ -270,12 +275,12 @@ def check_triggers (input_line: str, triggers: list) -> int:
|
||||
if trig in input_line or trig.upper() in input_line:
|
||||
out_id = id
|
||||
return out_id
|
||||
|
||||
|
||||
return out_id
|
||||
|
||||
|
||||
def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detailed_dset: dict, detailed_numbers: dict, detailed_strings: dict) -> None:
|
||||
"""
|
||||
"""
|
||||
Special population for group-related functions in the TEXT back end.
|
||||
|
||||
Parameters:
|
||||
@ -292,8 +297,8 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
fname_new = join('populated',f'pop_{fname}')
|
||||
templ_path = get_template_path(fname, paths)
|
||||
|
||||
triggers = ['group_dset_dtype', 'group_dset_std_dtype_out', 'group_dset_std_dtype_in',
|
||||
'group_num_dtype_double', 'group_num_std_dtype_out', 'group_num_std_dtype_in',
|
||||
triggers = ['group_dset_dtype', 'group_dset_format_printf', 'group_dset_format_scanf',
|
||||
'group_num_dtype_double', 'group_num_format_printf', 'group_num_format_scanf',
|
||||
'group_dset', 'group_num', 'group_str', 'group']
|
||||
|
||||
for group in group_dict.keys():
|
||||
@ -316,16 +321,16 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
elif 'START REPEAT GROUP_NUM' in line or 'START REPEAT GROUP_ATTR_STR' in line:
|
||||
subloop_num = True
|
||||
continue
|
||||
|
||||
|
||||
if 'END REPEAT GROUP_DSET' in line:
|
||||
|
||||
for dset in detailed_dset.keys():
|
||||
if group != detailed_dset[dset]['group']:
|
||||
if group != detailed_dset[dset]['group']:
|
||||
continue
|
||||
|
||||
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['dtype'] != 'char*'):
|
||||
if ('REPEAT GROUP_DSET_STR' in line) and (detailed_dset[dset]['group_dset_dtype'] != 'char*'):
|
||||
continue
|
||||
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['dtype'] == 'char*'):
|
||||
if ('REPEAT GROUP_DSET_NUM' in line) and (detailed_dset[dset]['group_dset_dtype'] == 'char*'):
|
||||
continue
|
||||
|
||||
dset_allocated.append(dset)
|
||||
@ -351,7 +356,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
|
||||
elif 'END REPEAT GROUP_NUM' in line:
|
||||
for dim in detailed_numbers.keys():
|
||||
if group != detailed_numbers[dim]['group']:
|
||||
if group != detailed_numbers[dim]['group']:
|
||||
continue
|
||||
|
||||
save_body = loop_body
|
||||
@ -364,7 +369,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
|
||||
elif 'END REPEAT GROUP_ATTR_STR' in line:
|
||||
for str in detailed_strings.keys():
|
||||
if group != detailed_strings[str]['group']:
|
||||
if group != detailed_strings[str]['group']:
|
||||
continue
|
||||
|
||||
str_allocated.append(str)
|
||||
@ -390,22 +395,22 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
continue
|
||||
|
||||
if not subloop_num and not subloop_dset:
|
||||
# NORMAL CASE WITHOUT SUBLOOPS
|
||||
# NORMAL CASE WITHOUT SUBLOOPS
|
||||
if '$group_dset' in line:
|
||||
for dset in detailed_dset.keys():
|
||||
if group != detailed_dset[dset]['group']:
|
||||
if group != detailed_dset[dset]['group']:
|
||||
continue
|
||||
populated_line = recursive_replace_line(line, triggers, detailed_dset[dset])
|
||||
f_out.write(populated_line)
|
||||
elif '$group_str' in line:
|
||||
for str in detailed_strings.keys():
|
||||
if group != detailed_strings[str]['group']:
|
||||
if group != detailed_strings[str]['group']:
|
||||
continue
|
||||
populated_line = recursive_replace_line(line, triggers, detailed_strings[str])
|
||||
f_out.write(populated_line)
|
||||
elif '$group_num$' in line:
|
||||
for dim in detailed_numbers.keys():
|
||||
if group != detailed_numbers[dim]['group']:
|
||||
if group != detailed_numbers[dim]['group']:
|
||||
continue
|
||||
populated_line = recursive_replace_line(line, triggers, detailed_numbers[dim])
|
||||
f_out.write(populated_line)
|
||||
@ -421,7 +426,7 @@ def special_populate_text_group(fname: str, paths: dict, group_dict: dict, detai
|
||||
|
||||
|
||||
def get_template_path (filename: str, path_dict: dict) -> str:
|
||||
"""
|
||||
"""
|
||||
Returns the absolute path to the directory with indicated `filename` template.
|
||||
|
||||
Parameters:
|
||||
@ -435,12 +440,12 @@ def get_template_path (filename: str, path_dict: dict) -> str:
|
||||
if dir_type in filename:
|
||||
path = path_dict[dir_type]
|
||||
return path
|
||||
|
||||
|
||||
raise ValueError('Filename should contain one of the keywords')
|
||||
|
||||
|
||||
def get_group_dict (configuration: dict) -> dict:
|
||||
"""
|
||||
"""
|
||||
Returns the dictionary of all groups.
|
||||
|
||||
Parameters:
|
||||
@ -456,10 +461,126 @@ def get_group_dict (configuration: dict) -> dict:
|
||||
return group_dict
|
||||
|
||||
|
||||
def get_dtype_dict (dtype: str, target: str, rank = None, int_len_printf = None) -> dict:
    """
    Returns the dictionary of dtype-related templated variables set for a given `dtype`.
    Keys are names of templated variables, values are strings to be used by the generator.

    Parameters:
        dtype (str)          : dtype corresponding to the trex.json (i.e. int/dim/index/float/float sparse/str)
        target (str)         : `num` or `dset`; it is embedded in the generated key names (`group_{target}_...`)
        rank (int)           : [optional] value of n in n-index (sparse) dset; needed to build the printf/scanf format string
        int_len_printf(dict) : [optional]
                               keys: precision (8, 16 and 32 are looked up for sparse data)
                               values: lengths reserved for one index when printing n-index (sparse) dset (e.g. 10 for int32_t)

    Returns:
        dtype_dict (dict) : dictionary of dtype-related substitutions;
                            it is empty when `dtype` matches none of the supported cases

    Raises:
        ValueError : if `target` is neither `num` nor `dset`, if a sparse `dtype` is requested
                     without both `rank` and `int_len_printf`, or if a provided `rank` is lower than 2
        TypeError  : if `int_len_printf` is provided but is not a dictionary
    """
    # ValueError/TypeError are subclasses of Exception, so callers catching Exception keep working
    if target not in ('num', 'dset'):
        raise ValueError('Only num or dset target can be set.')
    if 'sparse' in dtype and (rank is None or int_len_printf is None):
        raise ValueError("Both rank and int_len_printf arguments has to be provided to build the dtype_dict for sparse data.")
    if rank is not None and rank <= 1:
        raise ValueError('Rank of sparse quantity cannot be lower than 2.')
    if int_len_printf is not None and not isinstance(int_len_printf, dict):
        raise TypeError('int_len_printf has to be a dictionary of lengths for different precisions.')

    prefix = f'group_{target}'
    dtype_dict = {}

    # set up the key-value pairs depending on the dtype
    if dtype == 'float':
        dtype_dict.update({
            'default_prec'              : '64',
            f'{prefix}_dtype'           : 'double',
            f'{prefix}_h5_dtype'        : 'native_double',
            f'{prefix}_f_dtype_default' : 'real(8)',
            f'{prefix}_f_dtype_double'  : 'real(8)',
            f'{prefix}_f_dtype_single'  : 'real(4)',
            f'{prefix}_dtype_default'   : 'double',
            f'{prefix}_dtype_double'    : 'double',
            f'{prefix}_dtype_single'    : 'float',
            f'{prefix}_format_printf'   : '24.16e',
            f'{prefix}_format_scanf'    : 'lf',
            f'{prefix}_py_dtype'        : 'float'
        })
    elif dtype in ('int', 'dim', 'index'):
        dtype_dict.update({
            'default_prec'              : '32',
            f'{prefix}_dtype'           : 'int64_t',
            f'{prefix}_h5_dtype'        : 'native_int64',
            f'{prefix}_f_dtype_default' : 'integer(4)',
            f'{prefix}_f_dtype_double'  : 'integer(8)',
            f'{prefix}_f_dtype_single'  : 'integer(4)',
            f'{prefix}_dtype_default'   : 'int32_t',
            f'{prefix}_dtype_double'    : 'int64_t',
            f'{prefix}_dtype_single'    : 'int32_t',
            f'{prefix}_format_printf'   : '" PRId64 "',
            f'{prefix}_format_scanf'    : '" SCNd64 "',
            f'{prefix}_py_dtype'        : 'int'
        })
    elif dtype == 'str':
        dtype_dict.update({
            'default_prec'              : '',
            f'{prefix}_dtype'           : 'char*',
            f'{prefix}_h5_dtype'        : '',
            f'{prefix}_f_dtype_default' : '',
            f'{prefix}_f_dtype_double'  : '',
            f'{prefix}_f_dtype_single'  : '',
            f'{prefix}_dtype_default'   : 'char*',
            f'{prefix}_dtype_double'    : '',
            f'{prefix}_dtype_single'    : '',
            f'{prefix}_format_printf'   : 's',
            f'{prefix}_format_scanf'    : 's',
            f'{prefix}_py_dtype'        : 'str'
        })
    elif 'sparse' in dtype:
        # one printf item per index, padded to the width reserved for the given precision
        item_printf = {
            8  : f'%{int_len_printf[8]}" PRIu8 " ',
            16 : f'%{int_len_printf[16]}" PRIu16 " ',
            32 : f'%{int_len_printf[32]}" PRId32 " '
        }
        item_scanf = '%" SCNd32 " '
        # float value specifier that terminates every printf format string
        value_printf = '%24.16e" '

        # the printf strings carry their own opening quote; the scanf string does not
        sparse_format_printf = {
            prec : '"' + item * rank + value_printf for prec, item in item_printf.items()
        }
        # rank index specifiers followed by the specifier for the float value
        group_dset_format_scanf = item_scanf * rank + '%lf'

        # set up the dictionary for sparse
        dtype_dict.update({
            'default_prec'              : '',
            f'{prefix}_dtype'           : 'double',
            f'{prefix}_h5_dtype'        : '',
            f'{prefix}_f_dtype_default' : '',
            f'{prefix}_f_dtype_double'  : '',
            f'{prefix}_f_dtype_single'  : '',
            f'{prefix}_dtype_default'   : '',
            f'{prefix}_dtype_double'    : '',
            f'{prefix}_dtype_single'    : '',
            'sparse_format_printf_8'    : sparse_format_printf[8],
            'sparse_format_printf_16'   : sparse_format_printf[16],
            'sparse_format_printf_32'   : sparse_format_printf[32],
            f'{prefix}_format_scanf'    : group_dset_format_scanf,
            f'{prefix}_py_dtype'        : ''
        })

    return dtype_dict
|
||||
|
||||
|
||||
|
||||
def get_detailed_num_dict (configuration: dict) -> dict:
|
||||
"""
|
||||
"""
|
||||
Returns the dictionary of all `num`-suffixed variables.
|
||||
Keys are names, values are subdictionaries containing corresponding group and group_num names.
|
||||
Keys are names, values are subdictionaries containing corresponding group and group_num names.
|
||||
|
||||
Parameters:
|
||||
configuration (dict) : configuration from `trex.json`
|
||||
@ -472,40 +593,17 @@ def get_detailed_num_dict (configuration: dict) -> dict:
|
||||
for k2,v2 in v1.items():
|
||||
if len(v2[1]) == 0:
|
||||
tmp_num = f'{k1}_{k2}'
|
||||
if 'str' not in v2[0]:
|
||||
if not 'str' in v2[0]:
|
||||
tmp_dict = {}
|
||||
tmp_dict['group'] = k1
|
||||
tmp_dict['group_num'] = tmp_num
|
||||
num_dict[tmp_num] = tmp_dict
|
||||
|
||||
# TODO the arguments below are almost the same as for group_dset (except for trex_json_int_type) and can be exported from somewhere
|
||||
if v2[0] == 'float':
|
||||
tmp_dict['datatype'] = 'double'
|
||||
tmp_dict['group_num_h5_dtype'] = 'native_double'
|
||||
tmp_dict['group_num_f_dtype_default']= 'real(8)'
|
||||
tmp_dict['group_num_f_dtype_double'] = 'real(8)'
|
||||
tmp_dict['group_num_f_dtype_single'] = 'real(4)'
|
||||
tmp_dict['group_num_dtype_default']= 'double'
|
||||
tmp_dict['group_num_dtype_double'] = 'double'
|
||||
tmp_dict['group_num_dtype_single'] = 'float'
|
||||
tmp_dict['default_prec'] = '64'
|
||||
tmp_dict['group_num_std_dtype_out'] = '24.16e'
|
||||
tmp_dict['group_num_std_dtype_in'] = 'lf'
|
||||
tmp_dict['group_num_py_dtype'] = 'float'
|
||||
elif v2[0] in ['int', 'dim']:
|
||||
tmp_dict['datatype'] = 'int64_t'
|
||||
tmp_dict['group_num_h5_dtype'] = 'native_int64'
|
||||
tmp_dict['group_num_f_dtype_default']= 'integer(4)'
|
||||
tmp_dict['group_num_f_dtype_double'] = 'integer(8)'
|
||||
tmp_dict['group_num_f_dtype_single'] = 'integer(4)'
|
||||
tmp_dict['group_num_dtype_default']= 'int32_t'
|
||||
tmp_dict['group_num_dtype_double'] = 'int64_t'
|
||||
tmp_dict['group_num_dtype_single'] = 'int32_t'
|
||||
tmp_dict['default_prec'] = '32'
|
||||
tmp_dict['group_num_std_dtype_out'] = '" PRId64 "'
|
||||
tmp_dict['group_num_std_dtype_in'] = '" SCNd64 "'
|
||||
tmp_dict['group_num_py_dtype'] = 'int'
|
||||
tmp_dict.update(get_dtype_dict(v2[0], 'num'))
|
||||
if v2[0] in ['int', 'dim']:
|
||||
tmp_dict['trex_json_int_type'] = v2[0]
|
||||
else:
|
||||
tmp_dict['trex_json_int_type'] = ''
|
||||
|
||||
return num_dict
|
||||
|
||||
@ -536,8 +634,8 @@ def get_detailed_str_dict (configuration: dict) -> dict:
|
||||
|
||||
|
||||
def get_dset_dict (configuration: dict) -> dict:
|
||||
"""
|
||||
Returns the dictionary of datasets.
|
||||
"""
|
||||
Returns the dictionary of datasets.
|
||||
Keys are names, values are lists containing datatype, list of dimensions and group name
|
||||
|
||||
Parameters:
|
||||
@ -559,8 +657,8 @@ def get_dset_dict (configuration: dict) -> dict:
|
||||
|
||||
|
||||
def split_dset_dict_detailed (datasets: dict) -> tuple:
|
||||
"""
|
||||
Returns the detailed dictionary of datasets.
|
||||
"""
|
||||
Returns the detailed dictionary of datasets.
|
||||
Keys are names, values are subdictionaries containing substitutes for templated variables
|
||||
|
||||
Parameters:
|
||||
@ -571,106 +669,106 @@ def split_dset_dict_detailed (datasets: dict) -> tuple:
|
||||
"""
|
||||
dset_numeric_dict = {}
|
||||
dset_string_dict = {}
|
||||
dset_sparse_dict = {}
|
||||
for k,v in datasets.items():
|
||||
# create a temp dictionary
|
||||
tmp_dict = {}
|
||||
# specify details required to replace templated variables later
|
||||
if v[0] == 'float':
|
||||
datatype = 'double'
|
||||
group_dset_h5_dtype = 'native_double'
|
||||
group_dset_f_dtype_default= 'real(8)'
|
||||
group_dset_f_dtype_double = 'real(8)'
|
||||
group_dset_f_dtype_single = 'real(4)'
|
||||
group_dset_dtype_default= 'double'
|
||||
group_dset_dtype_double = 'double'
|
||||
group_dset_dtype_single = 'float'
|
||||
default_prec = '64'
|
||||
group_dset_std_dtype_out = '24.16e'
|
||||
group_dset_std_dtype_in = 'lf'
|
||||
group_dset_py_dtype = 'float'
|
||||
elif v[0] in ['int', 'index']:
|
||||
datatype = 'int64_t'
|
||||
group_dset_h5_dtype = 'native_int64'
|
||||
group_dset_f_dtype_default= 'integer(4)'
|
||||
group_dset_f_dtype_double = 'integer(8)'
|
||||
group_dset_f_dtype_single = 'integer(4)'
|
||||
group_dset_dtype_default= 'int32_t'
|
||||
group_dset_dtype_double = 'int64_t'
|
||||
group_dset_dtype_single = 'int32_t'
|
||||
default_prec = '32'
|
||||
group_dset_std_dtype_out = '" PRId64 "'
|
||||
group_dset_std_dtype_in = '" SCNd64 "'
|
||||
group_dset_py_dtype = 'int'
|
||||
elif v[0] == 'str':
|
||||
datatype = 'char*'
|
||||
group_dset_h5_dtype = ''
|
||||
group_dset_f_dtype_default = ''
|
||||
group_dset_f_dtype_double = ''
|
||||
group_dset_f_dtype_single = ''
|
||||
group_dset_dtype_default = 'char*'
|
||||
group_dset_dtype_double = ''
|
||||
group_dset_dtype_single = ''
|
||||
default_prec = ''
|
||||
group_dset_std_dtype_out = 's'
|
||||
group_dset_std_dtype_in = 's'
|
||||
group_dset_py_dtype = 'str'
|
||||
|
||||
# add the dset name for templates
|
||||
rank = len(v[1])
|
||||
datatype = v[0]
|
||||
|
||||
# define whether the dset is sparse
|
||||
is_sparse = False
|
||||
int_len_printf = {}
|
||||
if 'sparse' in datatype:
|
||||
is_sparse = True
|
||||
int_len_printf[32] = 10
|
||||
int_len_printf[16] = 5
|
||||
int_len_printf[8] = 3
|
||||
|
||||
# get the dtype-related substitutions required to replace templated variables later
|
||||
if not is_sparse:
|
||||
dtype_dict = get_dtype_dict(datatype, 'dset')
|
||||
else:
|
||||
dtype_dict = get_dtype_dict(datatype, 'dset', rank, int_len_printf)
|
||||
|
||||
tmp_dict.update(dtype_dict)
|
||||
|
||||
# set the group_dset key to the full name of the dset
|
||||
tmp_dict['group_dset'] = k
|
||||
# add flag to detect index types
|
||||
if 'index' == v[0]:
|
||||
if 'index' in datatype:
|
||||
tmp_dict['is_index'] = 'file->one_based'
|
||||
else:
|
||||
tmp_dict['is_index'] = 'false'
|
||||
# add the datatypes for templates
|
||||
tmp_dict['dtype'] = datatype
|
||||
tmp_dict['group_dset_dtype'] = datatype
|
||||
tmp_dict['group_dset_h5_dtype'] = group_dset_h5_dtype
|
||||
tmp_dict['group_dset_f_dtype_default'] = group_dset_f_dtype_default
|
||||
tmp_dict['group_dset_f_dtype_double'] = group_dset_f_dtype_double
|
||||
tmp_dict['group_dset_f_dtype_single'] = group_dset_f_dtype_single
|
||||
tmp_dict['group_dset_dtype_default'] = group_dset_dtype_default
|
||||
tmp_dict['group_dset_dtype_double'] = group_dset_dtype_double
|
||||
tmp_dict['group_dset_dtype_single'] = group_dset_dtype_single
|
||||
tmp_dict['default_prec'] = default_prec
|
||||
tmp_dict['group_dset_std_dtype_in'] = group_dset_std_dtype_in
|
||||
tmp_dict['group_dset_std_dtype_out'] = group_dset_std_dtype_out
|
||||
tmp_dict['group_dset_py_dtype'] = group_dset_py_dtype
|
||||
|
||||
# add the rank
|
||||
tmp_dict['rank'] = len(v[1])
|
||||
tmp_dict['group_dset_rank'] = str(tmp_dict['rank'])
|
||||
tmp_dict['rank'] = rank
|
||||
tmp_dict['group_dset_rank'] = str(rank)
|
||||
# add the list of dimensions
|
||||
tmp_dict['dims'] = [dim.replace('.','_') for dim in v[1]]
|
||||
# build a list of dimensions to be inserted in the dims array initialization, e.g. {ao_num, ao_num}
|
||||
dim_list = tmp_dict['dims'][0]
|
||||
if tmp_dict['rank'] > 1:
|
||||
for i in range(1, tmp_dict['rank']):
|
||||
if rank > 1:
|
||||
for i in range(1, rank):
|
||||
dim_toadd = tmp_dict['dims'][i]
|
||||
dim_list += f', {dim_toadd}'
|
||||
|
||||
|
||||
tmp_dict['group_dset_dim_list'] = dim_list
|
||||
|
||||
if tmp_dict['rank'] == 0:
|
||||
if rank == 0:
|
||||
dim_f_list = ""
|
||||
else:
|
||||
dim_f_list = "(*)"
|
||||
tmp_dict['group_dset_f_dims'] = dim_f_list
|
||||
|
||||
if is_sparse:
|
||||
# store the max possible dim of the sparse dset (e.g. mo_num)
|
||||
tmp_dict['group_dset_sparse_dim'] = tmp_dict['dims'][0]
|
||||
# build printf/scanf sequence and compute line length for n-index sparse quantity
|
||||
index_printf = f'*(index_sparse + {str(rank)}*i'
|
||||
index_scanf = f'index_sparse + {str(rank)}*i'
|
||||
# one index item consumes up to index_length characters (int32_len_printf for int32 + 1 for space)
|
||||
group_dset_sparse_indices_printf = index_printf + ')'
|
||||
group_dset_sparse_indices_scanf = index_scanf
|
||||
sparse_line_length_32 = int_len_printf[32] + 1
|
||||
sparse_line_length_16 = int_len_printf[16] + 1
|
||||
sparse_line_length_8 = int_len_printf[8] + 1
|
||||
# loop from 1 because we already have stored one index
|
||||
for index_count in range(1,rank):
|
||||
group_dset_sparse_indices_printf += f', {index_printf} + {index_count})'
|
||||
group_dset_sparse_indices_scanf += f', {index_scanf} + {index_count}'
|
||||
sparse_line_length_32 += int_len_printf[32] + 1
|
||||
sparse_line_length_16 += int_len_printf[16] + 1
|
||||
sparse_line_length_8 += int_len_printf[8] + 1
|
||||
|
||||
# add 24 chars occupied by the floating point value of sparse dataset + 1 char for "\n"
|
||||
sparse_line_length_32 += 24 + 1
|
||||
sparse_line_length_16 += 24 + 1
|
||||
sparse_line_length_8 += 24 + 1
|
||||
|
||||
tmp_dict['sparse_line_length_32'] = str(sparse_line_length_32)
|
||||
tmp_dict['sparse_line_length_16'] = str(sparse_line_length_16)
|
||||
tmp_dict['sparse_line_length_8'] = str(sparse_line_length_8)
|
||||
tmp_dict['group_dset_sparse_indices_printf'] = group_dset_sparse_indices_printf
|
||||
tmp_dict['group_dset_sparse_indices_scanf'] = group_dset_sparse_indices_scanf
|
||||
|
||||
# add group name as a key-value pair to the dset dict
|
||||
tmp_dict['group'] = v[2]
|
||||
|
||||
# split datasets in numeric- and string- based
|
||||
if (datatype == 'char*'):
|
||||
if 'str' in datatype:
|
||||
dset_string_dict[k] = tmp_dict
|
||||
elif is_sparse:
|
||||
dset_sparse_dict[k] = tmp_dict
|
||||
else:
|
||||
dset_numeric_dict[k] = tmp_dict
|
||||
|
||||
return (dset_numeric_dict, dset_string_dict)
|
||||
return (dset_numeric_dict, dset_string_dict, dset_sparse_dict)
|
||||
|
||||
|
||||
def check_dim_consistency(num: dict, dset: dict) -> None:
|
||||
"""
|
||||
Consistency check to make sure that each dimensioning variable exists as a num attribute of some group.
|
||||
"""
|
||||
Consistency check to make sure that each dimensioning variable exists as a num attribute of some group.
|
||||
|
||||
Parameters:
|
||||
num (dict) : dictionary of numerical attributes
|
||||
|
94
trex.org
94
trex.org
@ -2,32 +2,40 @@
|
||||
#+STARTUP: latexpreview
|
||||
#+SETUPFILE: docs/theme.setup
|
||||
|
||||
This page contains information about the general structure of the
|
||||
TREXIO library. The source code of the library can be automatically
|
||||
generated based on the contents of the ~trex.json~ configuration file,
|
||||
which itself is compiled from different sections (groups) presented below.
|
||||
This page contains information about the general structure of the
|
||||
TREXIO library. The source code of the library can be automatically
|
||||
generated based on the contents of the ~trex.json~ configuration file,
|
||||
which itself is compiled from different sections (groups) presented
|
||||
below.
|
||||
|
||||
For more information about the automatic generation on the source code
|
||||
or regarding possible modifications, please contact the TREXIO developers.
|
||||
For more information about the automatic generation of the source code
|
||||
or regarding possible modifications, please contact the TREXIO
|
||||
developers.
|
||||
|
||||
All quantities are saved in TREXIO file in atomic units.
|
||||
The dimensions of the arrays in the tables below are given in
|
||||
column-major order (as in Fortran), and the ordering of the dimensions
|
||||
is reversed in the produced ~trex.json~ configuration file as the library is
|
||||
All quantities are saved in TREXIO file in atomic units. The
|
||||
dimensions of the arrays in the tables below are given in column-major
|
||||
order (as in Fortran), and the ordering of the dimensions is reversed
|
||||
in the produced ~trex.json~ configuration file as the library is
|
||||
written in C.
|
||||
|
||||
TREXIO currently supports ~int~, ~float~ and ~str~ types for both single attributes and arrays.
|
||||
Note, that some attributes might have ~dim~ type (e.g. ~num~ of the ~nucleus~ group).
|
||||
This type is treated exactly the same as ~int~ with the only difference that ~dim~ variables
|
||||
cannot be negative or zero. This additional constraint is required because ~dim~ attributes
|
||||
are used internally to allocate memory and to check array boundaries in the memory-safe API.
|
||||
Most of the times, the ~dim~ variables contain ~num~ suffix.
|
||||
|
||||
TREXIO currently supports ~int~, ~float~ and ~str~ types for both
|
||||
single attributes and arrays. Note that some attributes might have
|
||||
~dim~ type (e.g. ~num~ of the ~nucleus~ group). This type is treated
|
||||
exactly the same as ~int~ with the only difference that ~dim~
|
||||
variables cannot be negative. This additional constraint is required
|
||||
because ~dim~ attributes are used internally to allocate memory and to
|
||||
check array boundaries in the memory-safe API. Most of the time, the
|
||||
~dim~ variables contain the ~num~ suffix.
|
||||
|
||||
In Fortran, the arrays are 1-based and in most other languages the
|
||||
arrays are 0-based. Hence, we introduce the ~index~ type which is a
|
||||
1-based ~int~ in the Fortran interface and 0-based otherwise.
|
||||
|
||||
For sparse data structures such as electron repulsion integrals,
|
||||
the data can be too large to fit in memory and the data needs to be
|
||||
fetched using multiple function calls to perform I/O on buffers.
|
||||
|
||||
|
||||
#+begin_src python :tangle trex.json :exports none
|
||||
{
|
||||
#+end_src
|
||||
@ -78,14 +86,14 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
|
||||
|
||||
#+CALL: json(data=electron, title="electron")
|
||||
#+RESULTS:
|
||||
:RESULTS:
|
||||
:results:
|
||||
#+begin_src python :tangle trex.json
|
||||
"electron": {
|
||||
"up_num" : [ "int", [] ]
|
||||
, "dn_num" : [ "int", [] ]
|
||||
"up_num" : [ "int", [] ]
|
||||
, "dn_num" : [ "int", [] ]
|
||||
} ,
|
||||
#+end_src
|
||||
:END:
|
||||
:end:
|
||||
|
||||
* Nucleus (nucleus group)
|
||||
|
||||
@ -100,20 +108,22 @@ arrays are 0-based. Hence, we introduce the ~index~ type which is an
|
||||
| ~coord~ | ~float~ | ~(3,nucleus.num)~ | Coordinates of the atoms |
|
||||
| ~label~ | ~str~ | ~(nucleus.num)~ | Atom labels |
|
||||
| ~point_group~ | ~str~ | | Symmetry point group |
|
||||
| ~repulsion~ | ~float~ | | Nuclear repulsion energy |
|
||||
|
||||
#+CALL: json(data=nucleus, title="nucleus")
|
||||
#+RESULTS:
|
||||
:RESULTS:
|
||||
:results:
|
||||
#+begin_src python :tangle trex.json
|
||||
"nucleus": {
|
||||
"num" : [ "dim" , [] ]
|
||||
, "charge" : [ "float", [ "nucleus.num" ] ]
|
||||
, "coord" : [ "float", [ "nucleus.num", "3" ] ]
|
||||
, "label" : [ "str" , [ "nucleus.num" ] ]
|
||||
, "point_group" : [ "str" , [] ]
|
||||
"num" : [ "dim" , [] ]
|
||||
, "charge" : [ "float", [ "nucleus.num" ] ]
|
||||
, "coord" : [ "float", [ "nucleus.num", "3" ] ]
|
||||
, "label" : [ "str" , [ "nucleus.num" ] ]
|
||||
, "point_group" : [ "str" , [] ]
|
||||
, "repulsion" : [ "float", [] ]
|
||||
} ,
|
||||
#+end_src
|
||||
:END:
|
||||
:end:
|
||||
|
||||
* Effective core potentials (ecp group)
|
||||
|
||||
@ -617,15 +627,18 @@ prim_factor =
|
||||
:end:
|
||||
|
||||
* TODO Slater determinants
|
||||
* TODO Reduced density matrices (rdm group)
|
||||
* Reduced density matrices (rdm group)
|
||||
|
||||
#+NAME: rdm
|
||||
| Variable | Type | Dimensions | Description |
|
||||
|------------+----------------+------------------------------------+-------------|
|
||||
| ~one_e~ | ~float~ | ~(mo.num, mo.num)~ | |
|
||||
| ~one_e_up~ | ~float~ | ~(mo.num, mo.num)~ | |
|
||||
| ~one_e_dn~ | ~float~ | ~(mo.num, mo.num)~ | |
|
||||
| ~two_e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | |
|
||||
| Variable | Type | Dimensions | Description |
|
||||
|-----------+----------------+------------------------------------+-----------------------------------------------------------------------|
|
||||
| ~1e~ | ~float~ | ~(mo.num, mo.num)~ | One body density matrix |
|
||||
| ~1e_up~ | ~float~ | ~(mo.num, mo.num)~ | \uparrow-spin component of the one body density matrix |
|
||||
| ~1e_dn~ | ~float~ | ~(mo.num, mo.num)~ | \downarrow-spin component of the one body density matrix |
|
||||
| ~2e~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | Two-body reduced density matrix (spin trace) |
|
||||
| ~2e_upup~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\uparrow component of the two-body reduced density matrix |
|
||||
| ~2e_dndn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \downarrow\downarrow component of the two-body reduced density matrix |
|
||||
| ~2e_updn~ | ~float sparse~ | ~(mo.num, mo.num, mo.num, mo.num)~ | \uparrow\downarrow component of the two-body reduced density matrix |
|
||||
|
||||
#+CALL: json(data=rdm, title="rdm", last=1)
|
||||
|
||||
@ -633,10 +646,13 @@ prim_factor =
|
||||
:results:
|
||||
#+begin_src python :tangle trex.json
|
||||
"rdm": {
|
||||
"one_e" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "one_e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "one_e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "two_e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
|
||||
"1e" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "1e_up" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "1e_dn" : [ "float" , [ "mo.num", "mo.num" ] ]
|
||||
, "2e" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
|
||||
, "2e_upup" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
|
||||
, "2e_dndn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
|
||||
, "2e_updn" : [ "float sparse", [ "mo.num", "mo.num", "mo.num", "mo.num" ] ]
|
||||
}
|
||||
#+end_src
|
||||
:end:
|
||||
|
Loading…
Reference in New Issue
Block a user