From 8e44af311b5c47a0bf5bd99a212a925a9565a5cd Mon Sep 17 00:00:00 2001 From: q-posev <45995097+q-posev@users.noreply.github.com> Date: Fri, 17 Dec 2021 16:14:30 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20TREX-CoE?= =?UTF-8?q?/trexio@8ca74ffef14080237db3c3408fadf1cb9f2c6687=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.html | 4 +- Sparse.html | 285 -------------- examples.html | 692 ++++++++++++++++++++++++++++++++ index.html | 4 +- templator_front.html | 919 +++++++++++++++++++++++++++---------------- templator_hdf5.html | 525 ++++++++++++++++++++++-- templator_text.html | 672 ++++++++++++++----------------- trex.html | 204 ++++++---- 8 files changed, 2173 insertions(+), 1132 deletions(-) delete mode 100644 Sparse.html create mode 100644 examples.html diff --git a/README.html b/README.html index cf7c013..540e821 100644 --- a/README.html +++ b/README.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + @@ -347,7 +347,7 @@ and bug reports should be submitted at

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate

diff --git a/Sparse.html b/Sparse.html deleted file mode 100644 index 0b5e966..0000000 --- a/Sparse.html +++ /dev/null @@ -1,285 +0,0 @@ - - - - - - - - - - - - - -
-
-

Table of Contents

- -
-

-See templatorfront.org -

- -
-

1 Text back end

-
-

-As the size of the dataset should be extensible, the simplest -solution is to use one file for each sparse data set, and store a -the name of this file in the group. -Each integral can be a line in the file: -i j k l x -which can be read with "%10ld %10ld %10ld %10ld %24.16e". -The offset can be used with fseek(69L*offset, SEEK_SET) -

-
-
- -
-

2 HDF5 Back end

-
-

-We need to declare the number of rows of the dataset as -UNLIMITED. This requires to use the Chunked storage, and the -chunks should absolutely not be larger than 1MB. -

- -

-To extend the storage, see : -https://support.hdfgroup.org/HDF5/doc1.6/UG/10_Datasets.html -(figure 17) -

- -

-If the offset+num > nmax, we need to extend the dataset. -

-
-
-
-
-

Created: 2021-12-15 Wed 12:52

-

Validate

-
- - diff --git a/examples.html b/examples.html new file mode 100644 index 0000000..7f73366 --- /dev/null +++ b/examples.html @@ -0,0 +1,692 @@ + + + + + + + +Examples + + + + + + + + + + + + + +
+ UP + | + HOME +
+

Examples

+
+

Table of Contents

+ +
+ + +
+

1 Accessing sparse quantities

+
+
+
+

1.1 Fortran

+
+
+
program print_energy
+  ! Example program: reads the quantities needed for the energy from a
+  ! TREXIO file and prints the resulting energy.
+  use trexio
+  implicit none
+
+  ! character(len=...) replaces the obsolescent character*(...) form
+  character(len=128) :: filename   ! Name of the input file
+  integer            :: rc         ! Return code for error checking
+  integer(8)         :: f          ! TREXIO file handle
+  character(len=128) :: err_msg    ! Error message
+
+
+ +

+This program computes the energy as: +

+ +

+\[ + E = E_{\text{NN}} + \sum_{ij} D_{ij}\, \langle i | h | j \rangle\, + +\, \frac{1}{2} \sum_{ijkl} \Gamma_{ijkl}\, \langle i j | k l + \rangle\; \textrm{ with } \; 0 < i,j,k,l \le n + \] +

+ +

+One needs to read from the TREXIO file: +

+ +
+
\(n\)
The number of molecular orbitals
+
\(E_{\text{NN}}\)
The nuclear repulsion energy
+
\(D_{ij}\)
The one-body reduced density matrix
+
\(\langle i |h| j \rangle\)
The one-electron Hamiltonian integrals
+
\(\Gamma_{ijkl}\)
The two-body reduced density matrix
+
\(\langle i j | k l \rangle\)
The electron repulsion integrals
+
+ +
+
integer                       :: n   ! Number of molecular orbitals
+! E: computed energy, E_nn: nuclear repulsion energy
+double precision              :: E, E_nn
+! D: one-body reduced density matrix, h0: core Hamiltonian integrals
+double precision, allocatable :: D(:,:), h0(:,:)
+! G: two-body reduced density matrix, W: electron repulsion integrals
+double precision, allocatable :: G(:,:,:,:), W(:,:,:,:)
+
+
+
+ +
+

1.1.1 Declare Temporary variables

+
+
+
integer                       :: i, j, k, l, m   ! Four-index labels (i,j,k,l) and buffer loop counter (m)
+! Number of elements requested in each chunked read
+integer(8), parameter         :: BUFSIZE = 100000_8
+! offset: number of elements already read, icount: number of elements
+! requested/returned by a read, size_max: number of elements in the file
+integer(8)                    :: offset, icount, size_max
+! Indices (4 per element) and values of the elements of one chunk
+integer                       :: buffer_index(4,BUFSIZE)
+double precision              :: buffer_values(BUFSIZE)
+
+double precision, external    :: ddot   ! BLAS dot product
+
+
+
+
+ +
+

1.1.2 Obtain the name of the TREXIO file from the command line, and open it for reading

+
+
+
call get_command_argument(1, filename)   ! Standard replacement for the non-standard getarg
+
+! Open the file read-only with the HDF5 back end; rc receives the status
+f = trexio_open (filename, 'r', TREXIO_HDF5, rc)
+if (rc /= TREXIO_SUCCESS) then
+   ! Translate the return code into a readable message before stopping
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error opening TREXIO file: '//trim(err_msg)
+   stop
+end if
+
+
+
+
+ +
+

1.1.3 Read the nuclear repulsion energy

+
+
+
rc = trexio_read_nucleus_repulsion(f, E_nn)   ! The value read is stored in E_nn
+if (rc /= TREXIO_SUCCESS) then
+   ! Report the reason for the failure and terminate
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading nuclear repulsion: '//trim(err_msg)
+   stop
+end if
+
+
+
+
+ +
+

1.1.4 Read the number of molecular orbitals

+
+
+
rc = trexio_read_mo_num(f, n)   ! n is used below as the extent of every array dimension
+if (rc /= TREXIO_SUCCESS) then
+   ! Report the reason for the failure and terminate
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading number of MOs: '//trim(err_msg)
+   stop
+end if
+
+
+
+
+ +
+

1.1.5 Allocate memory

+
+
+
allocate( D(n,n), h0(n,n) )   ! One-electron arrays (n x n)
+allocate( G(n,n,n,n), W(n,n,n,n) )   ! Two-electron arrays (n^4 elements each)
+! G and W are zeroed because the chunked reads that fill them only assign
+! the elements returned in the read buffers.
+G(:,:,:,:) = 0.d0
+W(:,:,:,:) = 0.d0
+
+
+
+
+ +
+

1.1.6 Read one-electron quantities

+
+
+
rc = trexio_has_mo_1e_int_core_hamiltonian(f)   ! Check that the core Hamiltonian is present
+if (rc /= TREXIO_SUCCESS) then
+   stop 'No core hamiltonian in file'
+end if
+
+! Read the core Hamiltonian integrals into h0
+rc = trexio_read_mo_1e_int_core_hamiltonian(f, h0)
+if (rc /= TREXIO_SUCCESS) then
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading core Hamiltonian: '//trim(err_msg)
+   stop
+end if
+
+
+! Check that the one-body reduced density matrix is present
+rc = trexio_has_rdm_1e(f)
+if (rc /= TREXIO_SUCCESS) then
+   stop 'No 1e RDM in file'
+end if
+
+! Read the one-body reduced density matrix into D
+rc = trexio_read_rdm_1e(f, D)
+if (rc /= TREXIO_SUCCESS) then
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading one-body RDM: '//trim(err_msg)
+   stop
+end if
+
+
+
+
+ +
+

1.1.7 Read two-electron quantities

+
+

+Reading is done with OpenMP. Each thread reads its own buffer, and +the buffers are then processed in parallel. +

+ +

+Reading the file requires a lock, so it is done in a critical +section. The offset variable is shared, and it is incremented in +the critical section. For each read, the function returns in +icount the number of read integrals, so this variable needs also +to be protected in the critical section when modified. +

+
+ +
+
1.1.7.1 Electron repulsion integrals
+
+
+
rc = trexio_has_mo_2e_int_eri(f)   ! Check that the electron repulsion integrals are present
+if (rc /= TREXIO_SUCCESS) then
+   stop 'No electron repulsion integrals in file'
+end if
+
+! Number of integrals in the file, used to know when to stop reading
+rc = trexio_read_mo_2e_int_eri_size (f, size_max)
+if (rc /= TREXIO_SUCCESS) then
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading number of ERIs: '//trim(err_msg)
+   stop
+end if
+
+! offset is shared by all threads; each thread has its own icount and buffers
+offset = 0_8
+!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
+!$OMP   buffer_index, buffer_values, m)
+icount = BUFSIZE
+! A thread keeps reading as long as its last read filled a whole buffer
+do while (icount == BUFSIZE)
+  ! The read and the update of the shared offset are serialized
+  !$OMP CRITICAL
+  if (offset < size_max) then
+    ! NOTE(review): rc returned by this read is not checked - confirm
+    ! whether a failed read should be handled here.
+    rc = trexio_read_mo_2e_int_eri(f, offset, icount, buffer_index, buffer_values)
+    offset = offset + icount
+  else
+    ! Nothing left to read: an empty chunk ends this thread's loop
+    icount = 0
+  end if
+  !$OMP END CRITICAL
+  ! Outside the critical section, each thread processes its own buffer
+  do m=1,icount
+    i = buffer_index(1,m)
+    j = buffer_index(2,m)
+    k = buffer_index(3,m)
+    l = buffer_index(4,m)
+    ! The same value is stored at the eight index permutations below
+    W(i,j,k,l) = buffer_values(m)
+    W(k,j,i,l) = buffer_values(m)
+    W(i,l,k,j) = buffer_values(m)
+    W(k,l,i,j) = buffer_values(m)
+    W(j,i,l,k) = buffer_values(m)
+    W(j,k,l,i) = buffer_values(m)
+    W(l,i,j,k) = buffer_values(m)
+    W(l,k,j,i) = buffer_values(m)
+  end do
+end do
+!$OMP END PARALLEL
+
+
+
+
+ +
+
1.1.7.2 Reduced density matrix
+
+
+
rc = trexio_has_rdm_2e(f)   ! Check that the two-body density matrix is present
+if (rc /= TREXIO_SUCCESS) then
+   stop 'No two-body density matrix in file'
+end if
+
+! Number of 2-RDM elements in the file, used to know when to stop reading
+rc = trexio_read_rdm_2e_size (f, size_max)
+if (rc /= TREXIO_SUCCESS) then
+   call trexio_string_of_error(rc, err_msg)
+   print *, 'Error reading number of 2-RDM elements: '//trim(err_msg)
+   stop
+end if
+
+! offset is shared by all threads; each thread has its own icount and buffers
+offset = 0_8
+!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(icount, i, j, k, l, &
+!$OMP   buffer_index, buffer_values, m)
+icount = BUFSIZE
+! Loop on the thread-private icount, not on the shared offset: offset is
+! modified by other threads inside the critical section, so testing it here
+! would be an unsynchronized read. A chunk shorter than BUFSIZE (or an
+! empty one) means that there is nothing left to read, which also prevents
+! spinning forever if a read returns no data.
+do while (icount == BUFSIZE)
+  ! The read and the update of the shared offset are serialized
+  !$OMP CRITICAL
+  if (offset < size_max) then
+    ! On return, icount holds the number of elements actually read
+    rc = trexio_read_rdm_2e(f, offset, icount, buffer_index, buffer_values)
+    offset = offset + icount
+  else
+    ! Nothing left to read: an empty chunk ends this thread's loop
+    icount = 0
+  end if
+  !$OMP END CRITICAL
+  ! Outside the critical section, each thread processes its own buffer
+  do m=1,icount
+    i = buffer_index(1,m)
+    j = buffer_index(2,m)
+    k = buffer_index(3,m)
+    l = buffer_index(4,m)
+    G(i,j,k,l) = buffer_values(m)
+  end do
+end do
+!$OMP END PARALLEL
+
+
+
+
+
+
+ +
+

1.1.8 Compute the energy

+
+

+As \((n,m)\) 2D arrays are stored in memory as \((n \times m)\) 1D +arrays, we could pass the matrices to the ddot BLAS function to +perform the summations in a single call for the 1-electron quantities. +Instead, we prefer to interleave the 1-electron (negative) and +2-electron (positive) summations to have a better cancellation of +numerical errors. +

+ +

+Here \(n^4\) can be larger than the largest possible 32-bit integer, +so it is not safe to pass \(n^4\) to the ddot BLAS +function. Hence, we perform \(n^2\) loops, using vectors of size \(n^2\). +

+ +
+
+E = 0.d0
+do l=1,n
+  ! One-electron part: dot product of column l of D with column l of h0
+  E = E + ddot( n, D(1,l), 1, h0(1,l), 1 ) 
+  do k=1,n
+     ! Two-electron part: dot product of the n*n elements of G and W
+     ! that start at (1,1,k,l), scaled by 1/2
+     E = E + 0.5d0 * ddot( n*n, G(1,1,k,l), 1, W(1,1,k,l),  1 )
+  end do
+end do
+E = E + E_nn   ! Add the nuclear repulsion energy
+
+print *, 'Energy: ', E
+
+
+
+
+ +
+

1.1.9 Terminate

+
+
+
  ! Close the TREXIO file that was opened at the beginning of the program
+  rc = trexio_close(f)
+  if (rc /= TREXIO_SUCCESS) then
+     call trexio_string_of_error(rc, err_msg)
+     print *, 'Error closing TREXIO file: '//trim(err_msg)
+     stop
+  end if
+
+  ! Release the arrays allocated for the one- and two-electron quantities
+  deallocate( D, h0, G, W )
+
+end program print_energy
+
+
+
+
+
+
+
+
+

Author: TREX-CoE

+

Created: 2021-12-17 Fri 16:14

+

Validate

+
+ + diff --git a/index.html b/index.html index cf7c013..540e821 100644 --- a/index.html +++ b/index.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + @@ -347,7 +347,7 @@ and bug reports should be submitted at

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate

diff --git a/templator_front.html b/templator_front.html index 15c5fc4..cbb1fc4 100644 --- a/templator_front.html +++ b/templator_front.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + Front end API @@ -333,125 +333,136 @@ for the JavaScript code in this tag.

Table of Contents

-
-

1 Coding conventions

+
+

1 Coding conventions

  • integer types will be defined using types given in stdint.h
  • @@ -466,8 +477,8 @@ for the JavaScript code in this tag.
-
-

1.1 Memory allocation

+
+

1.1 Memory allocation

Memory allocation of structures can be facilitated by using the @@ -502,8 +513,8 @@ The maximum string size for the filenames is 4096 characters.

-
-

2 Front end

+
+

2 Front end

All calls to TREXIO are thread-safe. @@ -511,10 +522,10 @@ TREXIO front end is modular, which simplifies implementation of new back ends.

-
-

2.1 Error handling

+
+

2.1 Error handling

- +
@@ -705,9 +716,22 @@ TREXIO front end is modular, which simplifies implementation of new back ends. + + + + + +
30 'Invalid maxstrlen'
TREXIO_INT_SIZE_OVERFLOW31'Possible integer overflow'
+

+IMPORTANT! +The code below has to be executed within Emacs each time +a new error code is added to the table above. Otherwise, the codes +and the corresponding message are not propagated to the source code. +

+
""" This script generates the C and Fortran constants for the error
     codes from the org-mode table.
@@ -742,8 +766,8 @@ TREXIO front end is modular, which simplifies implementation of new back ends.
 
-
-

2.1.1 Decoding errors

+
+

2.1.1 Decoding errors

The trexio_string_of_error converts an exit code into a string. The @@ -762,12 +786,15 @@ error code into a string.

-The text strings are extracted from the previous table. +IMPORTANT! +The code below has to be executed within Emacs each time +a new error code is added to the table above. Otherwise, the codes +and the corresponding message are not propagated to the source code.

-
-
2.1.1.1 C source code
+
+
2.1.1.1 C source code
const char*
@@ -861,6 +888,9 @@ The text strings are extracted from the previous table.
   case TREXIO_INVALID_STR_LEN:
     return "Invalid max_str_len";
     break;
+  case TREXIO_INT_SIZE_OVERFLOW:
+    return "Possible integer overflow";
+    break;
   }
   return "Unknown error";
 }
@@ -875,8 +905,8 @@ The text strings are extracted from the previous table.
 
-
-
2.1.1.2 Fortran interface
+
+
2.1.1.2 Fortran interface
interface
@@ -892,8 +922,8 @@ The text strings are extracted from the previous table.
 
-
-
2.1.1.3 Python interface
+
+
2.1.1.3 Python interface
class Error(Exception):
@@ -922,7 +952,7 @@ The text strings are extracted from the previous table.
     try:
         error_str = pytr.trexio_string_of_error(return_code)
     except:
-        raise 
+        raise
 
     return error_str
 
@@ -932,8 +962,8 @@ The text strings are extracted from the previous table.
-
-

2.2 Back ends

+
+

2.2 Back ends

TREXIO has several back ends: @@ -953,12 +983,12 @@ lines that correspond to the TREXIO_JSON back end (not implemented

-Note: It is important to increment the value of TREXIOINVALIDBACKEND when a new back end is added. Otherwise, it will not be available. +Note: It is important to increment the value of TREXIOINVALIDBACKEND when a new back end is added. Otherwise, it will not be available.

-
-

2.2.1 C

+
+

2.2.1 C

typedef int32_t back_end_t;
@@ -995,17 +1025,17 @@ This is useful due to the fact that HDF5 back end can be disabled at configure s
 #endif
   }
   return false;
-}   
+}
 
-
-

2.2.2 Fortran

+
+

2.2.2 Fortran

-
  integer(trexio_backend), parameter :: TREXIO_HDF5 = 0 
+
  integer(trexio_backend), parameter :: TREXIO_HDF5 = 0
   integer(trexio_backend), parameter :: TREXIO_TEXT = 1
 ! integer(trexio_backend), parameter :: TREXIO_JSON = 2
   integer(trexio_backend), parameter :: TREXIO_INVALID_BACK_END = 2
@@ -1028,12 +1058,12 @@ The function below is a Fortran interface for the aforementioned C-compatible 
 
-
-

2.2.3 Python

+
+

2.2.3 Python

# define TREXIO back ends
-TREXIO_HDF5 = 0 
+TREXIO_HDF5 = 0
 TREXIO_TEXT = 1
 #TREXIO_JSON = 2
 TREXIO_INVALID_BACK_END = 2
@@ -1043,8 +1073,8 @@ The function below is a Fortran interface for the aforementioned C-compatible 
 
-
-

2.3 Read/write behavior

+
+

2.3 Read/write behavior

Every time a reading function is called, the data is read from the @@ -1072,8 +1102,8 @@ concurrent programs, the behavior is not specified.

-
-

2.4 TREXIO file type

+
+

2.4 TREXIO file type

trexio_s is the main type for TREXIO files, visible to the users @@ -1107,8 +1137,8 @@ TREXIO files will have as a first argument the TREXIO file handle.

-
-

2.4.1 TREXIOFile Python class

+
+

2.4.1 TREXIOFile Python class

class File:
@@ -1131,7 +1161,7 @@ TREXIO files will have as a first argument the TREXIO file handle.
     pytrexio_s:
         A PyObject corresponding to SWIG proxy of the trexio_s struct in C.
         This argument is in fact a TREXIO file handle, which is required for
-        communicating with the C back end. 
+        communicating with the C back end.
     info: dict
         Dictionary of key-value pairs with additional information about the file.
     """
@@ -1145,7 +1175,7 @@ TREXIO files will have as a first argument the TREXIO file handle.
         self.mode = mode
         self.back_end = back_end
 
-        self.isOpen = False 
+        self.isOpen = False
         if pytrexio_s is None:
             self.pytrexio_s = open(filename, mode, back_end)
             self.isOpen = True
@@ -1174,15 +1204,15 @@ TREXIO files will have as a first argument the TREXIO file handle.
         elif self.isOpen is None:
             raise Exception("[WIP]: TREXIO file handle provided but what if the file is already closed?")
         else:
-            pass 
+            pass
 
-
-

2.5 Polymorphism of the file handle

+
+

2.5 Polymorphism of the file handle

Polymorphism of the trexio_t type is handled by ensuring that the @@ -1201,8 +1231,8 @@ corresponding types for all back ends can be safely casted to

-
-

2.6 File opening

+
+

2.6 File opening

trexio_open creates a new TREXIO file or opens existing one. @@ -1241,8 +1271,8 @@ renaming the .txt data files.

-
-

2.6.1 C

+
+

2.6.1 C

trexio_t*
@@ -1277,7 +1307,7 @@ renaming the .txt data files.
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     result_tmp = malloc(sizeof(trexio_hdf5_t));
     break;
 #else
@@ -1333,7 +1363,7 @@ renaming the .txt data files.
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_init(result);
     break;
 #else
@@ -1353,42 +1383,6 @@ renaming the .txt data files.
     return NULL;
   }
 
-  rc = trexio_has_metadata_package_version(result);
-  if (rc == TREXIO_FAILURE) {
-    if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
-    free(result);
-    return NULL;
-  }
-
-  if (rc == TREXIO_HAS_NOT) {
-    switch (back_end) {
-
-    case TREXIO_TEXT:
-      rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
-      break;
-
-    case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
-      rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
-      break;
-#else
-      if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING;
-      return NULL;
-#endif 
-/*
-    case TREXIO_JSON:
-      rc = trexio_json_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
-      break;
-*/
-    }
-  }
-
-  if (rc != TREXIO_SUCCESS) {
-    if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
-    free(result);
-    return NULL;
-  }
-
 
   /* File locking */
 
@@ -1401,7 +1395,7 @@ renaming the .txt data files.
     break;
   /* HDF5 v.>=1.10 has file locking activated by default */
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = TREXIO_SUCCESS;
     break;
 #else
@@ -1421,7 +1415,42 @@ renaming the .txt data files.
     return NULL;
   }
 
+  /* Write metadata (i.e. package version) upon creation */
+  rc = trexio_has_metadata_package_version(result);
+  if (rc == TREXIO_FAILURE) {
+    if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
+    free(result);
+    return NULL;
+  }
+
+  if (rc == TREXIO_HAS_NOT) {
+    switch (back_end) {
+
+    case TREXIO_TEXT:
+      rc = trexio_text_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
+      break;
+
+    case TREXIO_HDF5:
+#ifdef HAVE_HDF5
+      rc = trexio_hdf5_write_metadata_package_version(result, TREXIO_PACKAGE_VERSION);
+      break;
+#else
+      if (rc_open != NULL) *rc_open = TREXIO_BACK_END_MISSING;
+      return NULL;
+#endif
+
+    }
+  }
+
+  if (rc != TREXIO_SUCCESS) {
+    if (rc_open != NULL) *rc_open = TREXIO_OPEN_ERROR;
+    free(result);
+    return NULL;
+  }
+
+  /* Exit upon success */
   if (rc_open != NULL) *rc_open = TREXIO_SUCCESS;
+
   return result;
 }
 
@@ -1429,8 +1458,8 @@ renaming the .txt data files.
-
-

2.6.2 Fortran

+
+

2.6.2 Fortran

interface
@@ -1448,8 +1477,8 @@ renaming the .txt data files.
 
-
-

2.6.3 Python

+
+

2.6.3 Python

def open(file_name: str, mode: str, back_end: int):
@@ -1464,9 +1493,9 @@ renaming the .txt data files.
         One of the currently supported ~open~ modes (e.g. 'w', 'r')
 
     back_end: int
-        One of the currently supported TREXIO back ends (e.g. TREXIO_HDF5, TREXIO_TEXT)    
+        One of the currently supported TREXIO back ends (e.g. TREXIO_HDF5, TREXIO_TEXT)
 
-    Return: 
+    Return:
         SWIG object of type trexio_s.
 
     Examples:
@@ -1497,8 +1526,8 @@ renaming the .txt data files.
 
-
-

2.6.4 Zero-based versus one-based arrays of indices

+
+

2.6.4 Zero-based versus one-based arrays of indices

Because arrays are one-based in Fortran, we need to set a flag to @@ -1531,8 +1560,8 @@ know if we need to shift by 1 arrays of indices.

-
-

2.7 File closing

+
+

2.7 File closing

trexio_close closes an existing trexio_t file. @@ -1549,8 +1578,8 @@ output:

-
-

2.7.1 C

+
+

2.7.1 C

trexio_exit_code
@@ -1571,7 +1600,7 @@ output:
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_deinit(file);
     break;
 #else
@@ -1600,7 +1629,7 @@ output:
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = TREXIO_SUCCESS;
     break;
 #else
@@ -1629,8 +1658,8 @@ output:
 
-
-

2.7.2 Fortran

+
+

2.7.2 Fortran

interface
@@ -1644,8 +1673,8 @@ output:
 
-
-

2.7.3 Python

+
+

2.7.3 Python

def close(trexio_file):
@@ -1667,12 +1696,12 @@ output:
 
-
-

3 Templates for front end

+
+

3 Templates for front end

-
-

3.1 Description

+
+

3.1 Description

Consider the following block of trex.json: @@ -1780,13 +1809,13 @@ each variable can be found below: -$group_dset_std_dtype_in$ +$group_dset_format_scanf$ Input type of the dataset in TEXT [fscanf] %lf -$group_dset_std_dtype_out$ +$group_dset_format_printf$ Output type of the dataset in TEXT [fprintf] %24.16e @@ -1855,7 +1884,7 @@ each variable can be found below:

-Some of the aforementioned template variables with group_dset prefix are duplicated with group_num prefix, +Some of the aforementioned template variables with group_dset prefix are duplicated with group_num prefix, e.g. you might find \(group_num_dtype_double\) in the templates corresponding to numerical attributes. The expanding values are the same as for group_dset and thus are not listed in the table above.

@@ -1893,12 +1922,12 @@ value will result in TREXIO_INVALID_ARG_2 exit code.
-
-

3.2 Templates for front end has/read/write a single numerical attribute

+
+

3.2 Templates for front end has/read/write a single numerical attribute

-
-

3.2.1 Introduction

+
+

3.2.1 Introduction

This section concerns API calls related to numerical attributes, @@ -1969,8 +1998,8 @@ namely single value of int/float types.

-
-

3.2.2 C templates for front end

+
+

3.2.2 C templates for front end

The C templates that correspond to each of the abovementioned @@ -1984,12 +2013,12 @@ precision (see Table above).

-
-
3.2.2.1 Function declarations
+
+
3.2.2.1 Function declarations
-
-
3.2.2.2 Source code for double precision functions
+
+
3.2.2.2 Source code for double precision functions
trexio_exit_code
@@ -2005,7 +2034,7 @@ precision (see Table above).
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_read_$group_num$(file, num);
     break;
 #else
@@ -2035,17 +2064,17 @@ precision (see Table above).
 
   case TREXIO_TEXT:
     return trexio_text_write_$group_num$(file, num);
-    break;                                     
+    break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_write_$group_num$(file, num);
     break;
 #else
     return TREXIO_BACK_END_MISSING;
 #endif
-/*                                             
-  case TREXIO_JSON:                            
+/*
+  case TREXIO_JSON:
     return trexio_json_write_$group_num$(file, num);
     break;
 */
@@ -2058,8 +2087,8 @@ precision (see Table above).
 
-
-
3.2.2.3 Source code for single precision functions
+
+
3.2.2.3 Source code for single precision functions
trexio_exit_code
@@ -2078,7 +2107,7 @@ precision (see Table above).
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_read_$group_num$(file, &num_64);
     break;
 #else
@@ -2112,17 +2141,17 @@ precision (see Table above).
 
   case TREXIO_TEXT:
     return trexio_text_write_$group_num$(file, ($group_num_dtype_double$) num);
-    break;                                             
+    break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_write_$group_num$(file, ($group_num_dtype_double$) num);
     break;
 #else
     return TREXIO_BACK_END_MISSING;
-#endif 
-/*                                                     
-  case TREXIO_JSON:                                    
+#endif
+/*
+  case TREXIO_JSON:
     return trexio_json_write_$group_num$(file, ($group_num_dtype_double$) num);
     break;
 */
@@ -2135,8 +2164,8 @@ precision (see Table above).
 
-
-
3.2.2.4 Source code for default functions
+
+
3.2.2.4 Source code for default functions
trexio_exit_code
@@ -2172,7 +2201,7 @@ precision (see Table above).
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_has_$group_num$(file);
     break;
 #else
@@ -2193,8 +2222,8 @@ precision (see Table above).
 
-
-

3.2.3 Fortran templates for front end

+
+

3.2.3 Fortran templates for front end

The Fortran templates that provide an access to the C API calls from Fortran. @@ -2279,11 +2308,11 @@ These templates are based on the use of iso_c_binding. Pointers hav

-
-

3.2.4 Python templates for front end

+
+

3.2.4 Python templates for front end

-
def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None: 
+
def write_$group_num$(trexio_file, num_w: $group_num_py_dtype$) -> None:
     """Write the $group_num$ variable in the TREXIO file.
 
     Parameters:
@@ -2304,12 +2333,12 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
-
def read_$group_num$(trexio_file) -> $group_num_py_dtype$: 
+
def read_$group_num$(trexio_file) -> $group_num_py_dtype$:
     """Read the $group_num$ variable from the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -2328,14 +2357,14 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     return num_r
 
-
def has_$group_num$(trexio_file) -> bool: 
+
def has_$group_num$(trexio_file) -> bool:
     """Check that $group_num$ variable exists in the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -2353,7 +2382,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc == TREXIO_FAILURE:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     if rc == TREXIO_SUCCESS:
         return True
@@ -2365,12 +2394,12 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
-

3.3 Templates for front end has/read/write a dataset of numerical data

+
+

3.3 Templates for front end has/read/write a dataset of numerical data

-
-

3.3.1 Introduction

+
+

3.3.1 Introduction

This section concerns API calls related to datasets. @@ -2452,8 +2481,8 @@ This section concerns API calls related to datasets.

-
-

3.3.2 C templates for front end

+
+

3.3.2 C templates for front end

The C templates that correspond to each of the abovementioned functions can be found below. @@ -2464,13 +2493,12 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T

-
-
3.3.2.1 Function declarations
+
+
3.3.2.1 Function declarations
- -
-
3.3.2.2 Source code for double precision functions
+
+
3.3.2.2 Source code for double precision functions
trexio_exit_code
@@ -2502,7 +2530,7 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_read_$group_dset$(file, $group_dset$, rank, dims);
     break;
 #else
@@ -2581,7 +2609,7 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_write_$group_dset$(file, $group_dset$_p, rank, dims);
     break;
 #else
@@ -2606,8 +2634,8 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
 
-
-
3.3.2.3 Source code for single precision functions
+
+
3.3.2.3 Source code for single precision functions
trexio_exit_code
@@ -2648,7 +2676,7 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_read_$group_dset$(file, $group_dset$_64, rank, dims);
     break;
 #else
@@ -2732,7 +2760,7 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_write_$group_dset$(file, $group_dset$_64, rank, dims);
     break;
 #else
@@ -2756,11 +2784,11 @@ The basic (non-suffixed) API call on datasets deals with double precision (see T
 
-
-
3.3.2.4 Source code for memory-safe functions
+
+
3.3.2.4 Source code for memory-safe functions
-
trexio_exit_code rc;
+
trexio_exit_code rc;
 int64_t $group_dset_dim$ = 0;
 
 /* Error handling for this call is added by the generator */
@@ -2916,8 +2944,8 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
 
-
-
3.3.2.5 Source code for default functions
+
+
3.3.2.5 Source code for default functions
trexio_exit_code
@@ -2975,7 +3003,7 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_has_$group_dset$(file);
     break;
 #else
@@ -2995,8 +3023,8 @@ rc = trexio_read_$group_dset_dim$_64(file, &($group_dset_dim$));
 
-
-

3.3.3 Fortran templates for front end

+
+

3.3.3 Fortran templates for front end

The Fortran templates that provide an access to the C API calls from Fortran. @@ -3081,11 +3109,11 @@ These templates are based on the use of iso_c_binding. Pointers hav

-
-

3.3.4 Python templates for front end

+
+

3.3.4 Python templates for front end

-
def write_$group_dset$(trexio_file, dset_w) -> None: 
+
def write_$group_dset$(trexio_file, dset_w) -> None:
     """Write the $group_dset$ array of numbers in the TREXIO file.
 
     Parameters:
@@ -3147,7 +3175,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None): 
+
def read_$group_dset$(trexio_file, dim = None, doReshape = None, dtype = None):
     """Read the $group_dset$ array of numbers from the TREXIO file.
 
     Parameters:
@@ -3167,7 +3195,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         based on the dimensions from the ~trex.json~ file. Otherwise, ~shape~ array (list or tuple) is used if provided by the user.
 
     Returns:
-        ~dset_64~ if dtype is None or ~dset_converted~ otherwise: numpy.ndarray 
+        ~dset_64~ if dtype is None or ~dset_converted~ otherwise: numpy.ndarray
         1D NumPy array with ~dim~ elements corresponding to $group_dset$ values read from the TREXIO file.
 
     Raises:
@@ -3186,7 +3214,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
     # if dim is not specified, read dimensions from the TREXIO file
     dims_list = None
     if dim is None or doReshape:
-        $group_dset_dim$ = read_$group_dset_dim$(trexio_file) 
+        $group_dset_dim$ = read_$group_dset_dim$(trexio_file)
 
         dims_list = [$group_dset_dim_list$]
         dim = 1
@@ -3230,14 +3258,14 @@ These templates are based on the use of iso_c_binding. Pointers hav
     if doReshape:
         try:
             # in-place reshaping did not work so I have to make a copy
-            if isConverted:    
+            if isConverted:
                 dset_reshaped = np.reshape(dset_converted, shape, order='C')
             else:
                 dset_reshaped = np.reshape(dset_64, shape, order='C')
         except:
             raise
 
-    if isConverted:    
+    if isConverted:
         return dset_converted
     elif doReshape:
         return dset_reshaped
@@ -3247,7 +3275,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
def has_$group_dset$(trexio_file) -> bool: 
+
def has_$group_dset$(trexio_file) -> bool:
     """Check that $group_dset$ variable exists in the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -3265,7 +3293,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc == TREXIO_FAILURE:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     if rc == TREXIO_SUCCESS:
         return True
@@ -3277,9 +3305,13 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
-

3.4 Sparse data structures

+
+

3.4 Templates for front end has/read/write a dataset of sparse data

+
+
+

3.4.1 Introduction

+

Sparse data structures are used typically for large tensors such as two-electron integrals. For example, in the trex.json file sparse @@ -3288,14 +3320,13 @@ arrays appear as for the eri :

"ao_2e_int"  : {
-  "eri_num"  : [ "int", [  ] ]
   "eri"      : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ] ]
 }
 

-The electron repulsion integral \(\langle ij | kl \rangle\) is +The electron repulsion integral (eri) \(\langle ij | kl \rangle\) is represented as a quartet of integers \((i,j,k,l)\) and a floating point value.

@@ -3323,14 +3354,15 @@ does the appropriate conversion when reading or writing.

-As the number of integrals to store can be prohibitively large, we +As the number of integrals to store can be prohibitively large, we provide the possibility to read/write the integrals in chunks. So the functions take two extra parameters:

+
    -
  • offset : the index of the 1st integral we want to read. An -offset of zero implies to read the first integral.
  • -
  • num : the number of integrals to read.
  • +
  • offset : how many integrals in the file should be skipped when reading. +An offset of zero implies to read the first integral.
  • +
  • size : the number of integrals to read.

@@ -3338,40 +3370,118 @@ We provide a function to read a chunk of indices, and a function to read a chunk of values, because some users might want to read only the values of the integrals, or only the indices.

+
+
-

-Here is an example for the indices: -

+
+

3.4.2 C templates for front end

+
+
+
+
3.4.2.1 Function declarations
+
+
+
3.4.2.2 Source code for default functions
+
trexio_exit_code
-trexio_read_chunk_ao_2e_int_eri_index_32(trexio_t* const file,
-                                         const int64_t offset,
-                                         const int64_t num,
-                                         int32_t* buffer)
+trexio_read_$group_dset$(trexio_t* const file,
+                         const int64_t offset_file,
+                         int64_t* const buffer_size,
+                         int32_t* const index_sparse,
+                         double* const value_sparse
+                         )
 {
+  /* Front-end read of one chunk of sparse data (indices + values).
+   *
+   *   file         : TREXIO file handle
+   *   offset_file  : offset (in entries) forwarded to the back end
+   *   buffer_size  : in  - number of entries requested (must be > 0)
+   *                  out - overwritten with the number of entries actually read
+   *                        when the back end reports TREXIO_END
+   *   index_sparse : output buffer, rank * (*buffer_size) indices
+   *   value_sparse : output buffer for the values
+   *
+   * Returns the exit code of the back end (TREXIO_SUCCESS or TREXIO_END after a read),
+   * or an error code if validation or one of the preliminary reads fails.
+   */
   if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (offset   < 0L) return TREXIO_INVALID_ARG_2;
-  if (num      < 0L) return TREXIO_INVALID_ARG_3;
+  if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
+  /* buffer_size is dereferenced right below, so reject a NULL pointer first */
+  if (buffer_size == NULL) return TREXIO_INVALID_ARG_3;
+  if (*buffer_size <= 0L) return TREXIO_INVALID_ARG_3;
+  if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
+  if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
+  if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
 
-  const uint32_t rank = 4;  // To be set by generator : number of indices
+  const uint32_t rank = $group_dset_rank$;  // To be set by generator : number of indices
 
-  int64_t nmax;             // Max number of integrals
+  int64_t size_max;         // Max number of integrals (already in the file)
   trexio_exit_code rc;
 
-  rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
+  /* Read the max number of integrals stored in the file */
+  rc = trexio_read_$group_dset$_size(file, &size_max);
   if (rc != TREXIO_SUCCESS) return rc;
 
+  /* Read the dimension associated with the sparse dataset; it is forwarded to the back end */
+  int64_t num;
+  rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
+  if (rc != TREXIO_SUCCESS) return rc;
+
+  // filled by the back end with the number of integrals actually read if EOF is encountered
+  int64_t eof_read_size = 0L;
+
   switch (file->back_end) {
 
   case TREXIO_TEXT:
-    return trexio_text_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
+    rc = trexio_text_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
     break;
 
   case TREXIO_HDF5:
-    return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, rank, nmax);
+#ifdef HAVE_HDF5
+    rc = trexio_hdf5_read_$group_dset$(file, offset_file, *buffer_size, num, &eof_read_size, index_sparse, value_sparse);
+    break;
+#else
+    rc = TREXIO_BACK_END_MISSING;
+    /* break is required: without it control falls into default and the code is replaced by TREXIO_FAILURE */
+    break;
+#endif
+/*
+  case TREXIO_JSON:
+    return trexio_json_read_$group_dset$(...);
+    break;
+*/
+  default:
+    rc = TREXIO_FAILURE;  /* Impossible case */
+  }
+
+  if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc;
+
+  /* On EOF report back how many entries were really read */
+  if (rc == TREXIO_END) *buffer_size = eof_read_size;
+
+  // shift indices to be one-based if Fortran API is used
+  if (file->one_based) {
+    // if EOF is reached - shift only indices that have been read, not an entire buffer
+    uint64_t index_size = rank*(*buffer_size) ;
+    for (uint64_t i=0; i<index_size; ++i){
+      index_sparse[i] += 1;
+    }
+  }
+
+  return rc;
+}
+
+
+ + +
+
trexio_exit_code
+trexio_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
+{
+  if (file  == NULL) return TREXIO_INVALID_ARG_1;
+  if (trexio_has_$group_dset$(file) != TREXIO_SUCCESS) return TREXIO_DSET_MISSING;
+
+  switch (file->back_end) {
+
+  case TREXIO_TEXT:
+    return trexio_text_read_$group_dset$_size(file, size_max);
     break;
 
+  case TREXIO_HDF5:
+#ifdef HAVE_HDF5
+    return trexio_hdf5_read_$group_dset$_size(file, size_max);
+    break;
+#else
+    return TREXIO_BACK_END_MISSING;
+#endif
+/*
+  case TREXIO_JSON:
+    return trexio_json_read_$group_dset$_size(...);
+    break;
+*/
   default:
     return TREXIO_FAILURE;  /* Impossible case */
   }
@@ -3379,52 +3489,189 @@ Here is an example for the indices:
 
-

-For the values, -

trexio_exit_code
-trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
-                                         const int64_t offset,
-                                         const int64_t num,
-                                         double* buffer)
+trexio_write_$group_dset$(trexio_t* const file,
+                           const int64_t offset_file,
+                           const int64_t buffer_size,
+                           const int32_t* index_sparse,
+                           const double* value_sparse
+                           )
 {
+  /* Front-end write of one chunk of sparse data (indices + values).
+   *
+   *   file         : TREXIO file handle
+   *   offset_file  : offset (in entries) forwarded to the back end
+   *   buffer_size  : number of entries in the buffers (must be > 0)
+   *   index_sparse : rank * buffer_size indices (never modified; a shifted copy is
+   *                  written instead when file->one_based is set)
+   *   value_sparse : the values to write
+   *
+   * Returns the exit code of the back end, or an error code if validation,
+   * a preliminary read or the temporary allocation fails.
+   */
   if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (offset   < 0L) return TREXIO_INVALID_ARG_2;
-  if (num      < 0L) return TREXIO_INVALID_ARG_3;
+  if (offset_file < 0L) return TREXIO_INVALID_ARG_2;
+  if (buffer_size <= 0L) return TREXIO_INVALID_ARG_3;
+  if (index_sparse == NULL) return TREXIO_INVALID_ARG_4;
+  if (value_sparse == NULL) return TREXIO_INVALID_ARG_5;
 
-  int64_t nmax;             // Max number of integrals
+  const uint32_t rank = $group_dset_rank$;  // To be set by generator : number of indices
+
+  int64_t size_max=0L;         // Max number of integrals (already in the file)
   trexio_exit_code rc;
 
-  rc = trexio_read_ao_2e_int_eri_num(const file, &nmax);
+  /* Read the max number of integrals stored in the file */
+  rc = trexio_read_$group_dset$_size(file, &size_max);
+  if (rc != TREXIO_SUCCESS && rc != TREXIO_DSET_MISSING) return rc;
+  /* A missing dataset is not an error here: nothing has been written yet */
+  if (rc == TREXIO_DSET_MISSING) size_max = 0L;
+
+  /* Read the dimension associated with the sparse dataset; it is forwarded to the back end */
+  int64_t num;
+  rc = trexio_read_$group_dset_sparse_dim$_64(file, &num);
   if (rc != TREXIO_SUCCESS) return rc;
 
+  int32_t* index_sparse_p = (int32_t*) index_sparse;
+  // shift indices to be zero-based if Fortran API is used
+  if (file->one_based) {
+
+    /* Work on a private copy so that the caller's buffer stays untouched */
+    uint64_t index_size = rank * buffer_size;
+    index_sparse_p = CALLOC(index_size, int32_t);
+    if (index_sparse_p == NULL) return TREXIO_ALLOCATION_FAILED;
+
+    for (uint64_t i=0; i<index_size; ++i){
+      index_sparse_p[i] = index_sparse[i] - 1;
+    }
+
+  }
+
   switch (file->back_end) {
 
   case TREXIO_TEXT:
-    return trexio_text_read_chunk_ao_2e_int_eri_value(file, buffer, offset, num, nmax);
+    rc = trexio_text_write_$group_dset$(file, offset_file, buffer_size, num, size_max, index_sparse_p, value_sparse);
     break;
 
   case TREXIO_HDF5:
-    return trexio_hdf5_read_chunk_ao_2e_int_eri_index(file, buffer, offset, num, nmax);
+#ifdef HAVE_HDF5
+    rc = trexio_hdf5_write_$group_dset$(file, offset_file, buffer_size, num, index_sparse_p, value_sparse);
+    break;
+#else
+    rc = TREXIO_BACK_END_MISSING;
+    /* break is required: without it control falls into default and the code is replaced by TREXIO_FAILURE */
+    break;
+#endif
+/*
+  case TREXIO_JSON:
+    rc = trexio_json_write_$group_dset$(...);
+    break;
+*/
+  default:
+    rc = TREXIO_FAILURE;  /* Impossible case */
+  }
+
+  // free the memory allocated to shift indices to be zero-based
+  if (file->one_based) FREE(index_sparse_p);
+
+  return rc;
+}
+
+
+ + +
+
trexio_exit_code
+trexio_has_$group_dset$ (trexio_t* const file)  /* Front-end check: forwards to the has_ function of the file's back end */
+{
+
+  if (file  == NULL) return TREXIO_INVALID_ARG_1;
+
+  assert(file->back_end < TREXIO_INVALID_BACK_END);  /* debug-build guard against an out-of-range back end */
+
+  switch (file->back_end) {
+
+  case TREXIO_TEXT:
+    return trexio_text_has_$group_dset$(file);
     break;
 
-  default:
-    return TREXIO_FAILURE;  /* Impossible case */
+  case TREXIO_HDF5:
+#ifdef HAVE_HDF5
+    return trexio_hdf5_has_$group_dset$(file);
+    break;
+#else
+    return TREXIO_BACK_END_MISSING;  /* built without HAVE_HDF5 */
+#endif
+/*
+  case TREXIO_JSON:
+    return trexio_json_has_$group_dset$(file);
+    break;
+*/
   }
+
+  return TREXIO_FAILURE;  /* reached only when no case of the switch matched */
 }
 
+
-
-

3.5 Templates for front end has/read/write a dataset of strings

+
+

3.4.3 Fortran templates for front end

+
+

+The Fortran templates that provide an access to the C API calls from Fortran. +These templates are based on the use of iso_c_binding. Pointers have to be passed by value. +

+ +
+
interface
+   ! Binding to the C function of the same name: writes a chunk of sparse data.
+   ! Scalars are passed by value; the arrays are assumed-size for C interoperability.
+   ! Kinds come from iso_c_binding instead of literal kind numbers, which are not portable.
+   integer function trexio_write_$group_dset$ (trex_file, &
+                                               offset_file, buffer_size, &
+                                               index_sparse, value_sparse) bind(C)
+     use, intrinsic :: iso_c_binding, only: c_int32_t, c_int64_t, c_double
+     integer(c_int64_t), intent(in), value :: trex_file      ! TREXIO file handle
+     integer(c_int64_t), intent(in), value :: offset_file    ! offset (in entries) in the file
+     integer(c_int64_t), intent(in), value :: buffer_size    ! number of entries to write
+     integer(c_int32_t), intent(in)        :: index_sparse(*)
+     real(c_double), intent(in)            :: value_sparse(*)
+   end function trexio_write_$group_dset$
+end interface
+
+
+ +
+
interface
+   ! Binding to the C function of the same name: reads a chunk of sparse data.
+   ! buffer_size is passed by reference because the C side may overwrite it
+   ! with the number of entries actually read.
+   ! Kinds come from iso_c_binding instead of literal kind numbers, which are not portable.
+   integer function trexio_read_$group_dset$ (trex_file, &
+                                              offset_file, buffer_size, &
+                                              index_sparse, value_sparse) bind(C)
+     use, intrinsic :: iso_c_binding, only: c_int32_t, c_int64_t, c_double
+     integer(c_int64_t), intent(in), value :: trex_file      ! TREXIO file handle
+     integer(c_int64_t), intent(in), value :: offset_file    ! offset (in entries) in the file
+     integer(c_int64_t), intent(inout)     :: buffer_size    ! in: requested, out: actually read
+     integer(c_int32_t), intent(out)       :: index_sparse(*)
+     real(c_double), intent(out)           :: value_sparse(*)
+   end function trexio_read_$group_dset$
+end interface
+
+
+ +
+
interface
+   ! Binding to the C function of the same name: returns in size_max the number
+   ! of sparse entries stored in the file.
+   ! Kinds come from iso_c_binding instead of literal kind numbers, which are not portable.
+   integer function trexio_read_$group_dset$_size (trex_file, &
+                                                   size_max) bind(C)
+     use, intrinsic :: iso_c_binding, only: c_int64_t
+     integer(c_int64_t), intent(in), value :: trex_file   ! TREXIO file handle
+     integer(c_int64_t), intent(out)       :: size_max
+   end function trexio_read_$group_dset$_size
+end interface
+
+
+ +
+
interface
+   ! Binding to the C function of the same name: checks whether the sparse
+   ! dataset exists in the file.
+   ! The kind comes from iso_c_binding instead of a literal kind number, which is not portable.
+   integer function trexio_has_$group_dset$ (trex_file) bind(C)
+     use, intrinsic :: iso_c_binding, only: c_int64_t
+     integer(c_int64_t), intent(in), value :: trex_file   ! TREXIO file handle
+   end function trexio_has_$group_dset$
+end interface
+
+
+
+
+
+ +
+

3.5 Templates for front end has/read/write a dataset of strings

-
-

3.5.1 Introduction

+
+

3.5.1 Introduction

This section concerns API calls related to datasets of strings. @@ -3464,8 +3711,8 @@ This section concerns API calls related to datasets of strings.

-
-

3.5.2 C templates for front end

+
+

3.5.2 C templates for front end

First parameter is the TREXIO file handle. Second parameter is the variable to be written/read @@ -3473,12 +3720,12 @@ to/from the TREXIO file (except for trexio_has_ functi

-
-
3.5.2.1 Function declarations
+
+
3.5.2.1 Function declarations
-
-
3.5.2.2 Source code for default functions
+
+
3.5.2.2 Source code for default functions
trexio_exit_code
@@ -3509,7 +3756,7 @@ to/from the TREXIO file (except for trexio_has_ functi
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_read_$group_dset$(file, dset_out, rank, dims, (uint32_t) max_str_len);
     break;
 #else
@@ -3630,7 +3877,7 @@ to/from the TREXIO file (except for trexio_has_ functi
     tmp_str += pch_len + 1;
   }
 
-  rc = TREXIO_FAILURE;  
+  rc = TREXIO_FAILURE;
   switch (file->back_end) {
 
   case TREXIO_TEXT:
@@ -3638,7 +3885,7 @@ to/from the TREXIO file (except for trexio_has_ functi
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     rc = trexio_hdf5_write_$group_dset$(file, (const char**) dset_str, rank, dims);
     break;
 #else
@@ -3711,7 +3958,7 @@ to/from the TREXIO file (except for trexio_has_ functi
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_has_$group_dset$(file);
     break;
 #else
@@ -3732,8 +3979,8 @@ to/from the TREXIO file (except for trexio_has_ functi
 
-
-

3.5.3 Fortran templates for front end

+
+

3.5.3 Fortran templates for front end

The Fortran templates that provide an access to the C API calls from Fortran. @@ -3791,7 +4038,7 @@ These templates are based on the use of iso_c_binding. Pointers hav allocate(str_compiled($group_dset_dim$*(max_str_len+1)+1)) rc = trexio_read_$group_dset$_low(trex_file, str_compiled, max_str_len) - if (rc /= TREXIO_SUCCESS) then + if (rc /= TREXIO_SUCCESS) then deallocate(str_compiled) trexio_read_$group_dset$ = rc else @@ -3829,11 +4076,11 @@ These templates are based on the use of iso_c_binding. Pointers hav

-
-

3.5.4 Python templates for front end

+
+

3.5.4 Python templates for front end

-
def write_$group_dset$(trexio_file, dset_w: list) -> None: 
+
def write_$group_dset$(trexio_file, dset_w: list) -> None:
     """Write the $group_dset$ array of strings in the TREXIO file.
 
     Parameters:
@@ -3857,13 +4104,13 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
 
-
def read_$group_dset$(trexio_file, dim = None) -> list: 
+
def read_$group_dset$(trexio_file, dim = None) -> list:
     """Read the $group_dset$ array of strings from the TREXIO file.
 
     Parameters:
@@ -3886,7 +4133,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
     # if dim is not specified, read dimensions from the TREXIO file
     if dim is None:
-        $group_dset_dim$ = read_$group_dset_dim$(trexio_file) 
+        $group_dset_dim$ = read_$group_dset_dim$(trexio_file)
 
         dims_list = [$group_dset_dim_list$]
         dim = 1
@@ -3900,7 +4147,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
 
     try:
@@ -3916,7 +4163,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
def has_$group_dset$(trexio_file) -> bool: 
+
def has_$group_dset$(trexio_file) -> bool:
     """Check that $group_dset$ variable exists in the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -3934,7 +4181,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc == TREXIO_FAILURE:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     if rc == TREXIO_SUCCESS:
         return True
@@ -3946,12 +4193,12 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
-

3.6 Templates for front end has/read/write a single string attribute

+
+

3.6 Templates for front end has/read/write a single string attribute

-
-

3.6.1 Introduction

+
+

3.6.1 Introduction

This section concerns API calls related to string attributes. @@ -3991,16 +4238,16 @@ This section concerns API calls related to string attributes.

-
-

3.6.2 C templates for front end

+
+

3.6.2 C templates for front end

-
-
3.6.2.1 Function declarations
+
+
3.6.2.1 Function declarations
-
-
3.6.2.2 Source code for default functions
+
+
3.6.2.2 Source code for default functions
trexio_exit_code
@@ -4019,7 +4266,7 @@ This section concerns API calls related to string attributes.
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_read_$group_str$(file, str_out, (uint32_t) max_str_len);
     break;
 #else
@@ -4048,7 +4295,7 @@ This section concerns API calls related to string attributes.
   if (trexio_has_$group_str$(file) == TREXIO_SUCCESS) return TREXIO_ATTR_ALREADY_EXISTS;
 
   size_t len_write = strlen(str);
-  if ((size_t) max_str_len < len_write) return TREXIO_INVALID_STR_LEN;  
+  if ((size_t) max_str_len < len_write) return TREXIO_INVALID_STR_LEN;
 
   switch (file->back_end) {
 
@@ -4057,7 +4304,7 @@ This section concerns API calls related to string attributes.
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_write_$group_str$(file, str);
     break;
 #else
@@ -4091,7 +4338,7 @@ This section concerns API calls related to string attributes.
     break;
 
   case TREXIO_HDF5:
-#ifdef HAVE_HDF5 
+#ifdef HAVE_HDF5
     return trexio_hdf5_has_$group_str$(file);
     break;
 #else
@@ -4112,8 +4359,8 @@ This section concerns API calls related to string attributes.
 
-
-

3.6.3 Fortran templates for front end

+
+

3.6.3 Fortran templates for front end

The Fortran templates that provide an access to the C API calls from Fortran. @@ -4190,11 +4437,11 @@ These templates are based on the use of iso_c_binding. Pointers hav

-
-

3.6.4 Python templates for front end

+
+

3.6.4 Python templates for front end

-
def write_$group_str$(trexio_file, str_w: str) -> None: 
+
def write_$group_str$(trexio_file, str_w: str) -> None:
     """Write the $group_str$ variable in the TREXIO file.
 
     Parameters:
@@ -4218,12 +4465,12 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
-
def read_$group_str$(trexio_file) -> str: 
+
def read_$group_str$(trexio_file) -> str:
     """Read the $group_str$ variable from the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -4243,14 +4490,14 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc != TREXIO_SUCCESS:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     return str_r
 
-
def has_$group_str$(trexio_file) -> bool: 
+
def has_$group_str$(trexio_file) -> bool:
     """Check that $group_str$ variable exists in the TREXIO file.
 
     Parameter is a ~TREXIO File~ object that has been created by a call to ~open~ function.
@@ -4268,7 +4515,7 @@ These templates are based on the use of iso_c_binding. Pointers hav
         if rc == TREXIO_FAILURE:
             raise Error(rc)
     except:
-        raise    
+        raise
 
     if rc == TREXIO_SUCCESS:
         return True
@@ -4281,8 +4528,8 @@ These templates are based on the use of iso_c_binding. Pointers hav
 
-
-

4 Fortran helper/wrapper functions

+
+

4 Fortran helper/wrapper functions

The function below adapts the original C-based trexio_open for Fortran. @@ -4318,8 +4565,8 @@ Note, that Fortran interface calls the main TREXIO API, which is wr

-The subroutine below transforms an array of Fortran strings into one big string using TREXIO_DELIM symbol -as a delimeter and adds NULL character in the end in order to properly pass the desired string to +The subroutine below transforms an array of Fortran strings into one big string using TREXIO_DELIM symbol +as a delimiter and adds NULL character at the end in order to properly pass the desired string to C API. This is needed due to the fact that strings in C are terminated by NULL character \0.

@@ -4417,7 +4664,7 @@ two code are identical, i.e. if the assert statement pass.

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate

diff --git a/templator_hdf5.html b/templator_hdf5.html index 80abdae..a5cf3be 100644 --- a/templator_hdf5.html +++ b/templator_hdf5.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + HDF5 back end @@ -311,27 +311,29 @@ for the JavaScript code in this tag.

Table of Contents

-
-

1 HDF5 back end

+
+

1 HDF5 back end

-
-

1.1 Template for HDF5 definitions

+
+

1.1 Template for HDF5 definitions

#define $GROUP$_GROUP_NAME          "$group$"
@@ -343,8 +345,8 @@ for the JavaScript code in this tag.
 
-
-

1.2 Template for HDF5 structures

+
+

1.2 Template for HDF5 structures

typedef struct trexio_hdf5_s {
@@ -358,8 +360,8 @@ for the JavaScript code in this tag.
 
-
-

1.3 Template for HDF5 init/deinit

+
+

1.3 Template for HDF5 init/deinit

trexio_exit_code
@@ -439,8 +441,8 @@ for the JavaScript code in this tag.
 
-
-

1.4 Template for HDF5 has/read/write the numerical attribute

+
+

1.4 Template for HDF5 has/read/write the numerical attribute

trexio_exit_code
@@ -484,7 +486,7 @@ for the JavaScript code in this tag.
   const hid_t dtype = H5Tcopy(H5T_$GROUP_NUM_H5_DTYPE$);
   const hid_t dspace = H5Screate(H5S_SCALAR);
 
-  const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME, 
+  const hid_t num_id = H5Acreate(f->$group$_group, $GROUP_NUM$_NAME,
                                  dtype, dspace, H5P_DEFAULT, H5P_DEFAULT);
   if (num_id <= 0) {
     H5Sclose(dspace);
@@ -534,8 +536,8 @@ for the JavaScript code in this tag.
 
-
-

1.5 Template for HDF5 has/read/write the dataset of numerical data

+
+

1.5 Template for HDF5 has/read/write the dataset of numerical data

trexio_exit_code
@@ -543,7 +545,7 @@ for the JavaScript code in this tag.
 {
 
   if (file == NULL) return TREXIO_INVALID_ARG_1;
-  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;
+  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
 
   const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
 
@@ -599,7 +601,7 @@ for the JavaScript code in this tag.
 {
 
   if (file == NULL) return TREXIO_INVALID_ARG_1;
-  if ($group_dset$  == NULL) return TREXIO_INVALID_ARG_2;
+  if ($group_dset$ == NULL) return TREXIO_INVALID_ARG_2;
 
   trexio_hdf5_t* f = (trexio_hdf5_t*) file;
 
@@ -658,9 +660,211 @@ for the JavaScript code in this tag.
 
-
-

1.6 Template for HDF5 has/read/write the dataset of strings

+
+

1.6 Template for HDF5 has/read/write the dataset of sparse data

+

+Sparse data is stored using extensible datasets of HDF5. Extensibility is required +due to the fact that the sparse data will be written in chunks of user-defined size. +

+ +
+
trexio_exit_code
+trexio_hdf5_write_$group_dset$ (trexio_t* const file,
+                                const int64_t offset_file,
+                                const int64_t size,
+                                const int64_t size_max,
+                                const int32_t* index_sparse,
+                                const double* value_sparse)
+{
+
+  /* HDF5 back-end write of one chunk of sparse data.
+   *
+   *   file         : TREXIO file handle (cast to trexio_hdf5_t)
+   *   offset_file  : offset (in entries) at which the chunk is written when the datasets already exist
+   *   size         : number of entries in this chunk
+   *   size_max     : upper bound used to choose the integer type in which the indices are stored
+   *   index_sparse : size * rank indices
+   *   value_sparse : size values
+   *
+   * Indices and values go to two separate datasets named <dataset>_indices and <dataset>_values.
+   * Returns TREXIO_SUCCESS, or the first error reported by the helper routines.
+   */
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  trexio_hdf5_t* f = (trexio_hdf5_t*) file;
+
+  hid_t index_dtype;
+  void* index_p;
+  uint64_t size_ranked = (uint64_t) size * $group_dset_rank$;
+  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    uint8_t* index = CALLOC(size_ranked, uint8_t);
+    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
+    /* loop counter has the same unsigned type as its bound */
+    for (uint64_t i=0; i<size_ranked; ++i){
+      index[i] = (uint8_t) index_sparse[i];
+    }
+    index_p = index;
+    index_dtype = H5T_NATIVE_UINT8;
+  } else if (size_max < UINT16_MAX) {
+    uint16_t* index = CALLOC(size_ranked, uint16_t);
+    if (index == NULL) return TREXIO_ALLOCATION_FAILED;
+    for (uint64_t i=0; i<size_ranked; ++i){
+      index[i] = (uint16_t) index_sparse[i];
+    }
+    index_p = index;
+    index_dtype = H5T_NATIVE_UINT16;
+  } else {
+    /* No conversion needed: write directly from the caller's buffer (it is only read) */
+    index_p = (int32_t*) index_sparse;
+    index_dtype = H5T_NATIVE_INT32;
+  }
+
+  /* Store float values in double precision */
+  hid_t value_dtype = H5T_NATIVE_DOUBLE;
+  /* Arrays of chunk dims that will be used for chunking the dataset */
+  const hsize_t chunk_i_dims[1] = {size_ranked};
+  const hsize_t chunk_v_dims[1] = {size};
+
+  /* Indices and values are stored as 2 independent datasets in the HDF5 file */
+  char dset_index_name[256] = "\0";
+  char dset_value_name[256] = "\0";
+  /* Build the names of the datasets */
+  strncpy(dset_index_name, $GROUP_DSET$_NAME "_indices", 256);
+  strncpy(dset_value_name, $GROUP_DSET$_NAME "_values", 256);
+
+  trexio_exit_code rc_write = TREXIO_FAILURE;
+  /* NOTE: chunk size is set upon creation of the HDF5 dataset and cannot be changed ! */
+  if ( H5LTfind_dataset(f->$group$_group, dset_index_name) != 1 ) {
+  /* If the index dataset does not exist -> create both datasets and write */
+
+  /* Create chunked dataset with index_dtype datatype and write indices into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, index_p);
+    /* Release the converted copy (if any) before a possible early return */
+    if (index_p != index_sparse) FREE(index_p);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  /* Create chunked dataset with value_dtype datatype and write values into it */
+    rc_write = trexio_hdf5_create_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  } else {
+  /* If the index dataset exists -> open the existing datasets and write at the given offset */
+    hsize_t offset_i[1] = {(hsize_t) offset_file * $group_dset_rank$};
+    hsize_t offset_v[1] = {(hsize_t) offset_file};
+
+  /* Open the existing index dataset and write indices into it */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_index_name, index_dtype, chunk_i_dims, offset_i, index_p);
+    /* Release the converted copy (if any) before a possible early return */
+    if (index_p != index_sparse) FREE(index_p);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  /* Open the existing value dataset and write values into it */
+    rc_write = trexio_hdf5_open_write_dset_sparse(f->$group$_group, dset_value_name, value_dtype, chunk_v_dims, offset_v, value_sparse);
+    if (rc_write != TREXIO_SUCCESS) return rc_write;
+
+  }
+
+  return TREXIO_SUCCESS;
+}
+
+
+ + +
+
trexio_exit_code
+trexio_hdf5_read_$group_dset$ (trexio_t* const file,
+                               const int64_t offset_file,
+                               const int64_t size,
+                               const int64_t size_max,
+                               int64_t* const eof_read_size,
+                               int32_t* const index_read,
+                               double* const value_read)
+{
+
+  /* HDF5 back-end read of one chunk of sparse data: first the indices, then the values.
+   * Both live in separate datasets named <dataset>_indices and <dataset>_values.
+   * eof_read_size is handed to the helper only for the read of the values.
+   * Returns the code of the failing read, otherwise the code of the read of the values.
+   */
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
+
+  const trexio_hdf5_t* h5file = (const trexio_hdf5_t*) file;
+
+  /* Names of the two datasets */
+  char name_indices[256] = "\0";
+  char name_values[256] = "\0";
+  strncpy(name_indices, $GROUP_DSET$_NAME "_indices", 256);
+  strncpy(name_values, $GROUP_DSET$_NAME "_values", 256);
+
+  /* Hyperslab of the indices: every entry carries $group_dset_rank$ indices */
+  hsize_t start_indices[1] = {(hsize_t) offset_file * $group_dset_rank$};
+  hsize_t count_indices[1] = {(hsize_t) size * $group_dset_rank$};
+
+  /* Hyperslab of the values: one value per entry */
+  hsize_t start_values[1] = {(hsize_t) offset_file};
+  hsize_t count_values[1] = {(hsize_t) size};
+
+  /* Flags telling the helper which kind of dataset is being read */
+  int as_index = 1, as_value = 0;
+
+  // attempt to read indices
+  trexio_exit_code rc = trexio_hdf5_open_read_dset_sparse(h5file->$group$_group, name_indices, start_indices, count_indices, NULL, as_index, index_read);
+  if (rc != TREXIO_SUCCESS && rc != TREXIO_END) return rc;
+
+  // attempt to read values; whatever this read reports is the result of the function
+  return trexio_hdf5_open_read_dset_sparse(h5file->$group$_group, name_values, start_values, count_values, eof_read_size, as_value, value_read);
+}
+
+
+ + +
+
trexio_exit_code
+trexio_hdf5_read_$group_dset$_size (trexio_t* const file, int64_t* const size_max)
+{
+
+  /* Return in *size_max the current length of the <dataset>_values dataset.
+   *
+   *   file     : TREXIO file handle (cast to trexio_hdf5_t)
+   *   size_max : output, first (and only) dimension of the dataset
+   *
+   * Returns TREXIO_SUCCESS, TREXIO_INVALID_ARG_x for NULL arguments, TREXIO_INVALID_ID
+   * if the dataset or its dataspace cannot be opened, TREXIO_FAILURE if the dataspace
+   * is not one-dimensional or its dimensions cannot be queried.
+   */
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (size_max == NULL) return TREXIO_INVALID_ARG_2;
+
+  const trexio_hdf5_t* f = (const trexio_hdf5_t*) file;
+
+  hid_t dset_id = H5Dopen(f->$group$_group, $GROUP_DSET$_NAME "_values", H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  hid_t fspace_id = H5Dget_space(dset_id);
+  if (fspace_id < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // allocate space for the dimensions to be read
+  hsize_t ddims[1] = {0};
+
+  // check the rank first: ddims can hold exactly one dimension
+  int rrank = H5Sget_simple_extent_ndims(fspace_id);
+  if (rrank == 1) {
+    // get the dimensions of the dataset (negative return value on failure)
+    rrank = H5Sget_simple_extent_dims(fspace_id, ddims, NULL);
+  }
+
+  H5Dclose(dset_id);
+  H5Sclose(fspace_id);
+
+  // do not report a size if the dataspace is unexpected or the query failed
+  if (rrank != 1) return TREXIO_FAILURE;
+
+  *size_max = (int64_t) ddims[0];
+
+  return TREXIO_SUCCESS;
+}
+
+
+ + +
+
trexio_exit_code
+trexio_hdf5_has_$group_dset$ (trexio_t* const file)
+{
+
+  /* Report whether the <dataset>_values dataset is present in the group. */
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  trexio_hdf5_t* h5file = (trexio_hdf5_t*) file;
+
+  /* H5LTfind_dataset returns 1 if dataset exists, 0 otherwise */
+  const herr_t found = H5LTfind_dataset(h5file->$group$_group, $GROUP_DSET$_NAME "_values");
+
+  if (found == 1) return TREXIO_SUCCESS;
+  if (found == 0) return TREXIO_HAS_NOT;
+
+  /* any other value is treated as an error */
+  return TREXIO_FAILURE;
+
+}
+
+
+
+
+ +
+

1.7 Template for HDF5 has/read/write the dataset of strings

+
trexio_exit_code
 trexio_hdf5_read_$group_dset$ (trexio_t* const file, char* const $group_dset$, const uint32_t rank, const uint64_t* dims, const uint32_t max_str_len)
@@ -684,10 +888,10 @@ for the JavaScript code in this tag.
     return TREXIO_ALLOCATION_FAILED;
   }
 
-  hid_t dspace = H5Dget_space(dset_id); 
+  hid_t dspace = H5Dget_space(dset_id);
   if (dset_id <= 0) {
     FREE(ddims);
-    H5Dclose(dset_id); 
+    H5Dclose(dset_id);
     return TREXIO_INVALID_ID;
   }
 
@@ -723,7 +927,7 @@ for the JavaScript code in this tag.
   if (rdata == NULL) {
     H5Dclose(dset_id);
     H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
     return TREXIO_ALLOCATION_FAILED;
   }
 
@@ -732,7 +936,7 @@ for the JavaScript code in this tag.
     FREE(rdata);
     H5Dclose(dset_id);
     H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
     return TREXIO_FAILURE;
   }
 
@@ -755,11 +959,11 @@ for the JavaScript code in this tag.
     FREE(rdata);
     H5Dclose(dset_id);
     H5Sclose(dspace);
-    H5Tclose(memtype); 
+    H5Tclose(memtype);
     return TREXIO_FAILURE;
   }
 
-  FREE(rdata); 
+  FREE(rdata);
   H5Dclose(dset_id);
   H5Sclose(dspace);
   H5Tclose(memtype);
@@ -791,7 +995,7 @@ for the JavaScript code in this tag.
 
   if ( H5LTfind_dataset(f->$group$_group, $GROUP_DSET$_NAME) != 1 ) {
 
-    /* code to create dataset */   
+    /* code to create dataset */
     hid_t filetype = H5Tcopy (H5T_FORTRAN_S1);
     if (filetype <= 0) return TREXIO_INVALID_ID;
 
@@ -864,9 +1068,9 @@ for the JavaScript code in this tag.
 
-
-

1.7 Template for HDF5 has/read/write the string attribute

-
+
+

1.8 Template for HDF5 has/read/write the string attribute

+
trexio_exit_code
 trexio_hdf5_read_$group_str$ (trexio_t* const file, char* const str, const uint32_t max_str_len)
@@ -988,11 +1192,258 @@ for the JavaScript code in this tag.
 
+
+

1.9 Helper functions

+
+
+
trexio_exit_code
+trexio_hdf5_create_write_dset_sparse (const hid_t group_id,
+                                      const char* dset_name,
+                                      const hid_t dtype_id,
+                                      const hsize_t* chunk_dims,
+                                      const void* data_sparse)
+{
+  const int h5_rank = 1;
+  const hsize_t maxdims[1] = {H5S_UNLIMITED};
+
+  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, maxdims);
+  if (dspace < 0) return TREXIO_INVALID_ID;
+
+  hid_t prop = H5Pcreate(H5P_DATASET_CREATE);
+  if (prop < 0) {
+    H5Sclose(dspace);
+    return TREXIO_INVALID_ID;
+  }
+
+  herr_t status = H5Pset_chunk(prop, h5_rank, chunk_dims);
+  if (status < 0) {
+    H5Sclose(dspace);
+    H5Pclose(prop);
+    return TREXIO_INVALID_ID;
+  }
+
+  hid_t dset_id = H5Dcreate(group_id,
+                            dset_name,
+                            dtype_id,
+                            dspace,
+                            H5P_DEFAULT,
+                            prop,
+                            H5P_DEFAULT);
+  if (dset_id < 0) {
+    H5Sclose(dspace);
+    H5Pclose(prop);
+    return TREXIO_INVALID_ID;
+  }
+
+  status = H5Dwrite(dset_id,
+                    dtype_id,
+                    H5S_ALL, H5S_ALL, H5P_DEFAULT,
+                    data_sparse);
+  H5Sclose(dspace);
+  H5Pclose(prop);
+  H5Dclose(dset_id);
+  if (status < 0) return TREXIO_FAILURE;
+
+  return TREXIO_SUCCESS;
+}
+
+
+/* Extend an existing 1D dataset by one chunk and write that chunk at the given offset.
+ *
+ * group_id    - HDF5 identifier of the group that contains the dataset
+ * dset_name   - name of the existing dataset
+ * dtype_id    - HDF5 datatype of the memory buffer
+ * chunk_dims  - one-element array: number of elements to write; the dataset grows by this amount
+ * offset_file - one-element array: start position of the written hyperslab in the dataset
+ * data_sparse - buffer holding chunk_dims[0] elements
+ *
+ * Returns TREXIO_SUCCESS, TREXIO_INVALID_ID when an HDF5 object cannot be obtained
+ * or modified, and TREXIO_FAILURE when the dataset is not 1D or the write fails.
+ */
+trexio_exit_code
+trexio_hdf5_open_write_dset_sparse (const hid_t group_id,
+                                    const char* dset_name,
+                                    const hid_t dtype_id,
+                                    const hsize_t* chunk_dims,
+                                    const hsize_t* offset_file,
+                                    const void* data_sparse)
+{
+  const int h5_rank = 1;
+
+  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  hid_t fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // ddims has room for a single extent: check the rank before querying the dimensions
+  if (H5Sget_simple_extent_ndims(fspace) != h5_rank) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_FAILURE;
+  }
+
+  // get the current size of the dataset
+  hsize_t ddims[1] = {0};
+  if (H5Sget_simple_extent_dims(fspace, ddims, NULL) < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_FAILURE;
+  }
+  ddims[0] += chunk_dims[0];
+
+  // extend the dset size
+  herr_t status  = H5Dset_extent(dset_id, ddims);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // close and reopen the file dataspace to take into account the extension
+  H5Sclose(fspace);
+  fspace = H5Dget_space(dset_id);
+  if (fspace < 0) {
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // select hyperslab to be written using chunk_dims and offset values
+  status = H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset_file, NULL, chunk_dims, NULL);
+  if (status < 0) {
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  // create memory dataspace to write from
+  hid_t dspace = H5Screate_simple(h5_rank, chunk_dims, NULL);
+  if (dspace < 0) {
+    // dspace is not a valid handle here, so only the other two are released
+    H5Sclose(fspace);
+    H5Dclose(dset_id);
+    return TREXIO_INVALID_ID;
+  }
+
+  status = H5Dwrite(dset_id,
+                    dtype_id,
+                    dspace, fspace, H5P_DEFAULT,
+                    data_sparse);
+  H5Dclose(dset_id);
+  H5Sclose(dspace);
+  H5Sclose(fspace);
+  if (status < 0) return TREXIO_FAILURE;
+
+  return TREXIO_SUCCESS;
+}
+
+
+/* Read a chunk of a 1D sparse dataset starting at a given offset.
+ *
+ * group_id      - HDF5 identifier of the group that contains the dataset
+ * dset_name     - name of the dataset to read
+ * offset_file   - one-element array: position of the first element to read
+ * size_read     - one-element array: number of elements requested; lowered in place
+ *                 when fewer elements are available before the end of the dataset
+ * eof_read_size - optional (may be NULL): receives the number of elements actually
+ *                 read when the end of the dataset is reached
+ * is_index      - 1 when reading indices: data stored as uint8/uint16 is widened
+ *                 to int32_t in data_sparse; any other stored type is read directly
+ * data_sparse   - output buffer
+ *
+ * Returns TREXIO_SUCCESS, TREXIO_END when the end of the dataset was reached
+ * (including the case where offset_file lies at or beyond the end, in which
+ * nothing is read), TREXIO_ALLOCATION_FAILED, TREXIO_INVALID_ID or TREXIO_FAILURE.
+ */
+trexio_exit_code
+trexio_hdf5_open_read_dset_sparse (const hid_t group_id,
+                                   const char* dset_name,
+                                   const hsize_t* offset_file,
+                                   hsize_t* const size_read,
+                                   int64_t* const eof_read_size,
+                                   const int is_index,
+                                   void* const data_sparse
+                                   )
+{
+  const int h5_rank = 1;
+
+  /* Everything acquired below is released in the single cleanup section */
+  trexio_exit_code rc = TREXIO_INVALID_ID;
+  hid_t   fspace_id   = -1;
+  hid_t   memspace_id = -1;
+  hid_t   dtype       = -1;
+  void*   index_tmp   = NULL;  /* temporary buffer for narrow (8/16-bit) indices */
+  void*   read_buf    = data_sparse;
+  hsize_t ddims[1]    = {0};
+  int64_t size_ranked = 0;
+  int     is_EOF      = 0;
+  int     is_u8       = 0;
+  int     is_u16      = 0;
+
+  // get the dataset handle
+  hid_t dset_id = H5Dopen(group_id, dset_name, H5P_DEFAULT);
+  if (dset_id <= 0) return TREXIO_INVALID_ID;
+
+  // get the dataspace of the dataset
+  fspace_id = H5Dget_space(dset_id);
+  if (fspace_id < 0) goto cleanup;
+
+  // ddims has room for a single extent: refuse anything that is not 1D
+  if (H5Sget_simple_extent_ndims(fspace_id) != h5_rank) {
+    rc = TREXIO_FAILURE;
+    goto cleanup;
+  }
+
+  /* get dims of the dset stored in the file to check whether reading with user-provided chunk size
+     will reach end of the dataset (i.e. EOF in TEXT back end)
+   */
+  if (H5Sget_simple_extent_dims(fspace_id, ddims, NULL) < 0) {
+    rc = TREXIO_FAILURE;
+    goto cleanup;
+  }
+
+  // offset at or beyond the end: nothing can be read (also avoids unsigned underflow below)
+  if (offset_file[0] >= ddims[0]) {
+    size_read[0] = 0;
+    if (eof_read_size != NULL) *eof_read_size = 0;
+    rc = TREXIO_END;
+    goto cleanup;
+  }
+
+  // fewer elements left than requested => EOF, read only what is available
+  if (size_read[0] > ddims[0] - offset_file[0]) {
+    is_EOF = 1;
+    size_read[0] = ddims[0] - offset_file[0];
+    // modified the value of eof_read_size passed by address
+    if (eof_read_size != NULL) *eof_read_size = (int64_t) size_read[0];
+  }
+
+  // read the datatype from the dataset and compare with the pre-defined values
+  dtype = H5Dget_type(dset_id);
+  if (dtype < 0) goto cleanup;
+
+  // special case when reading int indices stored with a narrower type
+  size_ranked = (int64_t) size_read[0];
+  if (is_index == 1) {
+    is_u8 = (H5Tequal(dtype, H5T_NATIVE_UINT8) > 0);
+    if (is_u8 == 0) is_u16 = (H5Tequal(dtype, H5T_NATIVE_UINT16) > 0);
+  }
+
+  if (is_u8 == 1) {
+    index_tmp = CALLOC(size_ranked, uint8_t);
+  } else if (is_u16 == 1) {
+    index_tmp = CALLOC(size_ranked, uint16_t);
+  }
+  if (is_u8 == 1 || is_u16 == 1) {
+    if (index_tmp == NULL) {
+      rc = TREXIO_ALLOCATION_FAILED;
+      goto cleanup;
+    }
+    read_buf = index_tmp;
+  }
+
+  if (H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, offset_file, NULL, size_read, NULL) < 0) goto cleanup;
+
+  memspace_id = H5Screate_simple(h5_rank, size_read, NULL);
+  if (memspace_id < 0) goto cleanup;
+
+  // read_buf is either the temporary narrow buffer or data_sparse itself
+  if (H5Dread(dset_id,
+              dtype,
+              memspace_id, fspace_id, H5P_DEFAULT,
+              read_buf) < 0) {
+    rc = TREXIO_FAILURE;
+    goto cleanup;
+  }
+
+  // widen narrow indices into the int32_t output buffer
+  if (is_u8 == 1) {
+    const uint8_t* index = (const uint8_t*) index_tmp;
+    for (int64_t i=0; i<size_ranked; ++i){
+      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
+    }
+  } else if (is_u16 == 1) {
+    const uint16_t* index = (const uint16_t*) index_tmp;
+    for (int64_t i=0; i<size_ranked; ++i){
+      ((int32_t*)data_sparse)[i] = (int32_t) index[i];
+    }
+  }
+
+  rc = (is_EOF == 1) ? TREXIO_END : TREXIO_SUCCESS;
+
+cleanup:
+  if (index_tmp != NULL) {
+    FREE(index_tmp);
+  }
+  if (dtype >= 0) H5Tclose(dtype);
+  if (memspace_id >= 0) H5Sclose(memspace_id);
+  if (fspace_id >= 0) H5Sclose(fspace_id);
+  H5Dclose(dset_id);
+
+  return rc;
+}
+
+
+
+

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate

diff --git a/templator_text.html b/templator_text.html index dcc692b..8fcf038 100644 --- a/templator_text.html +++ b/templator_text.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + TEXT back end @@ -311,36 +311,28 @@ for the JavaScript code in this tag.

Table of Contents

-
-

1 TEXT back end

+
+

1 TEXT back end

The "file" produced by the text back end is a directory with one @@ -361,8 +353,8 @@ The file is written when closed, or when the flush function is called.

-
-

1.1 Template for group-related structures in text back end

+
+

1.1 Template for group-related structures in text back end

typedef struct $group$_s {
@@ -381,26 +373,13 @@ The file is written when closed, or when the flush function is called.
 
-
-

1.2 Template for general structure in text back end

+
+

1.2 Template for general structure in text back end

-
-
typedef struct rdm_s {
-  uint64_t dim_one_e;
-  uint32_t to_flush;
-  uint32_t padding;
-  double*  one_e;
-  char     file_name[TREXIO_MAX_FILENAME_LENGTH];
-  char     two_e_file_name[TREXIO_MAX_FILENAME_LENGTH];
-} rdm_t;
-
-
-
typedef struct trexio_text_s {
   trexio_t   parent ;
   $group$_t* $group$;
-  rdm_t*      rdm;
   int        lock_file;
 } trexio_text_t;
 
@@ -408,8 +387,8 @@ The file is written when closed, or when the flush function is called.
-
-

1.3 Initialize function (constant part)

+
+

1.3 Initialize function (constant part)

trexio_exit_code
@@ -540,8 +519,8 @@ The file is written when closed, or when the flush function is called.
 
-
-

1.4 Deinitialize function (templated part)

+
+

1.4 Deinitialize function (templated part)

trexio_exit_code
@@ -555,9 +534,6 @@ The file is written when closed, or when the flush function is called.
   /* Error handling for this call is added by the generator */
   rc = trexio_text_free_$group$( (trexio_text_t*) file);
 
-  rc = trexio_text_free_rdm( (trexio_text_t*) file);
-  if (rc != TREXIO_SUCCESS) return rc;
-
   return TREXIO_SUCCESS;
 
 }
@@ -566,8 +542,8 @@ The file is written when closed, or when the flush function is called.
 
-
-

1.5 Template for text read struct

+
+

1.5 Template for text read struct

$group$_t*
@@ -697,7 +673,7 @@ trexio_text_read_$group$ (trexio_text_t* return NULL;
       }
 
-      rc = fscanf(f, "%$group_num_std_dtype_in$", &($group$->$group_num$));
+      rc = fscanf(f, "%$group_num_format_scanf$", &($group$->$group_num$));
       assert(!(rc != 1));
       if (rc != 1) {
         FREE(buffer);
@@ -785,7 +761,7 @@ trexio_text_read_$group$ (trexio_text_t* for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
-      rc = fscanf(f, "%$group_dset_std_dtype_in$", &($group$->$group_dset$[i]));
+      rc = fscanf(f, "%$group_dset_format_scanf$", &($group$->$group_dset$[i]));
       assert(!(rc != 1));
       if (rc != 1) {
         FREE(buffer);
@@ -821,8 +797,8 @@ trexio_text_read_$group$ (trexio_text_t* WARNING: this tmp array allows to avoid allocation of space for each element of array of string
-      ,  BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
-      */
+         BUT it's size has to be number_of_str*max_len_str where max_len_str is somewhat arbitrary, e.g. 32.
+       */
       char* tmp_$group_dset$;
       tmp_$group_dset$ = CALLOC(size_$group_dset$*32, char);
 
@@ -861,8 +837,8 @@ trexio_text_read_$group$ (trexio_text_t* 
 
-
-

1.6 Template for text flush struct

+
+

1.6 Template for text flush struct

trexio_exit_code
@@ -899,7 +875,7 @@ trexio_text_read_$group$ (trexio_text_t* START REPEAT GROUP_NUM
   fprintf(f, "$group_num$_isSet %u \n", $group$->$group_num$_isSet);
-  if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_std_dtype_out$ \n", $group$->$group_num$);
+  if ($group$->$group_num$_isSet == true) fprintf(f, "$group_num$ %$group_num_format_printf$ \n", $group$->$group_num$);
   // END REPEAT GROUP_NUM
 
   // START REPEAT GROUP_ATTR_STR
@@ -913,7 +889,7 @@ trexio_text_read_$group$ (trexio_text_t* "$group_dset$\n");
   for (uint64_t i=0 ; i<size_$group_dset$ ; ++i) {
-    fprintf(f, "%$group_dset_std_dtype_out$\n", $group$->$group_dset$[i]);
+    fprintf(f, "%$group_dset_format_printf$\n", $group$->$group_dset$[i]);
   }
   // END REPEAT GROUP_DSET_ALL
 
@@ -927,8 +903,8 @@ trexio_text_read_$group$ (trexio_text_t* 
 
-
-

1.7 Template for text free memory

+
+

1.7 Template for text free memory

Memory is allocated when reading. The following function frees memory. @@ -973,8 +949,8 @@ Memory is allocated when reading. The following function frees memory.

-
-

1.8 Template for has/read/write the numerical attribute

+
+

1.8 Template for has/read/write the numerical attribute

trexio_exit_code
@@ -1037,8 +1013,8 @@ Memory is allocated when reading. The following function frees memory.
 
-
-

1.9 Template for has/read/write the dataset of numerical data

+
+

1.9 Template for has/read/write the dataset of numerical data

The group_dset array is assumed allocated with the appropriate size. @@ -1134,8 +1110,8 @@ The group_dset array is assumed allocated with the appropriate size

-
-

1.10 Template for has/read/write the dataset of strings

+
+

1.10 Template for has/read/write the dataset of strings

The group_dset array is assumed allocated with the appropriate size. @@ -1236,8 +1212,8 @@ The group_dset array is assumed allocated with the appropriate size

-
-

1.11 Template for has/read/write the string attribute

+
+

1.11 Template for has/read/write the string attribute

trexio_exit_code
@@ -1311,355 +1287,277 @@ The group_dset array is assumed allocated with the appropriate size
 
-
-

1.12 RDM struct (hard-coded)

+
+

1.12 Template for has/read/write the dataset of sparse data

-
-
-

1.12.1 Read the complete struct

-
-
-
rdm_t* trexio_text_read_rdm(trexio_text_t* const file);
-
-
+

+Each sparse array is stored in a separate .txt file due to the fact that sparse I/O has to be decoupled +from conventional write/read/flush behaviour of the TEXT back end. Chunks are used to read/write sparse data +to prevent memory overflow. Chunks have a given int64_t size +(size specifies the number of sparse data items, e.g. integrals). +

+ +

+User provides indices and values of the sparse array as two separate variables. +

+
-
rdm_t* trexio_text_read_rdm(trexio_text_t* const file) {
-  if (file  == NULL) return NULL;
+
trexio_exit_code trexio_text_write_$group_dset$(trexio_t* const file,
+                                                const int64_t offset_file,
+                                                const int64_t size,
+                                                const int64_t size_max,
+                                                const int64_t size_start,
+                                                const int32_t* index_sparse,
+                                                const double* value_sparse)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
 
-  if (file->rdm != NULL) return file->rdm;
+  /* Build the name of the file with sparse data*/
+  /* The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed? */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
 
-  /* Allocate the data structure */
-  rdm_t* rdm = MALLOC(rdm_t);
-  assert (rdm != NULL);
+  /* Copy directory name in file_full_path */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
 
-  rdm->one_e           = NULL;
-  rdm->two_e_file_name[0] = '\0';
-  rdm->to_flush        = 0;
+  /* Open the file in "a" (append) mode to guarantee that no truncation happens upon consecutive writes */
+  FILE* f = fopen(file_full_path, "a");
+  if(f == NULL) return TREXIO_FILE_ERROR;
 
-  /* Try to open the file. If the file does not exist, return */
-  const char* rdm_file_name = "/rdm.txt";
 
-  strncpy (rdm->file_name, file->parent.file_name, TREXIO_MAX_FILENAME_LENGTH);
+  /* Specify the line length in order to offset properly. For example, for 4-index quantities
+     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char.
+     CURRENTLY NO OFFSET IS USED WHEN WRITING !
+    */
+  int64_t line_length = 0L;
+  char format_str[256] = "\0";
 
-  strncat (rdm->file_name, rdm_file_name,
-           TREXIO_MAX_FILENAME_LENGTH-strlen(rdm_file_name));
-
-  if (rdm->file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
-    FREE(rdm);
-    return NULL;
+  /* Determine the optimal type for storing indices depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    line_length = $sparse_line_length_8$; // 41 for 4 indices
+    strncpy(format_str, $sparse_format_printf_8$, 256);
+  } else if (size_max < UINT16_MAX) {
+    line_length = $sparse_line_length_16$; // 49 for 4 indices
+    strncpy(format_str, $sparse_format_printf_16$, 256);
+  } else {
+    line_length = $sparse_line_length_32$; //69 for 4 indices
+    strncpy(format_str, $sparse_format_printf_32$, 256);
   }
-  /* If the file exists, read it */
-  FILE* f = fopen(rdm->file_name,"r");
-  if (f != NULL) {
+  strncat(format_str, "\n", 2);
 
-    /* Find size of file to allocate the max size of the string buffer */
-    fseek(f, 0L, SEEK_END);
-    size_t sz = ftell(f);
-    fseek(f, 0L, SEEK_SET);
-    sz = (sz < 1024) ? (1024) : (sz);
-    char* buffer = CALLOC(sz, char);
+  /* Get the starting position of the IO stream to be written in the .size file.
+     This is error-prone due to the fact that for large files (>2 GB) in 32-bit systems ftell will fail.
+     One can use ftello function which is adapted for large files.
+     For now, we can use front-end-provided size_start, which has been checked for INT64_MAX overflow.
+   */
+  int64_t io_start_pos = size_start * line_length;
 
-    /* Read the dimensioning variables */
-    int rc;
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "dim_one_e") == 0);
-
-    rc = fscanf(f, "%" SCNu64 "", &(rdm->dim_one_e));
-    assert (rc == 1);
-
-    /* Allocate arrays */
-    rdm->one_e = CALLOC(rdm->dim_one_e, double);
-    assert (rdm->one_e != NULL);
-
-    /* Read one_e */
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "one_e") == 0);
-
-    for (uint64_t i=0 ; i<rdm->dim_one_e; ++i) {
-      rc = fscanf(f, "%lf", &(rdm->one_e[i]));
-      assert (rc == 1);
-    }
-
-    /* Read two_e */
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    assert (strcmp(buffer, "two_e_file_name") == 0);
-
-    rc = fscanf(f, "%1023s", buffer);
-    assert (rc == 1);
-    strncpy(rdm->two_e_file_name, buffer, 1024);
-    if (rdm->two_e_file_name[TREXIO_MAX_FILENAME_LENGTH-1] != '\0') {
-      FREE(buffer);
-      FREE(rdm->one_e);
-      FREE(rdm);
+  /* Write the data in the file and check the return code of fprintf to verify that > 0 bytes have been written */
+  int rc;
+  for (uint64_t i=0UL; i<size; ++i) {
+    rc = fprintf(f, format_str,
+       $group_dset_sparse_indices_printf$,
+       *(value_sparse + i));
+    if(rc <= 0) {
       fclose(f);
-      return NULL;
-    }
-
-    FREE(buffer);
-    fclose(f);
-    f = NULL;
-  }
-  file->rdm = rdm ;
-  return rdm;
-}
-
-
-
-
- -
-

1.12.2 Flush the complete struct

-
-
-
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file);
-
-
- -
-
trexio_exit_code trexio_text_flush_rdm(trexio_text_t* const file) {
-  if (file == NULL) return TREXIO_INVALID_ARG_1;
-
-  if (file->parent.mode == 'r') return TREXIO_READONLY;
-
-  rdm_t* const rdm = file->rdm;
-  if (rdm == NULL) return TREXIO_SUCCESS;
-
-  if (rdm->to_flush == 0) return TREXIO_SUCCESS;
-
-  FILE* f = fopen(rdm->file_name,"w");
-  assert (f != NULL);
-
-  /* Write the dimensioning variables */
-  fprintf(f, "num %" PRIu64 "\n", rdm->dim_one_e);
-
-  /* Write arrays */
-  fprintf(f, "one_e\n");
-  for (uint64_t i=0 ; i< rdm->dim_one_e; ++i) {
-    fprintf(f, "%lf\n", rdm->one_e[i]);
-  }
-
-  fprintf(f, "two_e_file_name\n");
-  fprintf(f, "%s\n", rdm->two_e_file_name);
-
-  fclose(f);
-  rdm->to_flush = 0;
-  return TREXIO_SUCCESS;
-}
-
-
-
-
- -
-

1.12.3 Free memory

-
-

-Memory is allocated when reading. The followig function frees memory. -

- -
-
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file);
-
-
- -
-
trexio_exit_code trexio_text_free_rdm(trexio_text_t* const file) {
-  if (file == NULL) return TREXIO_INVALID_ARG_1;
-
-  if (file->parent.mode != 'r') {
-    trexio_exit_code rc = trexio_text_flush_rdm(file);
-    if (rc != TREXIO_SUCCESS) return TREXIO_FAILURE;
-  }
-
-  rdm_t* const rdm = file->rdm;
-  if (rdm == NULL) return TREXIO_SUCCESS;
-
-  if (rdm->one_e != NULL) {
-    FREE (rdm->one_e);
-  }
-
-  free (rdm);
-  file->rdm = NULL;
-  return TREXIO_SUCCESS;
-}
-
-
-
-
- -
-

1.12.4 Read/Write the onee attribute

-
-

-The one_e array is assumed allocated with the appropriate size. -

- -
-
trexio_exit_code
-trexio_text_read_rdm_one_e(trexio_t* const file,
-                           double* const one_e,
-                           const uint64_t dim_one_e);
-
-trexio_exit_code
-trexio_text_write_rdm_one_e(trexio_t* const file,
-                            const double* one_e,
-                            const uint64_t dim_one_e);
-
-
- -
-
trexio_exit_code
-trexio_text_read_rdm_one_e(trexio_t* const file,
-                           double* const one_e,
-                           const uint64_t dim_one_e)
-{
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (one_e == NULL) return TREXIO_INVALID_ARG_2;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  if (dim_one_e != rdm->dim_one_e) return TREXIO_INVALID_ARG_3;
-
-  for (uint64_t i=0 ; i<dim_one_e ; ++i) {
-    one_e[i] = rdm->one_e[i];
-  }
-
-  return TREXIO_SUCCESS;
-}
-
-
-trexio_exit_code
-trexio_text_write_rdm_one_e(trexio_t* const file,
-                            const double* one_e,
-                            const uint64_t dim_one_e)
-{
-  if (file  == NULL)  return TREXIO_INVALID_ARG_1;
-  if (one_e == NULL)  return TREXIO_INVALID_ARG_2;
-  if (file->mode != 'r') return TREXIO_READONLY;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  rdm->dim_one_e = dim_one_e;
-  for (uint64_t i=0 ; i<dim_one_e ; ++i) {
-    rdm->one_e[i] = one_e[i];
-  }
-
-  rdm->to_flush = 1;
-  return TREXIO_SUCCESS;
-}
-
-
-
-
- -
-

1.12.5 Read/Write the twoe attribute

-
-

-two_e is a sparse data structure, which can be too large to fit -in memory. So we provide functions to read and write it by -chunks. -In the text back end, the easiest way to do it is to create a -file for each sparse float structure. -

- -
-
trexio_exit_code
-trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
-                                    const uint64_t offset,
-                                    const uint64_t size,
-                                    int64_t* const index,
-                                    double* const value);
-
-trexio_exit_code
-trexio_text_buffered_write_rdm_two_e(trexio_t* const file,
-                                     const uint64_t offset,
-                                     const uint64_t size,
-                                     const int64_t* index,
-                                     const double* value);
-
-
- -
-
trexio_exit_code
-trexio_text_buffered_read_rdm_two_e(trexio_t* const file,
-                                    const uint64_t offset,
-                                    const uint64_t size,
-                                    int64_t* const index,
-                                    double* const value)
-{
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (index == NULL) return TREXIO_INVALID_ARG_4;
-  if (value == NULL) return TREXIO_INVALID_ARG_5;
-
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
-
-  FILE* f = fopen(rdm->two_e_file_name, "r");
-  if (f == NULL) return TREXIO_END;
-
-  const uint64_t line_length = 64L;
-  fseek(f, (long) offset * line_length, SEEK_SET);
-
-  for (uint64_t i=0 ; i<size ; ++i) {
-    int rc = fscanf(f, "%9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %9" SCNd64 " %24le\n",
-           &index[4*i],
-           &index[4*i+1],
-           &index[4*i+2],
-           &index[4*i+3],
-           &value[i]);
-    if (rc == 5) {
-      /* Do nothing */
-    } else if (rc == EOF) {
-      return TREXIO_END;
+      return TREXIO_FAILURE;
     }
   }
 
+  /* Close the TXT file */
+  rc = fclose(f);
+  if (rc != 0) return TREXIO_FILE_ERROR;
+
+  /* Append .size to the file_full_path in order to write additional info about the written buffer of data */
+  strncat(file_full_path, ".size", 6);
+
+  /* Open the new file in "a" (append) mode to append info about the buffer that has been just written */
+  FILE *f_wSize = fopen(file_full_path, "a");
+  if (f_wSize == NULL) return TREXIO_FILE_ERROR;
+
+  /* Write the buffer_size */
+  rc = fprintf(f_wSize, "%" PRId64 " %" PRId64 "\n", size, io_start_pos);
+  if (rc <= 0) {
+    fclose(f_wSize);
+    return TREXIO_FAILURE;
+  }
+
+  /* Close the TXT file */
+  rc = fclose(f_wSize);
+  if (rc != 0) return TREXIO_FILE_ERROR;
+
+  /* Exit upon success */
   return TREXIO_SUCCESS;
 }
+
+
-trexio_exit_code -trexio_text_buffered_write_rdm_two_e(trexio_t* const file, - const uint64_t offset, - const uint64_t size, - const int64_t* index, - const double* value) +
+
trexio_exit_code trexio_text_read_$group_dset$(trexio_t* const file,
+                                               const int64_t offset_file,
+                                               const int64_t size,
+                                               const int64_t size_max,
+                                               int64_t* const eof_read_size,
+                                               int32_t* const index_sparse,
+                                               double* const value_sparse)
 {
-  if (file  == NULL) return TREXIO_INVALID_ARG_1;
-  if (index == NULL) return TREXIO_INVALID_ARG_4;
-  if (value == NULL) return TREXIO_INVALID_ARG_5;
-  if (file->mode != 'r') return TREXIO_READONLY;
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+  if (eof_read_size == NULL) return TREXIO_INVALID_ARG_5;
 
-  rdm_t* const rdm = trexio_text_read_rdm((trexio_text_t*) file);
-  if (rdm == NULL) return TREXIO_FAILURE;
+  /* Build the name of the file with sparse data.
+     The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
+   */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
 
-  FILE* f = fopen(rdm->two_e_file_name, "w");
-  if (f == NULL) return TREXIO_FAILURE;
+  /* Copy directory name in file_full_path */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
 
-  const uint64_t line_length = 64L;
-  fseek(f, (long) offset * line_length, SEEK_SET);
+  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
+  FILE* f = fopen(file_full_path, "r");
+  if(f == NULL) return TREXIO_FILE_ERROR;
 
-  for (uint64_t i=0 ; i<size ; ++i) {
-    int rc = fprintf(f, "%9" PRId64 " %9" PRId64 " %9" PRId64 " %9" PRId64 " %24le\n",
-           index[4*i],
-           index[4*i+1],
-           index[4*i+2],
-           index[4*i+3],
-           value[i]);
-    if (rc != 5) return TREXIO_FAILURE;
+  /* Specify the line length in order to offset properly. For example, for 4-index quantities
+     the line_length is 69 because 10 per index + 4 spaces + 24 for floating point value + 1 for the new line char
+   */
+  uint64_t line_length = 0UL;
+  /* Determine the line length depending on the size_max (usually mo_num or ao_num) */
+  if (size_max < UINT8_MAX) {
+    line_length = $sparse_line_length_8$; // 41 for 4 indices
+  } else if (size_max < UINT16_MAX) {
+    line_length = $sparse_line_length_16$; // 49 for 4 indices
+  } else {
+    line_length = $sparse_line_length_32$; //69 for 4 indices
   }
 
+  /* Offset in the file according to the provided  value of offset_file and optimal line_length */
+  fseek(f, (long) offset_file * line_length, SEEK_SET);
+
+  /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */
+  int rc;
+  char buffer[1024];
+  uint64_t count = 0UL;
+  for (uint64_t i=0UL; i<size; ++i) {
+
+      memset(buffer,0,sizeof(buffer));
+
+      if(fgets(buffer, 1023, f) == NULL){
+
+        fclose(f);
+        *eof_read_size = count;
+        return TREXIO_END;
+
+      } else {
+
+        rc = sscanf(buffer, "$group_dset_format_scanf$",
+                    $group_dset_sparse_indices_scanf$,
+                    value_sparse + i);
+        if(rc <= 0) {
+          fclose(f);
+          return TREXIO_FAILURE;
+        }
+        count += 1UL;
+
+      }
+  }
+
+  /* Close the TXT file */
+  rc = fclose(f);
+  if(rc != 0) return TREXIO_FILE_ERROR;
+
   return TREXIO_SUCCESS;
 }
 
+ + +
+
trexio_exit_code trexio_text_read_$group_dset$_size(trexio_t* const file, int64_t* const size_max)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  /* Build the name of the file with sparse data.
+     The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
+   */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt.size";
+  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
+
+  /* Copy directory name in file_full_path */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
+
+  /* Open the file in "r" (read) mode to guarantee that no truncation happens upon consecutive reads */
+  FILE* f = fopen(file_full_path, "r");
+  if(f == NULL) return TREXIO_FILE_ERROR;
+
+
+  /* Read the data from the file and check the return code of fprintf to verify that > 0 bytes have been read or reached EOF */
+  int rc;
+  int64_t size_item, offset_item, size_accum=0L;
+
+  /* Read the values from the file. BEWARE OF POSSIBLE MAX_INT64 OVERFLOW ! */
+  while(fscanf(f, "%" SCNd64 " %" SCNd64 "", &size_item, &offset_item) != EOF) {
+    /* Check that summation will not overflow the int64_t value */
+    if (INT64_MAX - size_accum > size_item) {
+      size_accum += size_item;
+    } else {
+      fclose(f);
+      *size_max = -1L;
+      return TREXIO_INT_SIZE_OVERFLOW;
+    }
+  }
+
+  /* Close the TXT file */
+  rc = fclose(f);
+  if(rc != 0) return TREXIO_FILE_ERROR;
+
+  /* Overwrite the value at the input address and return TREXIO_SUCCESS */
+  *size_max = size_accum;
+  return TREXIO_SUCCESS;
+
+}
+
+
+ +
+
trexio_exit_code trexio_text_has_$group_dset$(trexio_t* const file)
+{
+  if (file == NULL) return TREXIO_INVALID_ARG_1;
+
+  /* Build the name of the file with sparse data.
+     The $group_dset$.txt is limited to 256 symbols for the moment. What are the chances that it will exceed?
+   */
+  const char $group_dset$_file_name[256] = "/$group_dset$.txt";
+  /* The full path to the destination TXT file with sparse data. This will include TREXIO directory name. */
+  char file_full_path[TREXIO_MAX_FILENAME_LENGTH];
+
+  /* Copy directory name in file_full_path */
+  strncpy (file_full_path, file->file_name, TREXIO_MAX_FILENAME_LENGTH);
+  /* Append name of the file with sparse data */
+  strncat (file_full_path, $group_dset$_file_name,
+           TREXIO_MAX_FILENAME_LENGTH-strlen($group_dset$_file_name));
+
+  /* Check the return code of access function to determine whether the file with sparse data exists or not */
+  if (access(file_full_path, F_OK) == 0){
+    return TREXIO_SUCCESS;
+  } else {
+    return TREXIO_HAS_NOT;
+  }
+}
+
@@ -1667,7 +1565,7 @@ file for each sparse float structure.

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate

diff --git a/trex.html b/trex.html index 6cb1c4c..e62e92d 100644 --- a/trex.html +++ b/trex.html @@ -3,7 +3,7 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - + TREX Configuration file @@ -333,74 +333,84 @@ for the JavaScript code in this tag.

Table of Contents

-This page contains information about the general structure of the -TREXIO library. The source code of the library can be automatically -generated based on the contents of the trex.json configuration file, -which itself is compiled from different sections (groups) presented below. +This page contains information about the general structure of the +TREXIO library. The source code of the library can be automatically +generated based on the contents of the trex.json configuration file, +which itself is compiled from different sections (groups) presented +below.

-For more information about the automatic generation on the source code -or regarding possible modifications, please contact the TREXIO developers. +For more information about the automatic generation on the source code +or regarding possible modifications, please contact the TREXIO +developers.

-All quantities are saved in TREXIO file in atomic units. -The dimensions of the arrays in the tables below are given in -column-major order (as in Fortran), and the ordering of the dimensions -is reversed in the produced trex.json configuration file as the library is +All quantities are saved in TREXIO file in atomic units. The +dimensions of the arrays in the tables below are given in column-major +order (as in Fortran), and the ordering of the dimensions is reversed +in the produced trex.json configuration file as the library is written in C.

-TREXIO currently supports int, float and str types for both single attributes and arrays. -Note, that some attributes might have dim type (e.g. num of the nucleus group). -This type is treated exactly the same as int with the only difference that dim variables -cannot be negative or zero. This additional constraint is required because dim attributes -are used internally to allocate memory and to check array boundaries in the memory-safe API. -Most of the times, the dim variables contain num suffix. +TREXIO currently supports int, float and str types for both +single attributes and arrays. Note, that some attributes might have +dim type (e.g. num of the nucleus group). This type is treated +exactly the same as int with the only difference that dim +variables cannot be negative. This additional constraint is required +because dim attributes are used internally to allocate memory and to +check array boundaries in the memory-safe API. Most of the times, the +dim variables contain the num suffix.

-

In Fortran, the arrays are 1-based and in most other languages the arrays are 0-based. Hence, we introduce the index type which is a 1-based int in the Fortran interface and 0-based otherwise.

-
-

1 Metadata (metadata group)

+

+For sparse data structures such as electron repulsion integrals, +the data can be too large to fit in memory and the data needs to be +fetched using multiple function calls to perform I/O on buffers. +

+ + +
+

1 Metadata (metadata group)

As we expect our files to be archived in open-data repositories, we @@ -410,7 +420,7 @@ which have participated to the creation of the file, a list of authors of the file, and a textual description.

- +
@@ -477,15 +487,15 @@ authors of the file, and a textual description. -
-

2 Electron (electron group)

+
+

2 Electron (electron group)

We consider wave functions expressed in the spin-free formalism, where the number of ↑ and ↓ electrons is fixed.

-
+
@@ -524,15 +534,15 @@ the number of ↑ and ↓ electrons is fixed. -
-

3 Nucleus (nucleus group)

+
+

3 Nucleus (nucleus group)

The nuclei are considered as fixed point charges. Coordinates are given in Cartesian \((x,y,z)\) format.

-
+
@@ -587,13 +597,20 @@ given in Cartesian \((x,y,z)\) format. + + + + + + +
  Symmetry point group
repulsionfloat Nuclear repulsion energy
-
-

4 Effective core potentials (ecp group)

+
+

4 Effective core potentials (ecp group)

An effective core potential (ECP) \(V_A^{\text{ECP}}\) replacing the @@ -626,7 +643,7 @@ The functions \(V_{A\ell}\) are parameterized as: See http://dx.doi.org/10.1063/1.4984046 or https://doi.org/10.1063/1.5121006 for more info.

- +
@@ -724,8 +741,8 @@ The latter causes issues when written before ang_mom in the TREXIO

-
-

4.1 Example

+
+

4.1 Example

For example, consider H2 molecule with the following @@ -788,8 +805,8 @@ power = [

-
-

5 Basis set (basis group)

+
+

5 Basis set (basis group)

We consider here basis functions centered on nuclei. Hence, we enable @@ -842,7 +859,7 @@ If the the basis function is not considered normalized, \(\mathcal{N}_s=1\). All the basis set parameters are stored in one-dimensional arrays:

-
+
@@ -936,8 +953,8 @@ All the basis set parameters are stored in one-dimensional arrays:
-
-

5.1 Example

+
+

5.1 Example

For example, consider H2 with the following basis set (in GAMESS @@ -1015,8 +1032,8 @@ prim_factor =

-
-

6 Atomic orbitals (ao group)

+
+

6 Atomic orbitals (ao group)

Going from the atomic basis set to AOs implies a systematic @@ -1064,13 +1081,13 @@ shell, as in the GAMESS convention where

In such a case, one should set the normalization of the shell (in -the Basis set section) to \(\mathcal{N}_{z^2}\), which is the +the Basis set section) to \(\mathcal{N}_{z^2}\), which is the normalization factor of the atomic orbitals in spherical coordinates. The normalization factor of the \(xy\) function which should be introduced here should be \(\frac{\mathcal{N}_{xy}}{\mathcal{N}_{z^2}}\).

- +
@@ -1122,8 +1139,8 @@ introduced here should be \(\frac{\mathcal{N}_{xy}}{\mathcal{N}_{z^2}}\).
-
-

6.1 One-electron integrals (ao_1e_int group)

+
+

6.1 One-electron integrals (ao_1e_int group)

  • \[ \hat{V}_{\text{ne}} = \sum_{A=1}^{N_\text{nucl}} @@ -1141,7 +1158,7 @@ The one-electron integrals for a one-electron operator \(\hat{O}\) are over atomic orbitals.

    - +
    @@ -1208,8 +1225,8 @@ over atomic orbitals. -
    -

    6.2 Two-electron integrals (ao_2e_int group)

    +
    +

    6.2 Two-electron integrals (ao_2e_int group)

    The two-electron integrals for a two-electron operator \(\hat{O}\) are @@ -1230,7 +1247,7 @@ notation. \mathbf{r}_j \vert)}{\vert \mathbf{r}_i - \mathbf{r}_j \vert} \] : electron-electron long range potential -

    +
    @@ -1270,10 +1287,10 @@ notation. -
    -

    7 Molecular orbitals (mo group)

    +
    +

    7 Molecular orbitals (mo group)

    -
    +
    @@ -1339,8 +1356,8 @@ notation.
-
-

7.1 One-electron integrals (mo_1e_int group)

+
+

7.1 One-electron integrals (mo_1e_int group)

The operators are the same as those defined in the @@ -1348,7 +1365,7 @@ The operators are the same as those defined in the the basis of molecular orbitals.

- +
@@ -1415,8 +1432,8 @@ the basis of molecular orbitals. -
-

7.2 Two-electron integrals (mo_2e_int group)

+
+

7.2 Two-electron integrals (mo_2e_int group)

The operators are the same as those defined in the @@ -1424,7 +1441,7 @@ The operators are the same as those defined in the the basis of molecular orbitals.

-
+
@@ -1464,13 +1481,13 @@ the basis of molecular orbitals. -
-

8 TODO Slater determinants

+
+

8 TODO Slater determinants

-
-

9 TODO Reduced density matrices (rdm group)

+
+

9 Reduced density matrices (rdm group)

-
+
@@ -1492,31 +1509,52 @@ the basis of molecular orbitals. - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + +
one_e1e float (mo.num, mo.num) One body density matrix
one_e_up1e_up float (mo.num, mo.num) ↑-spin component of the one body density matrix
one_e_dn1e_dn float (mo.num, mo.num) ↓-spin component of the one body density matrix
two_e2e float sparse (mo.num, mo.num, mo.num, mo.num) Two-body reduced density matrix (spin trace)
2e_upupfloat sparse(mo.num, mo.num, mo.num, mo.num)↑↑ component of the two-body reduced density matrix
2e_dndnfloat sparse(mo.num, mo.num, mo.num, mo.num)↓↓ component of the two-body reduced density matrix
2e_updnfloat sparse(mo.num, mo.num, mo.num, mo.num)↑↓ component of the two-body reduced density matrix
@@ -1525,7 +1563,7 @@ the basis of molecular orbitals.

Author: TREX-CoE

-

Created: 2021-12-15 Wed 12:52

+

Created: 2021-12-17 Fri 16:14

Validate