Merge pull request #98 from TREX-CoE/jastrow

Jastrow
2025-02-18 15:44:22 +01:00 · 2023-01-04 10:11:05 +01:00 · 2023-01-04 10:11:05 +01:00 · bc5f70faad
commit bc5f70faad
parent cadcf1f62d 52d45c1e51
6 changed files with 1481 additions and 935 deletions
--- a/1
+++ b/1
@ -19,6 +19,7 @@ CHANGES
 - Added OCaml binding
 - Added spin and energy in MOs
 - Added CSF group
+- Added Jastrow group
 - Added Amplitude group
 - Added Cholesky-decomposed two-electron integrals
 - Added Cholesky-decomposed RDMs for Gammcor
--- a/Makefile.am
+++ b/Makefile.am
@ -100,6 +100,7 @@ TESTS_C = \
  tests/io_dset_int_text \
  tests/io_dset_sparse_text \
  tests/io_determinant_text \
+  tests/io_jastrow_text \
  tests/io_safe_dset_float_text \
  tests/io_str_text \
  tests/io_dset_str_text \
@ -116,6 +117,7 @@ TESTS_C += \
  tests/io_dset_int_hdf5 \
  tests/io_dset_sparse_hdf5 \
  tests/io_determinant_hdf5 \
+  tests/io_jastrow_hdf5 \
  tests/io_safe_dset_float_hdf5 \
  tests/io_str_hdf5 \
  tests/io_dset_str_hdf5 \
--- a/python/install_pytrexio.sh
+++ b/python/install_pytrexio.sh
@ -73,7 +73,7 @@ python3 -m pip install dist/trexio-*.whl --force-reinstall
 #python3 -m twine upload dist/trexio-*.tar.gz

 # Cleaning
-#rm -rf build dist trexio.egg-info
+rm -rf build dist trexio.egg-info

 # Additional information related to the installation of the TREXIO Python API

--- a/tests/io_jastrow_hdf5.c
+++ b/tests/io_jastrow_hdf5.c
@ -0,0 +1,198 @@
+#include "trexio.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#define TEST_BACKEND  TREXIO_HDF5
+#define TREXIO_FILE   "test_jastrow.h5"
+#define RM_COMMAND    "rm -f -- " TREXIO_FILE
+
+/* Write a complete set of Jastrow test data into a TREXIO file.
+ *
+ * Opens file_name in 'w' mode with the given back end, writes nucleus_num
+ * followed by the Jastrow type, the ee/en/een dimensions, the parameter
+ * arrays, the nucleus mappings and the scaling factors, then closes the
+ * file. Every TREXIO call is asserted to return TREXIO_SUCCESS.
+ * Always returns 0.
+ */
+static int test_write_jastrow (const char* file_name, const back_end_t backend) {
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'write' mode
+  file = trexio_open(file_name, 'w', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+#define nucleus_num  3
+#define ee_num 2
+#define en_num 3
+#define een_num 6
+
+  // dimensions and type are written first, then the arrays
+  rc = trexio_write_nucleus_num(file, nucleus_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_type(file, "CHAMP", 6);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_ee_num(file, ee_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_en_num(file, en_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_een_num(file, een_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  // parameter arrays
+  double ee [2] = { 0.5, 2. };
+  rc = trexio_write_jastrow_ee(file, ee);
+  assert (rc == TREXIO_SUCCESS);
+
+  double en [3] = { 1., 2., 3. };
+  rc = trexio_write_jastrow_en(file, en);
+  assert (rc == TREXIO_SUCCESS);
+
+  double een [6] = { 11., 12., 13., 14., 15., 16. };
+  rc = trexio_write_jastrow_een(file, een);
+  assert (rc == TREXIO_SUCCESS);
+
+  // nucleus mappings for the en and een parameters
+  int en_nucleus [3] = { 0, 1, 2 };
+  rc = trexio_write_jastrow_en_nucleus(file, en_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+
+  int een_nucleus [6] = { 0, 0, 1, 1, 2, 2 };
+  rc = trexio_write_jastrow_een_nucleus(file, een_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+
+  // scaling factors
+  double ee_scaling = 1.0;
+  rc = trexio_write_jastrow_ee_scaling(file, ee_scaling);
+  assert (rc == TREXIO_SUCCESS);
+
+  double en_scaling[3] = { 0.5, 1.0, 0.5 };
+  rc = trexio_write_jastrow_en_scaling(file, en_scaling);
+  assert (rc == TREXIO_SUCCESS);
+
+#undef nucleus_num
+#undef ee_num
+#undef en_num
+#undef een_num
+
+  // a failed close must fail the test too, so check its result
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+/* Read back the Jastrow test data and check every value.
+ *
+ * Opens file_name in 'r' mode with the given back end and reads nucleus_num,
+ * the Jastrow type, the ee/en/een dimensions, the parameter arrays, the
+ * nucleus mappings and the scaling factors. Each call is asserted to return
+ * TREXIO_SUCCESS and each value is compared with the expected constant.
+ * Floating-point buffers start at zero (never an expected value), so a read
+ * that leaves its buffer untouched is detected. Always returns 0.
+ */
+static int test_read_jastrow (const char* file_name, const back_end_t backend) {
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'read' mode
+  file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  int nucleus_num = 0;
+  rc = trexio_read_nucleus_num(file, &nucleus_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (nucleus_num == 3);
+
+  char type[16] = "";
+  rc = trexio_read_jastrow_type(file, type, 16);
+  assert (rc == TREXIO_SUCCESS);
+  assert (strcmp("CHAMP",type) == 0);
+
+  int ee_num = 0;
+  rc = trexio_read_jastrow_ee_num(file, &ee_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee_num == 2);
+
+  int en_num = 0;
+  rc = trexio_read_jastrow_en_num(file, &en_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_num == nucleus_num);
+
+  int een_num = 0;
+  rc = trexio_read_jastrow_een_num(file, &een_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een_num == 2*nucleus_num);
+
+  double ee [2] = { 0., 0. };
+  rc = trexio_read_jastrow_ee(file, ee);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee[0] == 0.5);
+  assert (ee[1] == 2.0);
+
+  double en [3] = { 0., 0., 0. };
+  rc = trexio_read_jastrow_en(file, en);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en[0] == 1.0);
+  assert (en[1] == 2.0);
+  assert (en[2] == 3.0);
+
+  // zero-initialised like the other buffers instead of left indeterminate
+  double een [6] = { 0., 0., 0., 0., 0., 0. };
+  rc = trexio_read_jastrow_een(file, een);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een[0] == 11.0);
+  assert (een[1] == 12.0);
+  assert (een[2] == 13.0);
+  assert (een[3] == 14.0);
+  assert (een[4] == 15.0);
+  assert (een[5] == 16.0);
+
+  int en_nucleus [3] = { 0, 0, 0 };
+  rc = trexio_read_jastrow_en_nucleus(file, en_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_nucleus[0] == 0);
+  assert (en_nucleus[1] == 1);
+  assert (en_nucleus[2] == 2);
+
+  int een_nucleus [6] = { 0, 0, 0, 0, 0, 0 };
+  rc = trexio_read_jastrow_een_nucleus(file, een_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een_nucleus[0] == 0);
+  assert (een_nucleus[1] == 0);
+  assert (een_nucleus[2] == 1);
+  assert (een_nucleus[3] == 1);
+  assert (een_nucleus[4] == 2);
+  assert (een_nucleus[5] == 2);
+
+  double ee_scaling = 0.0;
+  rc = trexio_read_jastrow_ee_scaling(file, &ee_scaling);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee_scaling == 1.0);
+
+  // start from zeros: seeding the buffer with the expected values would
+  // make the checks below pass even if nothing were read
+  double en_scaling[3] = { 0., 0., 0. };
+  rc = trexio_read_jastrow_en_scaling(file, en_scaling);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_scaling[0] == 0.5);
+  assert (en_scaling[1] == 1.0);
+  assert (en_scaling[2] == 0.5);
+
+  // a failed close must fail the test too, so check its result
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+
+/* Test launcher: remove any existing test file, run the write test and
+ * then the read test on the same file, and remove the file again. */
+int main(){
+
+  // remove any pre-existing test file before running
+  int status = system(RM_COMMAND);
+  assert (status == 0);
+
+  test_write_jastrow (TREXIO_FILE, TEST_BACKEND);
+  test_read_jastrow  (TREXIO_FILE, TEST_BACKEND);
+
+  // remove the file again once both tests have run
+  status = system(RM_COMMAND);
+  assert (status == 0);
+
+  return 0;
+}
--- a/tests/io_jastrow_text.c
+++ b/tests/io_jastrow_text.c
@ -0,0 +1,198 @@
+#include "trexio.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#define TEST_BACKEND  TREXIO_TEXT
+#define TREXIO_FILE   "test_jastrow.dir"
+#define RM_COMMAND    "rm -f -- " TREXIO_FILE "/*.txt " TREXIO_FILE "/*.txt.size " TREXIO_FILE "/.lock && rm -fd -- " TREXIO_FILE
+
+/* Write a complete set of Jastrow test data into a TREXIO file.
+ *
+ * Opens file_name in 'w' mode with the given back end, writes nucleus_num
+ * followed by the Jastrow type, the ee/en/een dimensions, the parameter
+ * arrays, the nucleus mappings and the scaling factors, then closes the
+ * file. Every TREXIO call is asserted to return TREXIO_SUCCESS.
+ * Always returns 0.
+ */
+static int test_write_jastrow (const char* file_name, const back_end_t backend) {
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'write' mode
+  file = trexio_open(file_name, 'w', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+#define nucleus_num  3
+#define ee_num 2
+#define en_num 3
+#define een_num 6
+
+  // dimensions and type are written first, then the arrays
+  rc = trexio_write_nucleus_num(file, nucleus_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_type(file, "CHAMP", 6);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_ee_num(file, ee_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_en_num(file, en_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  rc = trexio_write_jastrow_een_num(file, een_num);
+  assert (rc == TREXIO_SUCCESS);
+
+  // parameter arrays
+  double ee [2] = { 0.5, 2. };
+  rc = trexio_write_jastrow_ee(file, ee);
+  assert (rc == TREXIO_SUCCESS);
+
+  double en [3] = { 1., 2., 3. };
+  rc = trexio_write_jastrow_en(file, en);
+  assert (rc == TREXIO_SUCCESS);
+
+  double een [6] = { 11., 12., 13., 14., 15., 16. };
+  rc = trexio_write_jastrow_een(file, een);
+  assert (rc == TREXIO_SUCCESS);
+
+  // nucleus mappings for the en and een parameters
+  int en_nucleus [3] = { 0, 1, 2 };
+  rc = trexio_write_jastrow_en_nucleus(file, en_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+
+  int een_nucleus [6] = { 0, 0, 1, 1, 2, 2 };
+  rc = trexio_write_jastrow_een_nucleus(file, een_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+
+  // scaling factors
+  double ee_scaling = 1.0;
+  rc = trexio_write_jastrow_ee_scaling(file, ee_scaling);
+  assert (rc == TREXIO_SUCCESS);
+
+  double en_scaling[3] = { 0.5, 1.0, 0.5 };
+  rc = trexio_write_jastrow_en_scaling(file, en_scaling);
+  assert (rc == TREXIO_SUCCESS);
+
+#undef nucleus_num
+#undef ee_num
+#undef en_num
+#undef een_num
+
+  // a failed close must fail the test too, so check its result
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+/* Read back the Jastrow test data and check every value.
+ *
+ * Opens file_name in 'r' mode with the given back end and reads nucleus_num,
+ * the Jastrow type, the ee/en/een dimensions, the parameter arrays, the
+ * nucleus mappings and the scaling factors. Each call is asserted to return
+ * TREXIO_SUCCESS and each value is compared with the expected constant.
+ * Floating-point buffers start at zero (never an expected value), so a read
+ * that leaves its buffer untouched is detected. Always returns 0.
+ */
+static int test_read_jastrow (const char* file_name, const back_end_t backend) {
+
+  trexio_t* file = NULL;
+  trexio_exit_code rc;
+
+/*================= START OF TEST ==================*/
+
+  // open file in 'read' mode
+  file = trexio_open(file_name, 'r', backend, &rc);
+  assert (file != NULL);
+  assert (rc == TREXIO_SUCCESS);
+
+  int nucleus_num = 0;
+  rc = trexio_read_nucleus_num(file, &nucleus_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (nucleus_num == 3);
+
+  char type[16] = "";
+  rc = trexio_read_jastrow_type(file, type, 16);
+  assert (rc == TREXIO_SUCCESS);
+  assert (strcmp("CHAMP",type) == 0);
+
+  int ee_num = 0;
+  rc = trexio_read_jastrow_ee_num(file, &ee_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee_num == 2);
+
+  int en_num = 0;
+  rc = trexio_read_jastrow_en_num(file, &en_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_num == nucleus_num);
+
+  int een_num = 0;
+  rc = trexio_read_jastrow_een_num(file, &een_num);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een_num == 2*nucleus_num);
+
+  double ee [2] = { 0., 0. };
+  rc = trexio_read_jastrow_ee(file, ee);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee[0] == 0.5);
+  assert (ee[1] == 2.0);
+
+  double en [3] = { 0., 0., 0. };
+  rc = trexio_read_jastrow_en(file, en);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en[0] == 1.0);
+  assert (en[1] == 2.0);
+  assert (en[2] == 3.0);
+
+  // zero-initialised like the other buffers instead of left indeterminate
+  double een [6] = { 0., 0., 0., 0., 0., 0. };
+  rc = trexio_read_jastrow_een(file, een);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een[0] == 11.0);
+  assert (een[1] == 12.0);
+  assert (een[2] == 13.0);
+  assert (een[3] == 14.0);
+  assert (een[4] == 15.0);
+  assert (een[5] == 16.0);
+
+  int en_nucleus [3] = { 0, 0, 0 };
+  rc = trexio_read_jastrow_en_nucleus(file, en_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_nucleus[0] == 0);
+  assert (en_nucleus[1] == 1);
+  assert (en_nucleus[2] == 2);
+
+  int een_nucleus [6] = { 0, 0, 0, 0, 0, 0 };
+  rc = trexio_read_jastrow_een_nucleus(file, een_nucleus);
+  assert (rc == TREXIO_SUCCESS);
+  assert (een_nucleus[0] == 0);
+  assert (een_nucleus[1] == 0);
+  assert (een_nucleus[2] == 1);
+  assert (een_nucleus[3] == 1);
+  assert (een_nucleus[4] == 2);
+  assert (een_nucleus[5] == 2);
+
+  double ee_scaling = 0.0;
+  rc = trexio_read_jastrow_ee_scaling(file, &ee_scaling);
+  assert (rc == TREXIO_SUCCESS);
+  assert (ee_scaling == 1.0);
+
+  // start from zeros: seeding the buffer with the expected values would
+  // make the checks below pass even if nothing were read
+  double en_scaling[3] = { 0., 0., 0. };
+  rc = trexio_read_jastrow_en_scaling(file, en_scaling);
+  assert (rc == TREXIO_SUCCESS);
+  assert (en_scaling[0] == 0.5);
+  assert (en_scaling[1] == 1.0);
+  assert (en_scaling[2] == 0.5);
+
+  // a failed close must fail the test too, so check its result
+  rc = trexio_close(file);
+  assert (rc == TREXIO_SUCCESS);
+/*================= END OF TEST ==================*/
+
+  return 0;
+}
+
+
+/* Test launcher: remove any existing test directory contents, run the
+ * write test and then the read test on the same file, and clean up again. */
+int main(){
+
+  // remove any pre-existing test files before running
+  int status = system(RM_COMMAND);
+  assert (status == 0);
+
+  test_write_jastrow (TREXIO_FILE, TEST_BACKEND);
+  test_read_jastrow  (TREXIO_FILE, TEST_BACKEND);
+
+  // remove the files again once both tests have run
+  status = system(RM_COMMAND);
+  assert (status == 0);
+
+  return 0;
+}
--- a/trex.org
+++ b/trex.org
@ -39,24 +39,28 @@ the [[./examples.html][examples]]. The ~sparse~ data representation implies the
 [[https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_(COO)][coordinate list]] representation, namely the user has to write a list
 of indices and values.

-For the Configuration Interfaction (CI) and Configuration State Function (CSF)
+For the Configuration Interaction (CI) and Configuration State Function (CSF)
 groups, the ~buffered~ data type is introduced, which allows similar incremental
 I/O as for ~sparse~ data but without the need to write indices of the sparse values.

 For determinant lists (integer bit fields), the ~special~ attribute is present in the type.
 This means that the source code is not produced by the generator, but hand-written.

+Some data may be complex. In that case, the real part should be stored
+in the variable, and the imaginary part should be stored in the
+variable with the same name suffixed by ~_im~.
+
  #+begin_src python :tangle trex.json :exports none
 {
  #+end_src

 * Metadata (metadata group)

-  As we expect our files to be archived in open-data repositories, we
-  need to give the possibility to the users to store some metadata
-  inside the files. We propose to store the list of names of the codes
-  which have participated to the creation of the file, a list of
-  authors of the file, and a textual description.
+  As we expect TREXIO files to be archived in open-data repositories,
+  we give users the possibility to store some metadata inside the
+  files. We propose to store the list of names of the codes which
+  have participated in the creation of the file, a list of authors of
+  the file, and a textual description.

  #+NAME: metadata
  | Variable          | Type  | Dimensions (for arrays) | Description                              |
@ -69,10 +73,12 @@ This means that the source code is not produced by the generator, but hand-writt
  | ~description~     | ~str~ |                         | Text describing the content of file      |
  | ~unsafe~          | ~int~ |                         | ~1~: true, ~0~: false                    |

-  **Note:** ~unsafe~ attribute of the ~metadata~ group indicates whether the file has been previously opened with ~'u'~ mode.
-  It is automatically written in the file upon the first unsafe opening.
-  If the user has checked that the TREXIO file is valid (e.g. using ~trexio-tools~) after unsafe operations,
-  then the ~unsafe~ attribute value can be manually overwritten (in unsafe mode) from ~1~ to ~0~.
+  **Note:** The ~unsafe~ attribute of the ~metadata~ group indicates
+  whether the file has been previously opened with ~'u'~ mode.  It is
+  automatically written in the file upon the first unsafe opening.  If
+  the user has checked that the TREXIO file is valid (e.g. using
+  ~trexio-tools~) after unsafe operations, then the ~unsafe~ attribute
+  value can be manually overwritten (in unsafe mode) from ~1~ to ~0~.

  #+CALL: json(data=metadata, title="metadata")
  #+RESULTS:
@ -90,31 +96,8 @@ This means that the source code is not produced by the generator, but hand-writt
  #+end_src
  :end:

-* Electron (electron group)
-
-   We consider wave functions expressed in the spin-free formalism, where
-   the number of \uparrow and \downarrow electrons is fixed.
-
-  #+NAME:electron
-  | Variable | Type  | Dimensions | Description                         |
-  |----------+-------+------------+-------------------------------------|
-  | ~num~    | ~dim~ |            | Number of electrons                 |
-  | ~up_num~ | ~int~ |            | Number of \uparrow-spin electrons   |
-  | ~dn_num~ | ~int~ |            | Number of \downarrow-spin electrons |
-
-  #+CALL: json(data=electron, title="electron")
-  #+RESULTS:
-  :results:
-  #+begin_src python :tangle trex.json
-      "electron": {
-             "num" : [ "dim", []  ]
-        , "up_num" : [ "int", []  ]
-        , "dn_num" : [ "int", []  ]
-      } ,
-  #+end_src
-  :end:
-
-* Nucleus (nucleus group)
+* System
+** Nucleus (nucleus group)

   The nuclei are considered as fixed point charges. Coordinates are
   given in Cartesian $(x,y,z)$ format.
@ -144,140 +127,119 @@ This means that the source code is not produced by the generator, but hand-writt
   #+end_src
   :end:

-* Effective core potentials (ecp group)
+** Cell (cell group)

-  An effective core potential (ECP) $V_A^{\text{ECP}}$ replacing the
-  core electrons of atom $A$ can be expressed as
-  \[
-  V_A^{\text{ECP}} =
-  V_{A \ell_{\max}+1} +
-  \sum_{\ell=0}^{\ell_{\max}}
-  \sum_{m=-\ell}^{\ell} | Y_{\ell m} \rangle \left[
-  V_{A \ell} - V_{A \ell_{\max}+1} \right] \langle Y_{\ell m} |
-  \]
+   Three lattice vectors defining a box containing the system, used for
+   example in periodic calculations.

-  The first term in the equation above is sometimes attributed to the local channel,
-  while the remaining terms correspond to the non-local channel projections.
-
-  The functions $V_{A\ell}$ are parameterized as:
-  \[
-  V_{A \ell}(\mathbf{r}) =
-  \sum_{q=1}^{N_{q \ell}}
-  \beta_{A q \ell}\, |\mathbf{r}-\mathbf{R}_{A}|^{n_{A q \ell}}\,
-  e^{-\alpha_{A q \ell} |\mathbf{r}-\mathbf{R}_{A}|^2 }
-  \]
-
-  See http://dx.doi.org/10.1063/1.4984046 or https://doi.org/10.1063/1.5121006 for more info.
-
-  #+NAME: ecp
+   #+NAME: cell
   | Variable | Type    | Dimensions | Description           |
-  |----------------------+---------+-----------------+----------------------------------------------------------------------------------------|
-  | ~max_ang_mom_plus_1~ | ~int~   | ~(nucleus.num)~ | $\ell_{\max}+1$, one higher than the max angular momentum in the removed core orbitals |
-  | ~z_core~             | ~int~   | ~(nucleus.num)~ | Number of core electrons to remove per atom                                            |
-  | ~num~                | ~dim~   |                 | Total number of ECP functions for all atoms and all values of $\ell$                   |
-  | ~ang_mom~            | ~int~   | ~(ecp.num)~     | One-to-one correspondence between ECP items and the angular momentum $\ell$            |
-  | ~nucleus_index~      | ~index~ | ~(ecp.num)~     | One-to-one correspondence between ECP items and the atom index                         |
-  | ~exponent~           | ~float~ | ~(ecp.num)~     | $\alpha_{A q \ell}$ all ECP exponents                                                  |
-  | ~coefficient~        | ~float~ | ~(ecp.num)~     | $\beta_{A q \ell}$ all ECP coefficients                                                |
-  | ~power~              | ~int~   | ~(ecp.num)~     | $n_{A q \ell}$ all ECP powers                                                          |
+   |----------+---------+------------+-----------------------|
+   | ~a~      | ~float~ | ~(3)~      | First lattice vector  |
+   | ~b~      | ~float~ | ~(3)~      | Second lattice vector |
+   | ~c~      | ~float~ | ~(3)~      | Third lattice vector  |

-
-There might be some confusion in the meaning of the $\ell_{\max}$.
-It can be attributed to the maximum angular momentum occupied
-in the core orbitals, which are removed by the ECP.
-On the other hand, it can be attributed to the maximum angular momentum of the
-ECP that replaces the core electrons.
-*Note*, that the latter $\ell_{\max}$ is always higher by 1 than the former.
-
-
-*Note for developers*: avoid having variables with similar prefix in their name.
-HDF5 back end might cause issues due to the way ~find_dataset~ function works.
-For example, in the ECP group we use ~max_ang_mom~ and not ~ang_mom_max~.
-The latter causes issues when written before the ~ang_mom~ array in the TREXIO file.
-*Update*: in fact, the aforementioned issue has only been observed when using HDF5 version 1.10.4
-installed via ~apt-get~. Installing the same version from the ~conda-forge~ channel and running it in
-an isolated ~conda~ environment works just fine. Thus, it seems to be a bug in the ~apt~-provided package.
-If you encounter the aforementioned issue, please report it to our [[https://github.com/TREX-CoE/trexio/issues][issue tracker on GitHub]].
-
-
-  #+CALL: json(data=ecp, title="ecp")
+   #+CALL: json(data=cell, title="cell")

   #+RESULTS:
   :results:
   #+begin_src python :tangle trex.json
-      "ecp": {
-          "max_ang_mom_plus_1" : [ "int"  , [ "nucleus.num" ] ]
-        ,             "z_core" : [ "int"  , [ "nucleus.num" ] ]
-        ,                "num" : [ "dim"  , []                ]
-        ,            "ang_mom" : [ "int"  , [ "ecp.num" ]     ]
-        ,      "nucleus_index" : [ "index", [ "ecp.num" ]     ]
-        ,           "exponent" : [ "float", [ "ecp.num" ]     ]
-        ,        "coefficient" : [ "float", [ "ecp.num" ]     ]
-        ,              "power" : [ "int"  , [ "ecp.num" ]     ]
+       "cell": {
+           "a" : [ "float", [ "3" ] ]
+         , "b" : [ "float", [ "3" ] ]
+         , "c" : [ "float", [ "3" ] ]
       } ,
   #+end_src
   :end:

-** Example
+** Periodic boundary calculations (pbc group)

-  For example, consider H_2 molecule with the following
-  [[https://pseudopotentiallibrary.org/recipes/H/ccECP/H.ccECP.gamess][effective core potential]]
-  (in GAMESS input format for the H atom):
+   A single $k$-point per TREXIO file can be stored. The $k$-point is
+   defined in this group.

-   #+BEGIN_EXAMPLE
-H-ccECP GEN 0 1
-3
-1.00000000000000    1 21.24359508259891
-21.24359508259891   3 21.24359508259891
-10.85192405303825  2 21.77696655044365
-1
-0.00000000000000    2 1.000000000000000
-   #+END_EXAMPLE
+   #+NAME: pbc
+   | Variable   | Type    | Dimensions | Description             |
+   |------------+---------+------------+-------------------------|
+   | ~periodic~ | ~int~   |            | ~1~: true or ~0~: false |
+   | ~k_point~  | ~float~ | ~(3)~      | $k$-point sampling      |

-   In TREXIO representation this would be:
+   #+CALL: json(data=pbc, title="pbc")

-   #+BEGIN_EXAMPLE
-num = 8
+   #+RESULTS:
+   :results:
+   #+begin_src python :tangle trex.json
+       "pbc": {
+           "periodic" : [ "int"  , []      ]
+         ,  "k_point" : [ "float", [ "3" ] ]
+       } ,
+   #+end_src
+   :end:

-# lmax+1 per atom
-max_ang_mom_plus_1 = [ 1, 1 ]
+** Electron (electron group)

-# number of core electrons to remove per atom
-zcore = [ 0, 0 ]
+    We consider wave functions expressed in the spin-free formalism, where
+    the number of \uparrow and \downarrow electrons is fixed.

-# first 4 ECP elements correspond to the first H atom ; the remaining 4 elements are for the second H atom
-nucleus_index = [
-  0, 0, 0, 0,
-  1, 1, 1, 1
-  ]
+   #+NAME:electron
+   | Variable | Type  | Dimensions | Description                         |
+   |----------+-------+------------+-------------------------------------|
+   | ~num~    | ~dim~ |            | Number of electrons                 |
+   | ~up_num~ | ~int~ |            | Number of \uparrow-spin electrons   |
+   | ~dn_num~ | ~int~ |            | Number of \downarrow-spin electrons |

-# 3 first ECP elements correspond to potential of the P orbital (l=1), then 1 element for the S orbital (l=0) ; similar for the second H atom
-ang_mom = [
-  1, 1, 1, 0,
-  1, 1, 1, 0
-  ]
+   #+CALL: json(data=electron, title="electron")
+   #+RESULTS:
+   :results:
+   #+begin_src python :tangle trex.json
+       "electron": {
+              "num" : [ "dim", []  ]
+         , "up_num" : [ "int", []  ]
+         , "dn_num" : [ "int", []  ]
+       } ,
+   #+end_src
+   :end:

-# ECP quantities that can be attributed to atoms and/or angular momenta based on the aforementioned ecp_nucleus and ecp_ang_mom arrays
-coefficient = [
-  1.00000000000000, 21.24359508259891, -10.85192405303825, 0.00000000000000,
-  1.00000000000000, 21.24359508259891, -10.85192405303825, 0.00000000000000
-  ]
+** Ground or excited states (state group)

-exponent = [
-  21.24359508259891, 21.24359508259891, 21.77696655044365, 1.000000000000000,
-  21.24359508259891, 21.24359508259891, 21.77696655044365, 1.000000000000000
-  ]
+   This group contains information about excited states. Since only a
+   single state can be stored in a TREXIO file, it is possible to store
+   in the main TREXIO file the names of auxiliary files containing the
+   information of the other states.

-power = [
-  -1, 1, 0, 0,
-  -1, 1, 0, 0
-  ]
-   #+END_EXAMPLE
+   The ~file_name~ and ~label~ arrays have to be written only for the
+   main file, e.g. the one containing the ground state wave function
+   together with the basis set parameters, molecular orbitals,
+   integrals, etc.
+   The ~id~ and ~current_label~ attributes need to be specified for each file.

-* Basis set (basis group)
+   #+NAME: state
+   | Variable        | Type  | Dimensions    | Description                                                                                 |
+   |-----------------+-------+---------------+---------------------------------------------------------------------------------------------|
+   | ~num~           | ~dim~ |               | Number of states (including the ground state)                                               |
+   | ~id~            | ~int~ |               | Index of the current state (0 is ground state)                                              |
+   | ~current_label~ | ~str~ |               | Label of the current state                                                                  |
+   | ~label~         | ~str~ | ~(state.num)~ | Labels of all states                                                                        |
+   | ~file_name~     | ~str~ | ~(state.num)~ | Names of the TREXIO files linked to the current one (i.e. containing data for other states) |

+   #+CALL: json(data=state, title="state")

-** Gaussian and Slater-type orbitals
+   #+RESULTS:
+   :results:
+   #+begin_src python :tangle trex.json
+       "state": {
+                     "num" : [ "dim", []              ]
+         ,            "id" : [ "int", []              ]
+         , "current_label" : [ "str", []              ]
+         ,         "label" : [ "str", [ "state.num" ] ]
+         ,     "file_name" : [ "str", [ "state.num" ] ]
+       } ,
+   #+end_src
+   :end:
+
+* Basis functions
+** Basis set (basis group)
+
+*** Gaussian and Slater-type orbitals

    We consider here basis functions centered on nuclei. Hence, we enable
    the possibility to define /dummy atoms/ to place basis functions in
@ -315,7 +277,7 @@ power = [

    All the basis set parameters are stored in one-dimensional arrays.

-** Plane waves
+*** Plane waves

    A plane wave is defined as

@ -327,7 +289,7 @@ power = [
    reciprocal space, defined in the ~pbc~ group. The kinetic energy
    cutoff ~e_cut~ is the only input data relevant to plane waves.

-** Data definitions
+*** Data definitions

    #+NAME: basis
    | Variable        | Type    | Dimensions          | Description                                                     |
@ -368,7 +330,7 @@ power = [
    #+end_src
    :end:

-** Example
+*** Example

   For example, consider H_2 with the following basis set (in GAMESS
   format), where both the AOs and primitives are considered normalized:
@ -439,7 +401,191 @@ prim_factor =
  4.3649547399719840e-01, 1.8135965626177861e+00 ]
    #+END_EXAMPLE

-* Atomic orbitals (ao group)
+** Effective core potentials (ecp group)
+
+   An effective core potential (ECP) $V_A^{\text{ECP}}$ replacing the
+   core electrons of atom $A$ can be expressed as
+   \[
+   V_A^{\text{ECP}} =
+   V_{A \ell_{\max}+1} +
+   \sum_{\ell=0}^{\ell_{\max}}
+   \sum_{m=-\ell}^{\ell} | Y_{\ell m} \rangle \left[
+   V_{A \ell} - V_{A \ell_{\max}+1} \right] \langle Y_{\ell m} |
+   \]
+
+   The first term in the equation above is sometimes attributed to the local channel,
+   while the remaining terms correspond to the non-local channel projections.
+
+   The functions $V_{A\ell}$ are parameterized as:
+   \[
+   V_{A \ell}(\mathbf{r}) =
+   \sum_{q=1}^{N_{q \ell}}
+   \beta_{A q \ell}\, |\mathbf{r}-\mathbf{R}_{A}|^{n_{A q \ell}}\,
+   e^{-\alpha_{A q \ell} |\mathbf{r}-\mathbf{R}_{A}|^2 }
+   \]
+
+   See http://dx.doi.org/10.1063/1.4984046 or https://doi.org/10.1063/1.5121006 for more info.
+
+   #+NAME: ecp
+   | Variable             | Type    | Dimensions      | Description                                                                            |
+   |----------------------+---------+-----------------+----------------------------------------------------------------------------------------|
+   | ~max_ang_mom_plus_1~ | ~int~   | ~(nucleus.num)~ | $\ell_{\max}+1$, one higher than the max angular momentum in the removed core orbitals |
+   | ~z_core~             | ~int~   | ~(nucleus.num)~ | Number of core electrons to remove per atom                                            |
+   | ~num~                | ~dim~   |                 | Total number of ECP functions for all atoms and all values of $\ell$                   |
+   | ~ang_mom~            | ~int~   | ~(ecp.num)~     | One-to-one correspondence between ECP items and the angular momentum $\ell$            |
+   | ~nucleus_index~      | ~index~ | ~(ecp.num)~     | One-to-one correspondence between ECP items and the atom index                         |
+   | ~exponent~           | ~float~ | ~(ecp.num)~     | $\alpha_{A q \ell}$ all ECP exponents                                                  |
+   | ~coefficient~        | ~float~ | ~(ecp.num)~     | $\beta_{A q \ell}$ all ECP coefficients                                                |
+   | ~power~              | ~int~   | ~(ecp.num)~     | $n_{A q \ell}$ all ECP powers                                                          |
+
+
+   There might be some confusion about the meaning of $\ell_{\max}$.
+   It can be attributed to the maximum angular momentum occupied in
+   the core orbitals, which are removed by the ECP. On the other
+   hand, it can be attributed to the maximum angular momentum of the
+   ECP that replaces the core electrons.
+   *Note* that the latter $\ell_{\max}$ is always higher by 1 than the former.
+   
+   *Note for developers*: avoid having variables with similar prefix
+   in their name. The HDF5 back end might cause issues due to the way
+   ~find_dataset~ function works.  For example, in the ECP group we
+   use ~max_ang_mom_plus_1~ and not ~ang_mom_max~. The latter causes issues
+   when written before the ~ang_mom~ array in the TREXIO file.
+   *Update*: in fact, the aforementioned issue has only been observed
+   when using HDF5 version 1.10.4 installed via ~apt-get~. Installing
+   the same version from the ~conda-forge~ channel and running it in
+   an isolated ~conda~ environment works just fine. Thus, it seems to
+   be a bug in the ~apt~-provided package.
+   If you encounter the aforementioned issue, please report it to our
+   [[https://github.com/TREX-CoE/trexio/issues][issue tracker on GitHub]].
+
+   #+CALL: json(data=ecp, title="ecp")
+
+   #+RESULTS:
+   :results:
+   #+begin_src python :tangle trex.json
+       "ecp": {
+           "max_ang_mom_plus_1" : [ "int"  , [ "nucleus.num" ] ]
+         ,             "z_core" : [ "int"  , [ "nucleus.num" ] ]
+         ,                "num" : [ "dim"  , []                ]
+         ,            "ang_mom" : [ "int"  , [ "ecp.num" ]     ]
+         ,      "nucleus_index" : [ "index", [ "ecp.num" ]     ]
+         ,           "exponent" : [ "float", [ "ecp.num" ]     ]
+         ,        "coefficient" : [ "float", [ "ecp.num" ]     ]
+         ,              "power" : [ "int"  , [ "ecp.num" ]     ]
+       } ,
+   #+end_src
+   :end:
+
+*** Example
+
+   For example, consider the H_2 molecule with the following
+   [[https://pseudopotentiallibrary.org/recipes/H/ccECP/H.ccECP.gamess][effective core potential]]
+   (in GAMESS input format for the H atom):
+
+    #+BEGIN_EXAMPLE
+H-ccECP GEN 0 1
+3
+1.00000000000000    1 21.24359508259891
+21.24359508259891   3 21.24359508259891
+-10.85192405303825  2 21.77696655044365
+1
+0.00000000000000    2 1.000000000000000
+    #+END_EXAMPLE
+
+    In TREXIO representation this would be:
+
+    #+BEGIN_EXAMPLE
+num = 8
+
+# lmax+1 per atom
+max_ang_mom_plus_1 = [ 1, 1 ]
+
+# number of core electrons to remove per atom
+z_core = [ 0, 0 ]
+
+# first 4 ECP elements correspond to the first H atom ; the remaining 4 elements are for the second H atom
+nucleus_index = [
+  0, 0, 0, 0,
+  1, 1, 1, 1
+  ]
+
+# 3 first ECP elements correspond to potential of the P orbital (l=1), then 1 element for the S orbital (l=0) ; similar for the second H atom
+ang_mom = [
+  1, 1, 1, 0,
+  1, 1, 1, 0
+  ]
+
+# ECP quantities that can be attributed to atoms and/or angular momenta based on the aforementioned nucleus_index and ang_mom arrays
+coefficient = [
+  1.00000000000000, 21.24359508259891, -10.85192405303825, 0.00000000000000,
+  1.00000000000000, 21.24359508259891, -10.85192405303825, 0.00000000000000
+  ]
+
+exponent = [
+  21.24359508259891, 21.24359508259891, 21.77696655044365, 1.000000000000000,
+  21.24359508259891, 21.24359508259891, 21.77696655044365, 1.000000000000000
+  ]
+
+power = [
+  -1, 1, 0, 0,
+  -1, 1, 0, 0
+  ]
+    #+END_EXAMPLE
+
+** Numerical integration grid (grid group)
+
+   In some applications, such as DFT calculations, integrals have to
+   be computed numerically on a grid.  A common choice for the angular
+   grid is the one proposed by Lebedev and Laikov
+   [Russian Academy of Sciences Doklady Mathematics, Volume 59, Number 3, 1999, pages 477-481].
+   For the radial grids, many approaches have been developed over the years.
+
+   The structure of this group is adapted for the [[https://github.com/dftlibs/numgrid][numgrid]] library.
+   Feel free to submit a PR if you find missing options/functionalities.
+
+    #+NAME: grid
+   | Variable        | Type    | Dimensions       | Description                                                             |
+   |-----------------+---------+------------------+-------------------------------------------------------------------------|
+   | ~description~   | ~str~   |                  | Details about the used quadratures can go here                          |
+   | ~rad_precision~ | ~float~ |                  | Radial precision parameter (not used in some schemes like Krack-Köster) |
+   | ~num~           | ~dim~   |                  | Number of grid points                                                   |
+   | ~max_ang_num~   | ~int~   |                  | Maximum number of angular grid points (for pruning)                     |
+   | ~min_ang_num~   | ~int~   |                  | Minimum number of angular grid points (for pruning)                     |
+   | ~coord~         | ~float~ | ~(grid.num)~     | Discretized coordinate space                                            |
+   | ~weight~        | ~float~ | ~(grid.num)~     | Grid weights according to a given partitioning (e.g. Becke)             |
+   | ~ang_num~       | ~dim~   |                  | Number of angular integration points (if used)                          |
+   | ~ang_coord~     | ~float~ | ~(grid.ang_num)~ | Discretized angular space (if used)                                     |
+   | ~ang_weight~    | ~float~ | ~(grid.ang_num)~ | Angular grid weights (if used)                                          |
+   | ~rad_num~       | ~dim~   |                  | Number of radial integration points (if used)                           |
+   | ~rad_coord~     | ~float~ | ~(grid.rad_num)~ | Discretized radial space (if used)                                      |
+   | ~rad_weight~    | ~float~ | ~(grid.rad_num)~ | Radial grid weights  (if used)                                          |
+
+    #+CALL: json(data=grid, title="grid")
+
+    #+RESULTS:
+    :results:
+    #+begin_src python :tangle trex.json
+        "grid": {
+              "description" : [ "str"  , []                 ]
+          , "rad_precision" : [ "float", []                 ]
+          ,           "num" : [ "dim"  , []                 ]
+          ,   "max_ang_num" : [ "int"  , []                 ]
+          ,   "min_ang_num" : [ "int"  , []                 ]
+          ,         "coord" : [ "float", [ "grid.num" ]     ]
+          ,        "weight" : [ "float", [ "grid.num" ]     ]
+          ,       "ang_num" : [ "dim"  , []                 ]
+          ,     "ang_coord" : [ "float", [ "grid.ang_num" ] ]
+          ,    "ang_weight" : [ "float", [ "grid.ang_num" ] ]
+          ,       "rad_num" : [ "dim"  , []                 ]
+          ,     "rad_coord" : [ "float", [ "grid.rad_num" ] ]
+          ,    "rad_weight" : [ "float", [ "grid.rad_num" ] ]
+        } ,
+    #+end_src
+    :end:
+
+* Orbitals
+** Atomic orbitals (ao group)

   Going from the atomic basis set to AOs implies a systematic
   construction of all the angular functions of each shell.  We
@ -508,7 +654,7 @@ prim_factor =
   #+end_src
   :end:

-** One-electron integrals (~ao_1e_int~ group)
+*** One-electron integrals (~ao_1e_int~ group)
    :PROPERTIES:
    :CUSTOM_ID: ao_one_e
    :END:
@ -526,13 +672,13 @@ prim_factor =

    #+NAME: ao_1e_int
   | Variable              | Type    | Dimensions         | Description                                                              |
-  |-----------------------+---------+--------------------+-----------------------------------------------------------------------------------|
-  | ~overlap~             | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert q \rangle$  (real part, general case)                            |
-  | ~kinetic~             | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{T}_e \vert q \rangle$  (real part, general case)            |
-  | ~potential_n_e~       | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ne}} \vert q \rangle$  (real part, general case)  |
-  | ~ecp~                 | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ecp}} \vert q \rangle$  (real part, general case) |
-  | ~core_hamiltonian~    | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{h} \vert q \rangle$  (real part, general case)              |
-  | ~overlap_im~          | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert q \rangle$ (imaginary part)  (imaginary part)                    |
+   |-----------------------+---------+--------------------+--------------------------------------------------------------------------|
+   | ~overlap~             | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert q \rangle$                                              |
+   | ~kinetic~             | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{T}_e \vert q \rangle$                              |
+   | ~potential_n_e~       | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ne}} \vert q \rangle$                    |
+   | ~ecp~                 | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ECP}} \vert q \rangle$                   |
+   | ~core_hamiltonian~    | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{h} \vert q \rangle$                                |
+   | ~overlap_im~          | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert q \rangle$ (imaginary part)                             |
   | ~kinetic_im~          | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{T}_e \vert q \rangle$   (imaginary part)           |
   | ~potential_n_e_im~    | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ne}} \vert q \rangle$  (imaginary part)  |
   | ~ecp_im~              | ~float~ | ~(ao.num, ao.num)~ | $\langle p \vert \hat{V}_{\text{ECP}} \vert q \rangle$  (imaginary part) |
@ -558,7 +704,7 @@ prim_factor =
    #+end_src
    :end:

-** Two-electron integrals (~ao_2e_int~ group)
+*** Two-electron integrals (~ao_2e_int~ group)
    :PROPERTIES:
    :CUSTOM_ID: ao_two_e
    :END:
@ -578,11 +724,21 @@ prim_factor =
      \sum_{j=1}^{i-1} \frac{\text{erf}(\vert \mathbf{r}_i -
      \mathbf{r}_j \vert)}{\vert \mathbf{r}_i - \mathbf{r}_j \vert} \] : electron-electron long range potential

+    The Cholesky decomposition of the integrals can also be stored:
+
+    \[
+    A_{ijkl} = \sum_{\alpha} G_{ik\alpha} G_{jl\alpha}
+    \]
+
    #+NAME: ao_2e_int
   | Variable              | Type           | Dimensions                                        | Description                                   |
-  |----------+----------------+------------------------------------+-----------------------------------------|
+   |-----------------------+----------------+---------------------------------------------------+-----------------------------------------------|
   | ~eri~                 | ~float sparse~ | ~(ao.num, ao.num, ao.num, ao.num)~                | Electron repulsion integrals                  |
   | ~eri_lr~              | ~float sparse~ | ~(ao.num, ao.num, ao.num, ao.num)~                | Long-range Electron repulsion integrals       |
+   | ~eri_cholesky_num~    | ~dim~          |                                                   | Number of Cholesky vectors for ERI            |
+   | ~eri_cholesky~        | ~float sparse~ | ~(ao.num, ao.num, ao_2e_int.eri_cholesky_num)~    | Cholesky decomposition of the ERI             |
+   | ~eri_lr_cholesky_num~ | ~dim~          |                                                   | Number of Cholesky vectors for long range ERI |
+   | ~eri_lr_cholesky~     | ~float sparse~ | ~(ao.num, ao.num, ao_2e_int.eri_lr_cholesky_num)~ | Cholesky decomposition of the long range ERI  |

    #+CALL: json(data=ao_2e_int, title="ao_2e_int")

@ -592,19 +748,23 @@ prim_factor =
        "ao_2e_int": {
                            "eri" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ]              ]
          ,              "eri_lr" : [ "float sparse", [ "ao.num", "ao.num", "ao.num", "ao.num" ]              ]
+          ,    "eri_cholesky_num" : [ "dim"         , []                                                      ]
+          ,        "eri_cholesky" : [ "float sparse", [ "ao_2e_int.eri_cholesky_num", "ao.num", "ao.num" ]    ]
+          , "eri_lr_cholesky_num" : [ "dim"         , []                                                      ]
+          ,     "eri_lr_cholesky" : [ "float sparse", [ "ao_2e_int.eri_lr_cholesky_num", "ao.num", "ao.num" ] ]
        } ,
    #+end_src
    :end:

-* Molecular orbitals (mo group)
+** Molecular orbitals (mo group)

   #+NAME: mo
   | Variable         | Type    | Dimensions         | Description                                                              |
   |------------------+---------+--------------------+--------------------------------------------------------------------------|
   | ~type~           | ~str~   |                    | Free text to identify the set of MOs (HF, Natural, Local, CASSCF, /etc/) |
   | ~num~            | ~dim~   |                    | Number of MOs                                                            |
-  | ~coefficient~    | ~float~ | ~(ao.num, mo.num)~ | MO coefficients (real part, general case)                                |
-  | ~coefficient_im~ | ~float~ | ~(ao.num, mo.num)~ | MO coefficients (imaginary part, for periodic calculations)              |
+   | ~coefficient~    | ~float~ | ~(ao.num, mo.num)~ | MO coefficients                                                          |
+   | ~coefficient_im~ | ~float~ | ~(ao.num, mo.num)~ | MO coefficients (imaginary part)                                         |
   | ~class~          | ~str~   | ~(mo.num)~         | Choose among: Core, Inactive, Active, Virtual, Deleted                   |
   | ~symmetry~       | ~str~   | ~(mo.num)~         | Symmetry in the point group                                              |
   | ~occupation~     | ~float~ | ~(mo.num)~         | Occupation number                                                        |
@ -630,7 +790,7 @@ prim_factor =
   #+end_src
   :end:

-** One-electron integrals (~mo_1e_int~ group)
+*** One-electron integrals (~mo_1e_int~ group)

    The operators are the same as those defined in the
    [[#ao_one_e][AO one-electron integrals section]]. Here, the integrals are given in
@ -638,13 +798,13 @@ prim_factor =

    #+NAME: mo_1e_int
   | Variable              | Type    | Dimensions         | Description                                                              |
-  |-----------------------+---------+--------------------+-----------------------------------------------------------------------------------|
-  | ~overlap~             | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert j \rangle$  (real part, general case)                            |
-  | ~kinetic~             | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{T}_e \vert j \rangle$ (real part, general case)             |
-  | ~potential_n_e~       | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ne}} \vert j \rangle$  (real part, general case)  |
-  | ~ecp~                 | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ECP}} \vert j \rangle$  (real part, general case) |
-  | ~core_hamiltonian~    | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{h} \vert j \rangle$  (real part, general case)              |
-  | ~overlap_im~          | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert j \rangle$ (imaginary part)  (imaginary part)                    |
+   |-----------------------+---------+--------------------+--------------------------------------------------------------------------|
+   | ~overlap~             | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert j \rangle$                                              |
+   | ~kinetic~             | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{T}_e \vert j \rangle$                              |
+   | ~potential_n_e~       | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ne}} \vert j \rangle$                    |
+   | ~ecp~                 | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ECP}} \vert j \rangle$                   |
+   | ~core_hamiltonian~    | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{h} \vert j \rangle$                                |
+   | ~overlap_im~          | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert j \rangle$ (imaginary part)                             |
   | ~kinetic_im~          | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{T}_e \vert j \rangle$   (imaginary part)           |
   | ~potential_n_e_im~    | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ne}} \vert j \rangle$  (imaginary part)  |
   | ~ecp_im~              | ~float~ | ~(mo.num, mo.num)~ | $\langle i \vert \hat{V}_{\text{ECP}} \vert j \rangle$  (imaginary part) |
@ -670,18 +830,12 @@ prim_factor =
    #+end_src
    :end:

-** Two-electron integrals (~mo_2e_int~ group)
+*** Two-electron integrals (~mo_2e_int~ group)

    The operators are the same as those defined in the
    [[#ao_two_e][AO two-electron integrals section]]. Here, the integrals are given in
    the basis of molecular orbitals.

-   The Cholesky decomposition of the integrals can also be stored:
-
-   \[
-   A_{ijkl} = \sum_{\alpha} G_{il\alpha} G_{jl\alpha}
-   \]
-
    #+NAME: mo_2e_int
   | Variable              | Type           | Dimensions                                        | Description                                   |
   |-----------------------+----------------+---------------------------------------------------+-----------------------------------------------|
@ -708,7 +862,8 @@ prim_factor =
    #+end_src
    :end:
   
-* Slater determinants (determinant group)
+* Multi-determinant information
+** Slater determinants (determinant group)

   The configuration interaction (CI) wave function $\Psi$
   can be expanded in the basis of Slater determinants $D_I$ as follows
@ -756,7 +911,7 @@ prim_factor =
   #+end_src
   :end:

-* Configuration state functions (csf group)
+** Configuration state functions (csf group)

   The configuration interaction (CI) wave function $\Psi$ can be
   expanded in the basis of [[https://en.wikipedia.org/wiki/Configuration_state_function][configuration state functions]] (CSFs)
@ -792,7 +947,7 @@ prim_factor =
   #+end_src
   :end:

-* Amplitudes (amplitude group)
+** Amplitudes (amplitude group)

   The wave function may be expressed in terms of action of the cluster
   operator $\hat{T}$:
@ -804,18 +959,18 @@ prim_factor =
   on a reference wave function $\Psi$, where $\hat{T}_1$ is the single excitation operator,

   \[
-  \hat{T}_1 = \sum_{ia} t_{i}^{a}\, \hat{a}^\dagger_a \hat{a}_i 
-  \],
+   \hat{T}_1 = \sum_{ia} t_{i}^{a}\, \hat{a}^\dagger_a \hat{a}_i, 
+   \]

   $\hat{T}_2$ is the double excitation operator,

   \[
-  \hat{T}_2 = \frac{1}{4} \sum_{ijab} t_{ij}^{ab}\, \hat{a}^\dagger_a \hat{a}^\dagger_b \hat{a}_j \hat{a}_i 
-  \],
+   \hat{T}_2 = \frac{1}{4} \sum_{ijab} t_{ij}^{ab}\, \hat{a}^\dagger_a \hat{a}^\dagger_b \hat{a}_j \hat{a}_i,
+   \]

-  /etc/. Indices $i,j,a,b$ denote molecular orbital indices.
+   /etc/. Indices $i$, $j$, $a$ and $b$ denote molecular orbital indices.

-  Wave functions obtained with perturbation theory of configuration
+   Wave functions obtained with perturbation theory or configuration
   interaction are of the form

   \[ |\Phi\rangle = \hat{T}|\Psi\rangle  \]
@ -865,44 +1020,7 @@ prim_factor =
   #+end_src
   :end:

-* Excited states (state group)
-
-  This group contains information about excited states. Since only a
-  single state can be stored in a TREXIO file, it is possible to store
-  in the main TREXIO file the names of auxiliary files containing the
-  information of the other states.
-
-  The ~file_name~ and ~label~ arrays have to be written only for the
-  main file, e.g. the one containing the ground state wave function
-  together with the basis set parameters, molecular orbitals,
-  integrals, etc.
-  The ~id~ and ~current_label~ attributes need to be specified for each file.
-
-  #+NAME: state
-  | Variable        | Type  | Dimensions    | Description                                                                                 |
-  |-----------------+-------+---------------+---------------------------------------------------------------------------------------------|
-  | ~num~           | ~dim~ |               | Number of states (including the ground state)                                               |
-  | ~id~            | ~int~ |               | Index of the current state (0 is ground state)                                              |
-  | ~current_label~ | ~str~ |               | Label of the current state                                                                  |
-  | ~label~         | ~str~ | ~(state.num)~ | Labels of all states                                                                        |
-  | ~file_name~     | ~str~ | ~(state.num)~ | Names of the TREXIO files linked to the current one (i.e. containing data for other states) |
-
-  #+CALL: json(data=state, title="state")
-
-  #+RESULTS:
-  :results:
-  #+begin_src python :tangle trex.json
-      "state": {
-                    "num" : [ "dim", []              ]
-        ,            "id" : [ "int", []              ]
-        , "current_label" : [ "str", []              ]
-        ,         "label" : [ "str", [ "state.num" ] ]
-        ,     "file_name" : [ "str", [ "state.num" ] ]
-      } ,
-  #+end_src
-  :end:
-
-* Reduced density matrices (rdm group)
+** Reduced density matrices (rdm group)

   The reduced density matrices are defined in the basis of molecular
   orbitals.
@ -1009,100 +1127,129 @@ prim_factor =
   #+end_src
   :end:

-* Cell (cell group)
+* Correlation factors
+** Jastrow factor (jastrow group)

-  3 Lattice vectors to define a box containing the system, for example
-  used in periodic calculations.
+   The Jastrow factor is an $N$-electron function by which the CI
+   expansion is multiplied: $\Psi = \Phi \times \exp(J)$,
+   where

-  #+NAME: cell
+   \[
+   J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R})
+   \]
+   
+   In the following, we use the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and
+   $R_{i\alpha} = |\mathbf{r}_i - \mathbf{R}_\alpha|$, where indices
+   $i$ and $j$ correspond to electrons and $\alpha$ to nuclei.
+
+   Parameters for multiple forms of Jastrow factors can be saved in
+   TREXIO files, and are described in the following sections. These
+   are identified by the ~type~ attribute. The type can be one of the
+   following:
+   - ~CHAMP~
+   - ~Mu~
+   
+*** CHAMP
+
+    The first form of Jastrow factor is the one used in
+    the [[https://trex-coe.eu/trex-quantum-chemistry-codes/champ][CHAMP]] program.
+
+   $J_{\text{eN}}$ contains electron-nucleus  terms:
+
+   \[
+   J_{\text{eN}}(\mathbf{r},\mathbf{R}) = \sum_{i=1}^{N_\text{elec}} \sum_{\alpha=1}^{N_\text{nucl}}
+   \frac{a_{1,\alpha}\, g_\alpha(R_{i\alpha})}{1+a_{2,\alpha}\, g_\alpha(R_{i\alpha})} +
+   \sum_{p=2}^{N_\text{ord}^a} a_{p+1,\alpha}\, [g_\alpha(R_{i\alpha})]^p - J_{\text{eN}}^\infty
+   \]
+
+   $J_{\text{ee}}$ contains electron-electron terms:
+   \[
+   J_{\text{ee}}(\mathbf{r}) =
+   \sum_{i=1}^{N_\text{elec}} \sum_{j=1}^{i-1}
+   \frac{b_1\, f(r_{ij})}{1+b_2\, f(r_{ij})} +
+   \sum_{p=2}^{N_\text{ord}^b} b_{p+1}\, [f(r_{ij})]^p  - J_{\text{ee}}^\infty
+   \]
+
+   and $J_{\text{eeN}}$ contains electron-electron-nucleus terms:
+
+   \[
+   J_{\text{eeN}}(\mathbf{r},\mathbf{R}) =
+    \sum_{\alpha=1}^{N_{\text{nucl}}}
+     \sum_{i=1}^{N_{\text{elec}}}
+      \sum_{j=1}^{i-1}
+       \sum_{p=2}^{N_{\text{ord}}}
+        \sum_{k=0}^{p-1}
+         \sum_{l=0}^{p-k-2\delta_{k,0}}
+           c_{lkp\alpha} \left[ f({r}_{ij}) \right]^k
+             \left[ \left[ g_\alpha({R}_{i\alpha}) \right]^l + \left[ g_\alpha({R}_{j\alpha}) \right]^l \right]
+             \left[ g_\alpha({R}_{i\,\alpha}) \, g_\alpha({R}_{j\alpha}) \right]^{(p-k-l)/2}
+   \]
+
+   $c_{lkp\alpha}$ are non-zero only when $p-k-l$ is even.
+
+   The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that
+   $J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero.
+
+   $f$ and $g$ are scaling functions defined as
+
+   \[
+   f(r) = \frac{1-e^{-\kappa\, r}}{\kappa} \text{ and }
+   g_\alpha(r) = e^{-\kappa_\alpha\, r}.
+   \]
+
+*** mu
+
+    The "mu" Jastrow factor has only a single parameter $\mu$ for the
+ [[https://doi.org/10.1063/5.0044683][electron-electron term]]: 
+
+   \[
+   J_{\text{ee}}(\mathbf{r}) = 
+   \sum_{i=1}^{N_\text{elec}} \sum_{j=1}^{i-1} \left[ r_{ij}
+   \left( 1 - \text{erf}(\mu\, r_{ij})\right) - \frac{1}{\mu\sqrt{\pi}}
+   e^{-(\mu\,r_{ij})^2} \right]
+   \]
+
+#  It was then updated for frozen-core calculations by introducing a
+#  set of electron-electron-nucleus terms with one parameter per nucleus:
+
+#  \[
+#  J_{\text{eeN}}(\mathbf{r}) =
+#  \]
+
+*** Table of values
+   
+   #+name: jastrow
  | Variable      | Type     | Dimensions          | Description                                                     |
-  |----------+---------+------------+-----------------------|
-  | ~a~      | ~float~ | ~(3)~      | First lattice vector  |
-  | ~b~      | ~float~ | ~(3)~      | Second lattice vector |
-  | ~c~      | ~float~ | ~(3)~      | Third lattice vector  |
+  |---------------+----------+---------------------+-----------------------------------------------------------------|
+  | ~type~        | ~string~ |                     | Type of Jastrow factor: ~CHAMP~ or ~Mu~                         |
+  | ~ee_num~      | ~dim~    |                     | Number of Electron-electron parameters                          |
+  | ~en_num~      | ~dim~    |                     | Number of Electron-nucleus parameters                           |
+  | ~een_num~     | ~dim~    |                     | Number of Electron-electron-nucleus parameters                  |
+  | ~ee~          | ~float~  | ~(jastrow.ee_num)~  | Electron-electron parameters                                    |
+  | ~en~          | ~float~  | ~(jastrow.en_num)~  | Electron-nucleus parameters                                     |
+  | ~een~         | ~float~  | ~(jastrow.een_num)~ | Electron-electron-nucleus parameters                            |
+  | ~en_nucleus~  | ~index~  | ~(jastrow.en_num)~  | Nucleus relative to the eN parameter                            |
+  | ~een_nucleus~ | ~index~  | ~(jastrow.een_num)~ | Nucleus relative to the eeN parameter                           |
+  | ~ee_scaling~  | ~float~  |                     | $\kappa$ value in CHAMP Jastrow for electron-electron distances |
+  | ~en_scaling~  | ~float~  | ~(nucleus.num)~     | $\kappa$ value in CHAMP Jastrow for electron-nucleus distances  |
   
-  #+CALL: json(data=cell, title="cell")
+   #+CALL: json(data=jastrow, title="jastrow")

   #+RESULTS:
   :results:
   #+begin_src python :tangle trex.json
-      "cell": {
-          "a" : [ "float", [ "3" ] ]
-        , "b" : [ "float", [ "3" ] ]
-        , "c" : [ "float", [ "3" ] ]
-      } ,
-  #+end_src
-  :end:
-
-* Periodic boundary calculations (pbc group)
-
-  A single $k$-point per TREXIO file can be stored. The $k$-point is
-  defined in this group.
-
-  #+NAME: pbc
-  | Variable   | Type    | Dimensions | Description             |
-  |------------+---------+------------+-------------------------|
-  | ~periodic~ | ~int~   |            | ~1~: true or ~0~: false |
-  | ~k_point~  | ~float~ | ~(3)~      | $k$-point sampling      |
-
-  #+CALL: json(data=pbc, title="pbc")
-
-  #+RESULTS:
-  :results:
-  #+begin_src python :tangle trex.json
-      "pbc": {
-          "periodic" : [ "int"  , []      ]
-        ,  "k_point" : [ "float", [ "3" ] ]
-      } ,
-  #+end_src
-  :end:
-
-* Numerical integration grid (grid group)
-
-  The molecular integrals have to be computed numerically on a grid in many applications.
-  A common choice for the angular grid is the one proposed by Lebedev and Laikov
-  [Russian Academy of Sciences Doklady Mathematics, Volume 59, Number 3, 1999, pages 477-481].
-  For the radial grids, many approaches have been developed over the years.
-
-  The structure of this group is adapted for the [[https://github.com/dftlibs/numgrid][numgrid]] library.
-  Feel free to submit a PR if you find missing options/functionalities.
-
-   #+name: grid
-  | Variable        | Type    | Dimensions       | Description                                                             |
-  |-----------------+---------+------------------+-------------------------------------------------------------------------|
-  | ~description~   | ~str~   |                  | Details about the used quadratures can go here                          |
-  | ~rad_precision~ | ~float~ |                  | Radial precision parameter (not used in some schemes like Krack-Köster) |
-  | ~num~           | ~dim~   |                  | Number of grid points                                                   |
-  | ~max_ang_num~   | ~int~   |                  | Maximum number of angular grid points (for pruning)                     |
-  | ~min_ang_num~   | ~int~   |                  | Minimum number of angular grid points (for pruning)                     |
-  | ~coord~         | ~float~ | ~(grid.num)~     | Discretized coordinate space                                            |
-  | ~weight~        | ~float~ | ~(grid.num)~     | Grid weights according to a given partitioning (e.g. Becke)             |
-  | ~ang_num~       | ~dim~   |                  | Number of angular integration points (if used)                          |
-  | ~ang_coord~     | ~float~ | ~(grid.ang_num)~ | Discretized angular space (if used)                                     |
-  | ~ang_weight~    | ~float~ | ~(grid.ang_num)~ | Angular grid weights (if used)                                          |
-  | ~rad_num~       | ~dim~   |                  | Number of radial integration points (if used)                           |
-  | ~rad_coord~     | ~float~ | ~(grid.rad_num)~ | Discretized radial space (if used)                                      |
-  | ~rad_weight~    | ~float~ | ~(grid.rad_num)~ | Radial grid weights (if used)                                           |
-
-   #+CALL: json(data=grid, title="grid")
-
-   #+RESULTS:
-   :results:
-   #+begin_src python :tangle trex.json
-       "grid": {
-	     "description" : [ "str"  , []                 ]
-	 , "rad_precision" : [ "float", []                 ]
-	 ,           "num" : [ "dim"  , []                 ]
-	 ,   "max_ang_num" : [ "int"  , []                 ]
-	 ,   "min_ang_num" : [ "int"  , []                 ]
-	 ,         "coord" : [ "float", [ "grid.num" ]     ]
-	 ,        "weight" : [ "float", [ "grid.num" ]     ]
-	 ,       "ang_num" : [ "dim"  , []                 ]
-	 ,     "ang_coord" : [ "float", [ "grid.ang_num" ] ]
-	 ,    "ang_weight" : [ "float", [ "grid.ang_num" ] ]
-	 ,       "rad_num" : [ "dim"  , []                 ]
-	 ,     "rad_coord" : [ "float", [ "grid.rad_num" ] ]
-	 ,    "rad_weight" : [ "float", [ "grid.rad_num" ] ]
+       "jastrow": {
+                  "type" : [ "string", []                    ]
+         ,      "ee_num" : [ "dim"   , []                    ]
+         ,      "en_num" : [ "dim"   , []                    ]
+         ,     "een_num" : [ "dim"   , []                    ]
+         ,          "ee" : [ "float" , [ "jastrow.ee_num" ]  ]
+         ,          "en" : [ "float" , [ "jastrow.en_num" ]  ]
+         ,         "een" : [ "float" , [ "jastrow.een_num" ] ]
+         ,  "en_nucleus" : [ "index" , [ "jastrow.en_num" ]  ]
+         , "een_nucleus" : [ "index" , [ "jastrow.een_num" ] ]
+         ,  "ee_scaling" : [ "float" , []                    ]
+         ,  "en_scaling" : [ "float" , [ "nucleus.num" ]     ]
       } ,
   #+end_src
   :end: