Merge branch 'master' of github.com:TREX-CoE/qmckl

Conflicts: org/qmckl_sherman_morrison_woodbury.org
2025-04-29 03:44:49 +02:00 · 2023-09-14 09:43:59 +02:00 · 2023-09-14 09:43:59 +02:00 · 2291103a9b
commit 2291103a9b
parent fd2addb370 d77dc26e52
20 changed files with 4396 additions and 2343 deletions
--- a/.github/workflows/devskim.yml
+++ b/.github/workflows/devskim.yml
@ -1,37 +0,0 @@
-# This workflow uses actions that are not certified by GitHub.
-# They are provided by a third-party and are governed by
-# separate terms of service, privacy policy, and support
-# documentation.
-
-name: DevSkim
-
-on:
-  push:
-    branches: [ "master" ]
-  pull_request:
-    branches: [ "master" ]
-  schedule:
-    - cron: '19 5 * * 2'
-
-permissions:
-  contents: read
-
-jobs:
-  lint:
-    name: DevSkim
-    runs-on: ubuntu-20.04
-    permissions:
-      actions: read
-      contents: read
-      security-events: write
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8
-
-      - name: Run DevSkim scanner
-        uses: microsoft/DevSkim-Action@a8a9e06bab570db990fe7351ae9d4d444b9489ca
-
-      - name: Upload DevSkim scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@678fc3afe258fb2e0cdc165ccf77b85719de7b3c
-        with:
-          sarif_file: devskim-results.sarif
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@ -147,60 +147,48 @@ jobs:
        name: test-report-ubuntu-debug
        path: _build_hpc/test-suite.log

-#  x86_macos:
-#
-#    runs-on: macos-latest
-#    name: x86 MacOS latest
-#
-#    steps:
-#    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791
-#    - name: install dependencies
-#      run: brew install emacs hdf5 automake pkg-config
-#
-#    - name: Symlink gfortran (macOS)
-#      if: runner.os == 'macOS'
-#      run: |
-#        # make sure gfortran is available
-#        # https://github.com/actions/virtual-environments/issues/2524
-#        # https://github.com/cbg-ethz/dce/blob/master/.github/workflows/pkgdown.yaml
-#        sudo ln -s /usr/local/bin/gfortran-10 /usr/local/bin/gfortran
-#        sudo mkdir /usr/local/gfortran
-#        sudo ln -s /usr/local/Cellar/gcc@10/*/lib/gcc/10 /usr/local/gfortran/lib
-#        gfortran --version
-#
-#    - name: Install the latest TREXIO from the GitHub clone
-#      run: |
-#        git clone https://github.com/TREX-CoE/trexio.git
-#        cd trexio
-#        ./autogen.sh
-#        ./configure --prefix=${PWD}/_install --enable-silent-rules
-#        make -j 4
-#        make install
-#
-#    - name: Test TREXIO
-#      run: make -j 4 check
-#      working-directory: trexio
-#
-#    - name: Archive TREXIO test log file
-#      if: failure()
-#      uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2
-#      with:
-#        name: test-report-trexio-macos
-#        path: trexio/test-suite.log
-#
-#    - name: Build QMCkl
-#      run: |
-#        export PKG_CONFIG_PATH=${PWD}/trexio/_install/lib/pkgconfig:$PKG_CONFIG_PATH
-#        ./autogen.sh
-#        ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules
-#        make -j 4
-#
-#    - name: Run test
-#      run: make -j 4 check
-#
-#    - name: Archive test log file
-#      if: failure()
-#      uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2
-#      with:
-#        name: test-report-macos
-#        path: test-suite.log
+  macos:
+
+    runs-on: macos-12
+    name: x86 MacOS 12
+
+    steps:
+    - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791
+
+    - name: Install dependencies
+      run: |
+         brew install emacs
+         brew install automake
+         brew install hdf5
+         brew install gcc
+         brew install gfortran
+         brew --prefix hdf5
+
+    - name: Install the latest TREXIO from the GitHub clone
+      run: |
+        git clone https://github.com/TREX-CoE/trexio.git
+        cd trexio
+        ./autogen.sh
+        ./configure FC=gfortran-12 --enable-silent-rules
+        make -j 4
+        sudo make install
+
+    - name: Compile QMCkl in HPC mode
+      run: |
+        ./autogen.sh
+        mkdir _build_hpc
+        cd _build_hpc
+        ../configure --enable-hpc FC=gfortran-12 CC=gcc-12
+        make -j2
+
+    - name: Run test
+      run: make -j2 check
+      working-directory: _build_hpc
+
+    - name: Archive test log file
+      if: failure()
+      uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2
+      with:
+        name: test-report-macos-x86
+        path: _build_hpc/test-suite.log
+
--- a/configure.ac
+++ b/configure.ac
@ -35,7 +35,7 @@

 AC_PREREQ([2.69])

-AC_INIT([qmckl],[0.3.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html])
+AC_INIT([qmckl],[0.5.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html])
 AC_CONFIG_AUX_DIR([tools])
 AM_INIT_AUTOMAKE([subdir-objects color-tests parallel-tests silent-rules 1.11])

@ -403,6 +403,18 @@ AC_ARG_ENABLE([hpc],
 AS_IF([test "x$enable_hpc" = "xyes"],
      [AC_DEFINE([HAVE_HPC], [1], [Activate HPC routines])])

+
+AC_ARG_ENABLE([fpe],
+              [AS_HELP_STRING([--enable-fpe],
+                              [Enable floating-point exceptions])],
+              [enable_fpe=$enableval],
+              [enable_fpe=no])
+
+AS_IF([test "x$enable_fpe" = "xyes"],
+      [AC_DEFINE([HAVE_FPE], [1], [Activate floating-point exceptions])])
+
+
+
 AC_ARG_ENABLE([doc],
            [AS_HELP_STRING([--disable-doc],
                            [Disable documentation])],
@ -432,62 +444,6 @@ AS_IF([test "$FC" = "verificarlo-f"], [
  FCFLAGS="-Mpreprocess $FCFLAGS"
 ])

-## Enable GPU offloading
-
-# GPU offloading
-AC_ARG_ENABLE(gpu, [AS_HELP_STRING([--enable-gpu],[openmp|openacc : Use GPU-offloaded functions])], enable_gpu=$enableval, enable_gpu=no)
-AS_IF([test "x$enable_gpu" = "xyes"], [enable_gpu="openmp"])
-
-# OpenMP offloading
-HAVE_OPENMP_OFFLOAD="no"
-AS_IF([test "x$enable_gpu" = "xopenmp"], [
-  AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines])
-  HAVE_OPENMP_OFFLOAD="yes"
-  AS_CASE([$CC],
-    [*gcc*], [CFLAGS="$CFLAGS -fopenmp"],
-    [*nvc*], [CFLAGS="$CFLAGS -mp=gpu"],
-    [])
-  AS_CASE([$FC],
-    [*gfortran*],  [FCFLAGS="$FCFLAGS -fopenmp"],
-    [*nvfortran*], [FCFLAGS="$FCFLAGS -mp=gpu"],
-    [])
-  ]
-)
-
-# OpenMP offloading
-HAVE_OPENACC_OFFLOAD="no"
-AS_IF([test "x$enable_gpu" = "xopenacc"], [
-  AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines])
-  HAVE_OPENACC_OFFLOAD="yes"
-  AS_CASE([$CC],
-    [*gcc*], [CFLAGS="$CFLAGS -fopenacc"],
-    [*nvc*], [CFLAGS="$CFLAGS -acc=gpu"],
-    [])
-  AS_CASE([$FC],
-    [*gfortran*],  [FCFLAGS="$FCFLAGS -fopenacc"],
-    [*nvfortran*], [FCFLAGS="$FCFLAGS -acc=gpu"],
-    [])
-  ]
-
-])
-
-# cuBLAS offloading
-AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no)
-AS_IF([test "x$HAVE_CUBLAS_OFFLOAD" = "xyes"], [
-  AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
-  HAVE_OPENACC_OFFLOAD="yes"
-  AS_CASE([$CC],
-    [*gcc*], [CFLAGS="$CFLAGS -fopenmp"
-              LDFLAGS="-lcublas"],
-    [*nvc*], [CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas"],
-    [])
-  AS_CASE([$FC],
-    [*gfortran*],  [FCFLAGS="$FCFLAGS -fopenmp"],
-    [*nvfortran*], [FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas"],
-    [])
-  ]
-])
-
 AC_ARG_ENABLE(malloc-trace, [AS_HELP_STRING([--enable-malloc-trace],[use debug malloc/free])], ok=$enableval, ok=no)
 AS_IF([test "x$ok" = "xyes"], [
 	AC_DEFINE(MALLOC_TRACE,"malloc_trace.dat",[Define to use debugging malloc/free])
@ -515,12 +471,12 @@ AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])],
 AS_IF([test "x$ok" = "xyes"], [
        AS_IF([test "x$GCC" = "xyes"], [
                CPPFLAGS="-Wdate-time -D_FORTIFY_SOURCE=2"
-	        CFLAGS="$CFLAGS \
-g -Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \
-fsanitize=address -fno-omit-frame-pointer -fstack-protector-strong -Wformat -Werror=format-security \
+	        CFLAGS="$CFLAGS -g \
+-Wall -W -Wbad-function-cast -Wcast-qual -Warray-bounds -Wdisabled-optimization \
+-fno-omit-frame-pointer -fstack-protector-strong -Wformat -Werror=format-security \
 -Wpointer-arith -Wcast-align -Wpedantic -Wextra -Walloc-zero -Werror \
 "
-		LDFLAGS="$LDFLAGS -fsanitize=address"
+		LDFLAGS="$LDFLAGS"
        ])
        AS_IF([test "x$GFC" = "xyes"], [
 	        FCFLAGS="$FCFLAGS \
--- a/m4/ax_openmp.m4
+++ b/m4/ax_openmp.m4
@ -1,5 +1,5 @@
 # ===========================================================================
-#         http://www.gnu.org/software/autoconf-archive/ax_openmp.html
+#        https://www.gnu.org/software/autoconf-archive/ax_openmp.html
 # ===========================================================================
 #
 # SYNOPSIS
@ -38,6 +38,8 @@
 # LICENSE
 #
 #   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
+#   Copyright (c) 2015 John W. Peterson <jwpeterson@gmail.com>
+#   Copyright (c) 2016 Nick R. Papior <nickpapior@gmail.com>
 #
 #   This program is free software: you can redistribute it and/or modify it
 #   under the terms of the GNU General Public License as published by the
@ -50,7 +52,7 @@
 #   Public License for more details.
 #
 #   You should have received a copy of the GNU General Public License along
-#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#   with this program. If not, see <https://www.gnu.org/licenses/>.
 #
 #   As a special exception, the respective Autoconf Macro's copyright owner
 #   gives unlimited permission to copy, distribute and modify the configure
@ -65,16 +67,19 @@
 #   modified version of the Autoconf Macro, you may extend this special
 #   exception to the GPL to apply to your modified version as well.

-#serial 8
+#serial 14

 AC_DEFUN([AX_OPENMP], [
-AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX
+AC_PREREQ([2.69]) dnl for _AC_LANG_PREFIX

 AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
 ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
-# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI),
-#                -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none
-ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none"
+# Flags to try:  -fopenmp (gcc), -mp (SGI & PGI),
+#                -qopenmp (icc>=15), -openmp (icc),
+#                -xopenmp (Sun), -omp (Tru64),
+#                -qsmp=omp (AIX),
+#                none
+ax_openmp_flags="-fopenmp -openmp -qopenmp -mp -xopenmp -omp -qsmp=omp none"
 if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
  ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags"
 fi
@ -83,8 +88,27 @@ for ax_openmp_flag in $ax_openmp_flags; do
    none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;;
    *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;;
  esac
-  AC_TRY_LINK_FUNC(omp_set_num_threads,
-	[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break])
+  AC_LINK_IFELSE([AC_LANG_SOURCE([[
+@%:@include <omp.h>
+
+static void
+parallel_fill(int * data, int n)
+{
+  int i;
+@%:@pragma omp parallel for
+  for (i = 0; i < n; ++i)
+    data[i] = i;
+}
+
+int
+main(void)
+{
+  int arr[100000];
+  omp_set_num_threads(2);
+  parallel_fill(arr, 100000);
+  return 0;
+}
+]])],[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break],[])
 done
 []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS
 ])
@ -97,3 +121,4 @@ else
  m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])])
 fi
 ])dnl AX_OPENMP
+
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@ -185,7 +185,7 @@ D   1
 type = 'G'
 shell_num = 12
 prim_num = 20
-ao_num = 38
+ao_num = 30

 nucleus_index = [0 , 6]

@ -2613,7 +2613,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
    for (int32_t l=0 ; l<32 ; ++l) {
      lstart[l] = l*(l+1)*(l+2)/6;
    }
-    
+
    int64_t ao_idx = 0;
    for (int64_t inucl=0 ; inucl<nucl_num ; ++inucl) {
      const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl];
@ -2976,7 +2976,7 @@ qmckl_get_ao_basis_primitive_vgl (qmckl_context context,
        implicit none

        integer (c_int64_t) , intent(in)  , value :: context
-        double precision,     intent(out)         :: primitive_vgl(*)
+        real(c_double),       intent(out)         :: primitive_vgl(*)
        integer (c_int64_t) , intent(in)  , value :: size_max
      end function
    end interface
@ -3037,7 +3037,7 @@ qmckl_get_ao_basis_shell_vgl (qmckl_context context,
        implicit none

        integer (c_int64_t) , intent(in)  , value :: context
-        double precision,     intent(out)         :: shell_vgl(*)
+        real(c_double),       intent(out)         :: shell_vgl(*)
        integer (c_int64_t) , intent(in)  , value :: size_max
      end function
    end interface
@ -3098,7 +3098,7 @@ qmckl_get_ao_basis_ao_vgl (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: ao_vgl(*)
+       real(c_double),       intent(out)         :: ao_vgl(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_ao_basis_ao_vgl
  end interface
@ -3164,7 +3164,7 @@ qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: ao_vgl(*)
+       real(c_double),       intent(out)         :: ao_vgl(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_ao_basis_ao_vgl_inplace
  end interface
@ -3225,7 +3225,7 @@ qmckl_get_ao_basis_ao_value (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: ao_value(*)
+       real(c_double)      , intent(out)         :: ao_value(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_ao_basis_ao_value
  end interface
@ -3291,7 +3291,7 @@ qmckl_get_ao_basis_ao_value_inplace (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: ao_value(*)
+       real(c_double)      , intent(out)         :: ao_value(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_ao_basis_ao_value_inplace
  end interface
@ -3349,15 +3349,15 @@ qmckl_ao_gaussian_vgl(const qmckl_context context,
 integer function qmckl_ao_gaussian_vgl_f(context, X, R, n, A, VGL, ldv) result(info)
  use qmckl
  implicit none
-  integer*8 , intent(in)  :: context
-  real*8    , intent(in)  :: X(3), R(3)
-  integer*8 , intent(in)  :: n
-  real*8    , intent(in)  :: A(n)
-  real*8    , intent(out) :: VGL(ldv,5)
-  integer*8 , intent(in)  :: ldv
+  integer*8        , intent(in)  :: context
+  double precision , intent(in)  :: X(3), R(3)
+  integer*8        , intent(in)  :: n
+  double precision , intent(in)  :: A(n)
+  double precision , intent(out) :: VGL(ldv,5)
+  integer*8        , intent(in)  :: ldv

  integer*8         :: i,j
-  real*8            :: Y(3), r2, t, u, v
+  double precision  :: Y(3), r2, t, u, v

  info = QMCKL_SUCCESS

@ -4626,19 +4626,19 @@ integer function qmckl_ao_polynomial_vgl_doc_f (context,    &
     X, R, lmax, n, L, ldl, VGL, ldv) result(info)
  use qmckl
  implicit none
-  integer*8 , intent(in)  :: context
-  real*8    , intent(in)  :: X(3), R(3)
-  integer   , intent(in)  :: lmax
-  integer*8 , intent(out) :: n
-  integer   , intent(out) :: L(ldl,(lmax+1)*(lmax+2)*(lmax+3)/6)
-  integer*8 , intent(in)  :: ldl
-  real*8    , intent(out) :: VGL(ldv,(lmax+1)*(lmax+2)*(lmax+3)/6)
-  integer*8 , intent(in)  :: ldv
+  integer*8,        intent(in)  :: context
+  double precision, intent(in)  :: X(3), R(3)
+  integer,          intent(in)  :: lmax
+  integer*8,        intent(out) :: n
+  integer,          intent(out) :: L(ldl,(lmax+1)*(lmax+2)*(lmax+3)/6)
+  integer*8,        intent(in)  :: ldl
+  double precision, intent(out) :: VGL(ldv,(lmax+1)*(lmax+2)*(lmax+3)/6)
+  integer*8,        intent(in)  :: ldv

  integer*8         :: i,j
  integer           :: a,b,c,d
-  real*8            :: Y(3)
-  real*8            :: pows(-2:lmax,3)
+  double precision  :: Y(3)
+  double precision  :: pows(-2:lmax,3)
  double precision  :: xy, yz, xz
  double precision  :: da, db, dc, dd

@ -4671,8 +4671,13 @@ integer function qmckl_ao_polynomial_vgl_doc_f (context,    &

  if (lmax == 0) then
     VGL(1,1) = 1.d0
-     VGL(2:5,1) = 0.d0
-     l(1:3,1) = 0
+     VGL(2,1) = 0.d0
+     VGL(3,1) = 0.d0
+     VGL(4,1) = 0.d0
+     VGL(5,1) = 0.d0
+     l(1,1) = 0
+     l(2,1) = 0
+     l(3,1) = 0
     n=1
  else if (lmax > 0) then
     pows(-2:0,1:3) = 1.d0
@ -4683,23 +4688,19 @@ integer function qmckl_ao_polynomial_vgl_doc_f (context,    &
     end do

     VGL(1:5,1:4) = 0.d0
-     l  (1:3,1:4) = 0
-
-     VGL(1  ,1  ) = 1.d0
-     VGL(1:5,2:4) = 0.d0
-
-     l  (1,2) = 1
+     VGL(1,1) = 1.d0
     VGL(1,2) = pows(1,1)
     VGL(2,2) = 1.d0
-
-     l  (2,3) = 1
     VGL(1,3) = pows(1,2)
     VGL(3,3) = 1.d0
-
-     l  (3,4) = 1
     VGL(1,4) = pows(1,3)
     VGL(4,4) = 1.d0

+     l  (1:3,1:4) = 0
+     l  (1,2) = 1
+     l  (2,3) = 1
+     l  (3,4) = 1
+
     n=4
  endif

@ -5483,7 +5484,9 @@ integer(c_int32_t) function test_qmckl_ao_polynomial_vgl(context) bind(C)
     if (L(3,j) > 1) then
        w = w + L(3,j) * (L(3,j)-1) * Y(1)**L(1,j) * Y(2)**L(2,j) * Y(3)**(L(3,j)-2)
     end if
-     if (dabs(1.d0 - VGL(5,j) / w) > epsilon ) return
+     if (w /= 0.d0) then
+       if (dabs(1.d0 - VGL(5,j) / w) > epsilon ) return
+     endif
  end do

  test_qmckl_ao_polynomial_vgl = QMCKL_SUCCESS
@ -5625,6 +5628,7 @@ integer function qmckl_compute_ao_value_doc_f(context, &
     e_coord(1) = coord(ipoint,1)
     e_coord(2) = coord(ipoint,2)
     e_coord(3) = coord(ipoint,3)
+     ao_value(:,ipoint) = 0.d0
     do inucl=1,nucl_num
        n_coord(1) = nucl_coord(inucl,1)
        n_coord(2) = nucl_coord(inucl,2)
@ -5748,6 +5752,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
 {
  <<ao_value_constants>>

+    memset(ao_value, 0, ao_num*point_num*sizeof(double));
+
 #ifdef HAVE_OPENMP
 #pragma omp parallel if (point_num > 16)
 #endif
@ -5760,8 +5766,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
    double exp_mat[prim_max] __attribute__((aligned(64)));
    double ce_mat[shell_max] __attribute__((aligned(64)));

-    int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
-    double  coef_mat_sparse    [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
+    int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = 
+      malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
+
+    double  (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
+      malloc(nucl_num * sizeof (*coef_mat_sparse));
+
    for (int i=0 ; i<nucl_num ; ++i) {
      for (int j=0 ; j<shell_max; ++j) {
        int l=1;
@ -5785,13 +5795,13 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
        { coord[ipoint],
          coord[ipoint + point_num],
          coord[ipoint + 2*point_num] };
-      
+
      for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
        const double n_coord[3] __attribute__((aligned(64))) =
          { nucl_coord[inucl],
            nucl_coord[inucl + nucl_num],
            nucl_coord[inucl + 2*nucl_num] };
-        
+
        /* Test if the point is in the range of the nucleus */
        const double x = e_coord[0] - n_coord[0];
        const double y = e_coord[1] - n_coord[1];
@ -5929,6 +5939,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
        }
      }
    }
+    free(coef_mat_sparse_idx);
+    free(coef_mat_sparse);
  }

  return QMCKL_SUCCESS;
@ -6420,6 +6432,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, &
     e_coord(1) = coord(ipoint,1)
     e_coord(2) = coord(ipoint,2)
     e_coord(3) = coord(ipoint,3)
+     ao_vgl(:,:,ipoint) = 0.d0
     do inucl=1,nucl_num
        n_coord(1) = nucl_coord(inucl,1)
        n_coord(2) = nucl_coord(inucl,2)
@ -6539,6 +6552,8 @@ qmckl_compute_ao_vgl_hpc_gaussian (
 {
  <<ao_value_constants>>

+    memset(ao_vgl, 0, 5*ao_num*point_num*sizeof(double));
+
 #ifdef HAVE_OPENMP
 #pragma omp parallel if (point_num > 16)
 #endif
@ -6550,8 +6565,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
    double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
    double ce_mat[shell_max][8] __attribute__((aligned(64))) ;

-    int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
-    double  coef_mat_sparse    [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
+    int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = 
+      malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
+
+    double  (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
+      malloc(nucl_num * sizeof (*coef_mat_sparse));
+
    for (int i=0 ; i<nucl_num ; ++i) {
      for (int j=0 ; j<shell_max; ++j) {
        int l=1;
@ -6588,7 +6607,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
        { coord[ipoint],
          coord[ipoint + point_num],
          coord[ipoint + 2*point_num] };
-      
+
      for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
        const double n_coord[3] __attribute__((aligned(64))) =
          { nucl_coord[inucl],
@ -6831,6 +6850,8 @@ qmckl_compute_ao_vgl_hpc_gaussian (
        }
      }
    }
+    free(coef_mat_sparse_idx);
+    free(coef_mat_sparse);
  }

  return QMCKL_SUCCESS;
--- a/org/qmckl_blas.org
+++ b/org/qmckl_blas.org
@ -1098,8 +1098,8 @@ end function qmckl_dgemm_f
      implicit none

      integer (c_int64_t) , intent(in)  , value :: context
-      character           , intent(in)  , value :: TransA
-      character           , intent(in)  , value :: TransB
+      character(c_char)   , intent(in)  , value :: TransA
+      character(c_char)   , intent(in)  , value :: TransB
      integer (c_int64_t) , intent(in)  , value :: m
      integer (c_int64_t) , intent(in)  , value :: n
      integer (c_int64_t) , intent(in)  , value :: k
@ -1133,8 +1133,8 @@ end function qmckl_dgemm_f
        implicit none

        integer (c_int64_t) , intent(in)  , value :: context
-        character           , intent(in)  , value :: TransA
-        character           , intent(in)  , value :: TransB
+        character(c_char)   , intent(in)  , value :: TransA
+        character(c_char)   , intent(in)  , value :: TransB
        integer (c_int64_t) , intent(in)  , value :: m
        integer (c_int64_t) , intent(in)  , value :: n
        integer (c_int64_t) , intent(in)  , value :: k
@ -1386,8 +1386,8 @@ end function qmckl_dgemm_safe_f
      implicit none

      integer (c_int64_t) , intent(in)  , value :: context
-      character           , intent(in)  , value :: TransA
-      character           , intent(in)  , value :: TransB
+      character(c_char)   , intent(in)  , value :: TransA
+      character(c_char)   , intent(in)  , value :: TransB
      integer (c_int64_t) , intent(in)  , value :: m
      integer (c_int64_t) , intent(in)  , value :: n
      integer (c_int64_t) , intent(in)  , value :: k
@ -1424,8 +1424,8 @@ end function qmckl_dgemm_safe_f
        implicit none

        integer (c_int64_t) , intent(in)  , value :: context
-        character           , intent(in)  , value :: TransA
-        character           , intent(in)  , value :: TransB
+        character(c_char)   , intent(in)  , value :: TransA
+        character(c_char)   , intent(in)  , value :: TransB
        integer (c_int64_t) , intent(in)  , value :: m
        integer (c_int64_t) , intent(in)  , value :: n
        integer (c_int64_t) , intent(in)  , value :: k
@ -2827,9 +2827,21 @@ printf("qmckl_transpose ok\n");

     #+end_src

+* Utilities
+
+  Trick to make MKL efficient on AMD
+  
+  #+begin_src c :tangle (eval c)
+int mkl_serv_intel_cpu_true() {
+  return 1;
+}
+
+  #+end_src
+
 * End of files                                                     :noexport:


+
  #+begin_src c :tangle (eval h_private_type)
 #endif
  #+end_src
--- a/org/qmckl_distance.org
+++ b/org/qmckl_distance.org
@ -86,7 +86,7 @@ int main() {
          const double* B,
          const int64_t ldb,
          double* const C,
-          const int64_t ldc ); 
+          const int64_t ldc );
    #+end_src

    #+begin_src f90 :tangle (eval f)
@ -157,12 +157,12 @@ integer function qmckl_distance_sq_f(context, transa, transb, m, n, &
     return
  endif

-  if (iand(transab,2) == 0 .and. LDA < 3) then
+  if (iand(transab,2) == 0 .and. LDB < 3) then
     info = QMCKL_INVALID_ARG_7
     return
  endif

-  if (iand(transab,2) == 2 .and. LDA < m) then
+  if (iand(transab,2) == 2 .and. LDB < n) then
     info = QMCKL_INVALID_ARG_7
     return
  endif
@ -236,8 +236,8 @@ end function qmckl_distance_sq_f
     implicit none

     integer (c_int64_t) , intent(in)  , value :: context
-     character           , intent(in)  , value :: transa
-     character           , intent(in)  , value :: transb
+     character(c_char)   , intent(in)  , value :: transa
+     character(c_char)   , intent(in)  , value :: transb
     integer (c_int64_t) , intent(in)  , value :: m
     integer (c_int64_t) , intent(in)  , value :: n
     real    (c_double ) , intent(in)          :: A(lda,*)
@ -267,8 +267,8 @@ end function qmckl_distance_sq_f
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       character           , intent(in)  , value :: transa
-       character           , intent(in)  , value :: transb
+       character(c_char)   , intent(in)  , value :: transa
+       character(c_char)   , intent(in)  , value :: transb
       integer (c_int64_t) , intent(in)  , value :: m
       integer (c_int64_t) , intent(in)  , value :: n
       real    (c_double ) , intent(in)          :: A(lda,*)
@ -485,7 +485,7 @@ end function test_qmckl_distance_sq
          const double* B,
          const int64_t ldb,
          double* const C,
-          const int64_t ldc ); 
+          const int64_t ldc );
    #+end_src

 *** Source
@ -558,16 +558,6 @@ integer function qmckl_distance_f(context, transa, transb, m, n, &
     return
  endif

-  if (iand(transab,2) == 0 .and. LDA < 3) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
-  if (iand(transab,2) == 2 .and. LDA < m) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
  ! check for LDB
  if (iand(transab,1) == 0 .and. LDB < 3) then
     info = QMCKL_INVALID_ARG_9
@ -579,16 +569,6 @@ integer function qmckl_distance_f(context, transa, transb, m, n, &
     return
  endif

-  if (iand(transab,2) == 0 .and. LDB < 3) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
-  if (iand(transab,2) == 2 .and. LDB < n) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
  ! check for LDC
  if (LDC < m) then
     info = QMCKL_INVALID_ARG_11
@ -669,8 +649,8 @@ end function qmckl_distance_f
     implicit none

     integer (c_int64_t) , intent(in)  , value :: context
-     character           , intent(in)  , value :: transa
-     character           , intent(in)  , value :: transb
+     character(c_char)   , intent(in)  , value :: transa
+     character(c_char)   , intent(in)  , value :: transb
     integer (c_int64_t) , intent(in)  , value :: m
     integer (c_int64_t) , intent(in)  , value :: n
     real    (c_double ) , intent(in)          :: A(lda,*)
@ -700,8 +680,8 @@ end function qmckl_distance_f
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       character           , intent(in)  , value :: transa
-       character           , intent(in)  , value :: transb
+       character(c_char)   , intent(in)  , value :: transa
+       character(c_char)   , intent(in)  , value :: transb
       integer (c_int64_t) , intent(in)  , value :: m
       integer (c_int64_t) , intent(in)  , value :: n
       real    (c_double ) , intent(in)          :: A(lda,*)
@ -929,7 +909,7 @@ end function test_qmckl_dist
          const int64_t ldb,
          double* const C,
          const int64_t ldc,
-          const double rescale_factor_kappa ); 
+          const double rescale_factor_kappa );
    #+end_src

 *** Source
@ -1005,27 +985,7 @@ integer function qmckl_distance_rescaled_f(context, transa, transb, m, n, &
     return
  endif

-  if (iand(transab,2) == 0 .and. LDA < 3) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
-  if (iand(transab,2) == 2 .and. LDA < m) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
  ! check for LDB
-  if (iand(transab,1) == 0 .and. LDB < 3) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
-  if (iand(transab,1) == 1 .and. LDB < n) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
  if (iand(transab,2) == 0 .and. LDB < 3) then
     info = QMCKL_INVALID_ARG_9
     return
@ -1116,8 +1076,8 @@ end function qmckl_distance_rescaled_f
     implicit none

     integer (c_int64_t) , intent(in)  , value :: context
-     character           , intent(in)  , value :: transa
-     character           , intent(in)  , value :: transb
+     character(c_char)   , intent(in)  , value :: transa
+     character(c_char)   , intent(in)  , value :: transb
     integer (c_int64_t) , intent(in)  , value :: m
     integer (c_int64_t) , intent(in)  , value :: n
     real    (c_double ) , intent(in)          :: A(lda,*)
@ -1148,8 +1108,8 @@ end function qmckl_distance_rescaled_f
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       character           , intent(in)  , value :: transa
-       character           , intent(in)  , value :: transb
+       character(c_char)   , intent(in)  , value :: transa
+       character(c_char)   , intent(in)  , value :: transb
       integer (c_int64_t) , intent(in)  , value :: m
       integer (c_int64_t) , intent(in)  , value :: n
       real    (c_double ) , intent(in)          :: A(lda,*)
@ -1165,59 +1125,238 @@ end function qmckl_distance_rescaled_f
   #+end_src

 *** Test                                                           :noexport:
+
+   #+BEGIN_SRC python :results output :exports none
+import numpy as np
+
+kappa     = 0.6
+kappa_inv = 1./kappa
+
+# For H2O we have the following data:
+elec_num     = 10
+nucl_num     = 2
+up_num       = 5
+down_num     = 5
+nucl_coord = np.array([ [0.000000,  0.000000 ],
+   [0.000000,  0.000000 ],
+   [0.000000,  2.059801 ] ])
+
+elec_coord = np.array(   [[[-0.250655104764153      ,  0.503070975550133      ,  -0.166554344502303],
+     [-0.587812193472177      , -0.128751981129274      ,   0.187773606533075],
+     [ 1.61335569047166       , -0.615556732874863      ,  -1.43165470979934 ],
+     [-4.901239896295210E-003 , -1.120440036458986E-002 ,   1.99761909330422 ],
+     [ 0.766647499681200      , -0.293515395797937      ,   3.66454589201239 ],
+     [-0.127732483187947      , -0.138975497694196      ,  -8.669850480215846E-002],
+     [-0.232271834949124      , -1.059321673434182E-002 ,  -0.504862241464867],
+     [ 1.09360863531826       , -2.036103063808752E-003 ,  -2.702796910818986E-002],
+     [-0.108090166832043      ,  0.189161729653261      ,   2.15398313919894],
+     [ 0.397978144318712      , -0.254277292595981      ,   2.54553335476344]]])
+
+ee_distance_rescaled = \
+  np.array([ [(1.-np.exp(-kappa*np.linalg.norm(elec_coord[0,j,:]-elec_coord[0,i,:])))/kappa \
+             for i in range(elec_num) ]
+             for j in range(elec_num) ])
+
+en_distance_rescaled = \
+  np.array([ [(1.-np.exp(-kappa*np.linalg.norm(elec_coord[0,j,:]-nucl_coord[:,i])))/kappa \
+             for j in range(elec_num) ]
+             for i in range(nucl_num) ])
+
+print(ee_distance_rescaled)
+print()
+print(en_distance_rescaled)
+   #+END_SRC
+
+   #+RESULTS:
+   #+begin_example
+   [[0.         0.63475074 1.29816415 1.23147027 1.51933127 0.54402406
+     0.51452479 0.96538731 1.25878564 1.3722995 ]
+    [0.63475074 0.         1.35148664 1.13524156 1.48940503 0.4582292
+     0.62758076 1.06560856 1.179133   1.30763703]
+    [1.29816415 1.35148664 0.         1.50021375 1.59200788 1.23488312
+     1.20844259 1.0355537  1.52064535 1.53049239]
+    [1.23147027 1.13524156 1.50021375 0.         1.12016142 1.19158954
+     1.29762585 1.24824277 0.25292267 0.58609336]
+    [1.51933127 1.48940503 1.59200788 1.12016142 0.         1.50217017
+     1.54012828 1.48753895 1.10441805 0.84504381]
+    [0.54402406 0.4582292  1.23488312 1.19158954 1.50217017 0.
+     0.39417354 0.87009603 1.23838502 1.33419121]
+    [0.51452479 0.62758076 1.20844259 1.29762585 1.54012828 0.39417354
+     0.         0.95118809 1.33068934 1.41097406]
+    [0.96538731 1.06560856 1.0355537  1.24824277 1.48753895 0.87009603
+     0.95118809 0.         1.29422213 1.33222493]
+    [1.25878564 1.179133   1.52064535 0.25292267 1.10441805 1.23838502
+     1.33068934 1.29422213 0.         0.62196802]
+    [1.3722995  1.30763703 1.53049239 0.58609336 0.84504381 1.33419121
+     1.41097406 1.33222493 0.62196802 0.        ]]
+
+   [[0.49421587 0.52486545 1.23280503 1.16396998 1.49156627 0.1952946
+     0.4726453  0.80211227 1.21198526 1.31411513]
+    [1.24641375 1.15444238 1.50565145 0.06218339 1.10153184 1.20919677
+     1.3111856  1.26122875 0.22122563 0.55669168]]
+   #+end_example
+
+
+    #+begin_src f90 :tangle (eval f_test)
+integer(qmckl_exit_code) function test_qmckl_dist_rescaled(context) bind(C)
+
+  use qmckl
+  use iso_c_binding
+
+  implicit none
+
+  integer(qmckl_context), intent(in), value :: context
+  integer*8                     :: m, n, LDA, LDB, LDC
+  double precision              :: x
+  integer*8                     :: i,j
+
+  double precision, parameter :: kappa = 0.6d0
+  double precision, parameter :: kappa_inv = 1.d0/kappa
+
+  integer*8, parameter :: elec_num = 10_8
+  integer*8, parameter :: nucl_num = 2_8
+
+  double precision :: nucl_coord(nucl_num,3) = reshape( (/ &
+        0.0d0,  0.0d0 ,  &
+        0.0d0,  0.0d0 ,  &
+        0.0d0,  2.059801d0 /), shape(nucl_coord) )
+
+  double precision :: elec_coord(3,elec_num) = reshape( (/ &
+        -0.250655104764153d0    ,  0.503070975550133d0    ,  -0.166554344502303d0 , &
+        -0.587812193472177d0    , -0.128751981129274d0    ,   0.187773606533075d0 , &
+         1.61335569047166d0     , -0.615556732874863d0    ,  -1.43165470979934d0  , &
+        -4.901239896295210d-003 , -1.120440036458986d-002 ,   1.99761909330422d0  , &
+         0.766647499681200d0    , -0.293515395797937d0    ,   3.66454589201239d0  , &
+        -0.127732483187947d0    , -0.138975497694196d0    ,  -8.669850480215846d-002 , &
+        -0.232271834949124d0    , -1.059321673434182d-002 ,  -0.504862241464867d0 , &
+         1.09360863531826d0     , -2.036103063808752d-003 ,  -2.702796910818986d-002 , &
+        -0.108090166832043d0    ,  0.189161729653261d0    ,   2.15398313919894d0 , &
+         0.397978144318712d0    , -0.254277292595981d0    ,   2.54553335476344d0  /), &
+        shape(elec_coord))
+
+  double precision :: ref_ee(elec_num,elec_num) = reshape( (/ &
+       0.d0, 0.63475074d0, 1.29816415d0, 1.23147027d0, 1.51933127d0, &
+       0.54402406d0, 0.51452479d0, 0.96538731d0, 1.25878564d0, 1.3722995d0 , &
+       0.63475074d0, 0.d0, 1.35148664d0, 1.13524156d0, 1.48940503d0, &
+       0.4582292d0, 0.62758076d0, 1.06560856d0, 1.179133d0, 1.30763703d0 , &
+       1.29816415d0, 1.35148664d0, 0.d0, 1.50021375d0, 1.59200788d0, &
+       1.23488312d0, 1.20844259d0, 1.0355537d0, 1.52064535d0, 1.53049239d0 , &
+       1.23147027d0, 1.13524156d0, 1.50021375d0, 0.d0, 1.12016142d0, &
+       1.19158954d0, 1.29762585d0, 1.24824277d0, 0.25292267d0, 0.58609336d0 , &
+       1.51933127d0, 1.48940503d0, 1.59200788d0, 1.12016142d0, 0.d0, &
+       1.50217017d0, 1.54012828d0, 1.48753895d0, 1.10441805d0, 0.84504381d0 , &
+       0.54402406d0, 0.4582292d0, 1.23488312d0, 1.19158954d0, 1.50217017d0, &
+       0.d0, 0.39417354d0, 0.87009603d0, 1.23838502d0, 1.33419121d0 , &
+       0.51452479d0, 0.62758076d0, 1.20844259d0, 1.29762585d0, 1.54012828d0, &
+       0.39417354d0, 0.d0, 0.95118809d0, 1.33068934d0, 1.41097406d0 , &
+       0.96538731d0, 1.06560856d0, 1.0355537d0, 1.24824277d0, 1.48753895d0, &
+       0.87009603d0, 0.95118809d0, 0.d0, 1.29422213d0, 1.33222493d0 , &
+       1.25878564d0, 1.179133d0, 1.52064535d0, 0.25292267d0, 1.10441805d0, &
+       1.23838502d0, 1.33068934d0, 1.29422213d0, 0.d0, 0.62196802d0 , &
+       1.3722995d0, 1.30763703d0, 1.53049239d0, 0.58609336d0, 0.84504381d0, &
+       1.33419121d0, 1.41097406d0, 1.33222493d0, 0.62196802d0, 0.d0 /), shape(ref_ee) )
+
+  double precision :: ref_en(elec_num, nucl_num) = reshape( (/ &
+       0.49421587d0, 0.52486545d0, 1.23280503d0, 1.16396998d0, 1.49156627d0, &
+       0.1952946d0, 0.4726453d0, 0.80211227d0, 1.21198526d0, 1.31411513d0, &
+       1.24641375d0, 1.15444238d0, 1.50565145d0, 0.06218339d0, 1.10153184d0, &
+       1.20919677d0, 1.3111856d0, 1.26122875d0, 0.22122563d0, 0.55669168d0 /), shape(ref_en) )
+
+  double precision, allocatable :: distance_ee(:,:), distance_en(:,:)
+
+  allocate( distance_ee(elec_num,elec_num), distance_en(elec_num,nucl_num) )
+
+  print *, 'ee'
+  test_qmckl_dist_rescaled = &
+       qmckl_distance_rescaled(context, 'N', 'N', elec_num, elec_num, elec_coord, &
+       size(elec_coord,1)*1_8, elec_coord, size(elec_coord,1)*1_8, &
+       distance_ee, size(distance_ee,1)*1_8, kappa)
+
+  if (test_qmckl_dist_rescaled /= QMCKL_SUCCESS) return
+
+  test_qmckl_dist_rescaled = QMCKL_FAILURE
+
+  do j=1,elec_num
+     do i=1,elec_num
+        print *, i,j,real(distance_ee(i,j)), real(ref_ee(i,j))
+        if (dabs(distance_ee(i,j) - ref_ee(i,j)) > 1.d-7) then
+           return
+        endif
+     end do
+  end do
+
+  print *, 'en'
+  test_qmckl_dist_rescaled = &
+       qmckl_distance_rescaled(context, 'N', 'T', elec_num, nucl_num, elec_coord, &
+       size(elec_coord,1)*1_8, nucl_coord, size(nucl_coord,1)*1_8, &
+       distance_en, size(distance_en,1)*1_8, kappa)
+
+  if (test_qmckl_dist_rescaled /= QMCKL_SUCCESS) return
+
+  test_qmckl_dist_rescaled = QMCKL_FAILURE
+
+  do j=1,nucl_num
+     do i=1,elec_num
+        print *, i,j,real(distance_en(i,j)), real(ref_en(i,j))
+        if (dabs(distance_en(i,j) - ref_en(i,j)) > 1.d-7) then
+           return
+        endif
+     end do
+  end do
+
+  test_qmckl_dist_rescaled = QMCKL_SUCCESS
+end function test_qmckl_dist_rescaled
+    #+end_src
+
+    #+begin_src c :comments link :tangle (eval c_test)
+    qmckl_exit_code test_qmckl_dist_rescaled(qmckl_context context);
+    assert(test_qmckl_dist_rescaled(context) == QMCKL_SUCCESS);
+    #+end_src
 * Rescaled Distance Derivatives

-** ~qmckl_distance_rescaled_deriv_e~
+** ~qmckl_distance_rescaled_gl~
   :PROPERTIES:
-   :Name:     qmckl_distance_rescaled_deriv_e
+   :Name:     qmckl_distance_rescaled_gl
   :CRetType: qmckl_exit_code
   :FRetType: qmckl_exit_code
   :END:

-   ~qmckl_distance_rescaled_deriv_e~ computes the matrix of the gradient and laplacian of the
+   ~qmckl_distance_rescaled_gl~ computes the matrix of the gradient and Laplacian of the
   rescaled distance with respect to the electron coordinates. The derivative is a rank 3 tensor.
   The first dimension has a dimension of 4 of which the first three coordinates
-   contains the gradient vector and the last index is the laplacian.
+   contains the gradient vector and the last index is the Laplacian.


   \[
-   C_{ij} = \left( 1 - \exp{-\kappa C_{ij}}\right)/\kappa
+   C(r_{ij}) = \left( 1 - \exp(-\kappa\, r_{ij})\right)/\kappa
   \]

   Here the gradient is defined as follows:

   \[
-   \nabla (C_{ij}(\mathbf{r}_{ee})) = \left(\frac{\delta C_{ij}(\mathbf{r}_{ee})}{\delta x},\frac{\delta C_{ij}(\mathbf{r}_{ee})}{\delta y},\frac{\delta C_{ij}(\mathbf{r}_{ee})}{\delta z} \right)
+   \nabla_i C(r_{ij}) = \left(\frac{\partial C(r_{ij})}{\partial x_i},\frac{\partial C(r_{ij})}{\partial y_i},\frac{\partial C(r_{ij})}{\partial z_i} \right)
   \]
-   and the laplacian is defined as follows:
+   and the Laplacian is defined as follows:

   \[
-   \triangle (C_{ij}(r_{ee})) = \frac{\delta^2}{\delta x^2} + \frac{\delta^2}{\delta y^2} + \frac{\delta^2}{\delta z^2}
+   \Delta_i C(r_{ij}) = \frac{\partial^2}{\partial x_i^2} + \frac{\partial^2}{\partial y_i^2} + \frac{\partial^2}{\partial z_i^2}
   \]

-   Using the above three formulae, the expression for the gradient and laplacian is
-   as follows:
+   Using the above three formulas, the expression for the gradient and Laplacian is:

   \[
-   \frac{\delta  C_{ij}(\mathbf{r}_{ee})}{\delta x} = \frac{|(x_i - x_j)|}{r_{ij}} (1 - \kappa R_{ij})
+   \frac{\partial  C(r_{ij})}{\partial x_i} = \frac{|(x_i -
+   x_j)|}{r_{ij}} \exp (- \kappa \, r_{ij})
   \]

   \[
-   \frac{\delta  C_{ij}(\mathbf{r}_{ee})}{\delta y} = \frac{|(y_i - y_j)|}{r_{ij}} (1 - \kappa R_{ij})
-   \]
-
-   \[
-   \frac{\delta  C_{ij}(\mathbf{r}_{ee})}{\delta z} = \frac{|(z_i - z_j)|}{r_{ij}} (1 - \kappa R_{ij})
-   \]
-
-   \[
-   \Delta(C_{ij}(r_{ee}) = \left[ \frac{2}{r_{ij}} - \kappa  \right] (1-\kappa R_{ij})
+   \Delta C_{ij}(r_{ij}) = \left[ \frac{2}{r_{ij}} - \kappa  \right] \exp (- \kappa \, r_{ij})
   \]

   If the input array is normal (~'N'~), the xyz coordinates are in
   the leading dimension: ~[n][3]~ in C and ~(3,n)~ in Fortran.

-   #+NAME: qmckl_distance_rescaled_deriv_e_args
+   #+NAME: qmckl_distance_rescaled_gl_args
   | Variable               | Type                | In/Out | Description                                           |
   |------------------------+---------------------+--------+-------------------------------------------------------|
   | ~context~              | ~qmckl_context~     | in     | Global state                                          |
@ -1246,12 +1385,12 @@ end function qmckl_distance_rescaled_f
    - ~A~ is allocated with at least $3 \times m \times 8$ bytes
    - ~B~ is allocated with at least $3 \times n \times 8$ bytes
    - ~C~ is allocated with at least $4 \times m \times n \times 8$ bytes
-      
-    #+CALL: generate_c_header(table=qmckl_distance_rescaled_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
+
+    #+CALL: generate_c_header(table=qmckl_distance_rescaled_gl_args,rettyp=get_value("CRetType"),fname=get_value("Name"))

    #+RESULTS:
    #+begin_src c :tangle (eval h_func) :comments org
-    qmckl_exit_code qmckl_distance_rescaled_deriv_e (
+    qmckl_exit_code qmckl_distance_rescaled_gl (
          const qmckl_context context,
          const char transa,
          const char transb,
@ -1263,11 +1402,11 @@ end function qmckl_distance_rescaled_f
          const int64_t ldb,
          double* const C,
          const int64_t ldc,
-          const double rescale_factor_kappa ); 
+          const double rescale_factor_kappa );
    #+end_src

    #+begin_src f90 :tangle (eval f)
-integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n, &
+integer function qmckl_distance_rescaled_gl_f(context, transa, transb, m, n, &
     A, LDA, B, LDB, C, LDC, rescale_factor_kappa) &
     result(info)
  use qmckl
@ -1285,11 +1424,9 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n

  integer*8 :: i,j
  real*8    :: x, y, z, dist, dist_inv
-  real*8    :: rescale_factor_kappa_inv, rij
+  real*8    :: rij
  integer   :: transab

-  rescale_factor_kappa_inv = 1.0d0/rescale_factor_kappa;
-
  info = QMCKL_SUCCESS

  if (context == QMCKL_NULL_CONTEXT) then
@ -1339,27 +1476,7 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n
     return
  endif

-  if (iand(transab,2) == 0 .and. LDA < 3) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
-  if (iand(transab,2) == 2 .and. LDA < m) then
-     info = QMCKL_INVALID_ARG_7
-     return
-  endif
-
  ! check for LDB
-  if (iand(transab,1) == 0 .and. LDB < 3) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
-  if (iand(transab,1) == 1 .and. LDB < n) then
-     info = QMCKL_INVALID_ARG_9
-     return
-  endif
-
  if (iand(transab,2) == 0 .and. LDB < 3) then
     info = QMCKL_INVALID_ARG_9
     return
@ -1387,12 +1504,16 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n
           y = A(2,i) - B(2,j)
           z = A(3,i) - B(3,j)
           dist = dsqrt(x*x + y*y + z*z)
+           ! Avoid floating-point exception
+           if (dist == 0.d0) then
+             dist = 69.d0/rescale_factor_kappa
+           endif
           dist_inv = 1.0d0/dist
-           rij = (1.0d0 - dexp(-rescale_factor_kappa * dist)) * rescale_factor_kappa_inv
-           C(1,i,j) = x * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(2,i,j) = y * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(3,i,j) = z * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa_inv) * ( 1.0d0 - rescale_factor_kappa_inv * rij)
+           rij = dexp(-rescale_factor_kappa * dist)
+           C(1,i,j) = x * dist_inv * rij
+           C(2,i,j) = y * dist_inv * rij
+           C(3,i,j) = z * dist_inv * rij
+           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa) * rij
        end do
     end do

@ -1404,12 +1525,16 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n
           y = A(i,2) - B(2,j)
           z = A(i,3) - B(3,j)
           dist = dsqrt(x*x + y*y + z*z)
+           ! Avoid floating-point exception
+           if (dist == 0.d0) then
+             dist = 69.d0/rescale_factor_kappa
+           endif
           dist_inv = 1.0d0/dist
-           rij = (1.0d0 - dexp(-rescale_factor_kappa * dist)) * rescale_factor_kappa_inv
-           C(1,i,j) = x * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(2,i,j) = y * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(3,i,j) = z * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa_inv) * ( 1.0d0 - rescale_factor_kappa_inv * rij)
+           rij = dexp(-rescale_factor_kappa * dist)
+           C(1,i,j) = x * dist_inv * rij
+           C(2,i,j) = y * dist_inv * rij
+           C(3,i,j) = z * dist_inv * rij
+           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa) * rij
        end do
     end do

@ -1421,12 +1546,16 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n
           y = A(2,i) - B(j,2)
           z = A(3,i) - B(j,3)
           dist = dsqrt(x*x + y*y + z*z)
+           ! Avoid floating-point exception
+           if (dist == 0.d0) then
+             dist = 69.d0/rescale_factor_kappa
+           endif
           dist_inv = 1.0d0/dist
-           rij = (1.0d0 - dexp(-rescale_factor_kappa * dist)) * rescale_factor_kappa_inv
-           C(1,i,j) = x * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(2,i,j) = y * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(3,i,j) = z * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa_inv) * ( 1.0d0 - rescale_factor_kappa_inv * rij)
+           rij = dexp(-rescale_factor_kappa * dist)
+           C(1,i,j) = x * dist_inv * rij
+           C(2,i,j) = y * dist_inv * rij
+           C(3,i,j) = z * dist_inv * rij
+           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa) * rij
        end do
     end do

@ -1438,27 +1567,31 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n
           y = A(i,2) - B(j,2)
           z = A(i,3) - B(j,3)
           dist = dsqrt(x*x + y*y + z*z)
+           ! Avoid floating-point exception
+           if (dist == 0.d0) then
+             dist = 69.d0/rescale_factor_kappa
+           endif
           dist_inv = 1.0d0/dist
-           rij = (1.0d0 - dexp(-rescale_factor_kappa * dist)) * rescale_factor_kappa_inv
-           C(1,i,j) = x * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(2,i,j) = y * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(3,i,j) = z * dist_inv * ( 1.0d0 - rescale_factor_kappa_inv * rij)
-           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa_inv) * ( 1.0d0 - rescale_factor_kappa_inv * rij)
+           rij = dexp(-rescale_factor_kappa * dist)
+           C(1,i,j) = x * dist_inv * rij
+           C(2,i,j) = y * dist_inv * rij
+           C(3,i,j) = z * dist_inv * rij
+           C(4,i,j) = (2.0d0 * dist_inv - rescale_factor_kappa) * rij
        end do
     end do

  end select

-end function qmckl_distance_rescaled_deriv_e_f
+end function qmckl_distance_rescaled_gl_f
    #+end_src

    This function is more efficient when ~A~ and ~B~ are transposed.
-    
-    #+CALL: generate_c_interface(table=qmckl_distance_rescaled_deriv_e_args,fname=get_value("Name"))
+
+    #+CALL: generate_c_interface(table=qmckl_distance_rescaled_gl_args,fname=get_value("Name"))

   #+RESULTS:
   #+begin_src f90 :tangle (eval f) :comments org :exports none
-   integer(c_int32_t) function qmckl_distance_rescaled_deriv_e &
+   integer(c_int32_t) function qmckl_distance_rescaled_gl &
       (context, transa, transb, m, n, A, lda, B, ldb, C, ldc, rescale_factor_kappa) &
       bind(C) result(info)

@ -1466,8 +1599,8 @@ end function qmckl_distance_rescaled_deriv_e_f
     implicit none

     integer (c_int64_t) , intent(in)  , value :: context
-     character           , intent(in)  , value :: transa
-     character           , intent(in)  , value :: transb
+     character(c_char)   , intent(in)  , value :: transa
+     character(c_char)   , intent(in)  , value :: transb
     integer (c_int64_t) , intent(in)  , value :: m
     integer (c_int64_t) , intent(in)  , value :: n
     real    (c_double ) , intent(in)          :: A(lda,*)
@ -1478,19 +1611,19 @@ end function qmckl_distance_rescaled_deriv_e_f
     integer (c_int64_t) , intent(in)  , value :: ldc
     real    (c_double ) , intent(in)  , value :: rescale_factor_kappa

-     integer(c_int32_t), external :: qmckl_distance_rescaled_deriv_e_f
-     info = qmckl_distance_rescaled_deriv_e_f &
+     integer(c_int32_t), external :: qmckl_distance_rescaled_gl_f
+     info = qmckl_distance_rescaled_gl_f &
            (context, transa, transb, m, n, A, lda, B, ldb, C, ldc, rescale_factor_kappa)

-   end function qmckl_distance_rescaled_deriv_e
+   end function qmckl_distance_rescaled_gl
   #+end_src

-   #+CALL: generate_f_interface(table=qmckl_distance_rescaled_deriv_e_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
+   #+CALL: generate_f_interface(table=qmckl_distance_rescaled_gl_args,rettyp=get_value("FRetType"),fname=get_value("Name"))

   #+RESULTS:
   #+begin_src f90 :tangle (eval fh_func) :comments org :exports none
   interface
-     integer(c_int32_t) function qmckl_distance_rescaled_deriv_e &
+     integer(c_int32_t) function qmckl_distance_rescaled_gl &
         (context, transa, transb, m, n, A, lda, B, ldb, C, ldc, rescale_factor_kappa) &
         bind(C)
       use, intrinsic :: iso_c_binding
@ -1498,8 +1631,8 @@ end function qmckl_distance_rescaled_deriv_e_f
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       character           , intent(in)  , value :: transa
-       character           , intent(in)  , value :: transb
+       character(c_char)   , intent(in)  , value :: transa
+       character(c_char)   , intent(in)  , value :: transb
       integer (c_int64_t) , intent(in)  , value :: m
       integer (c_int64_t) , intent(in)  , value :: n
       real    (c_double ) , intent(in)          :: A(lda,*)
@ -1510,7 +1643,7 @@ end function qmckl_distance_rescaled_deriv_e_f
       integer (c_int64_t) , intent(in)  , value :: ldc
       real    (c_double ) , intent(in)  , value :: rescale_factor_kappa

-     end function qmckl_distance_rescaled_deriv_e
+     end function qmckl_distance_rescaled_gl
   end interface
   #+end_src

--- a/org/qmckl_electron.org
+++ b/org/qmckl_electron.org
@ -112,7 +112,7 @@ typedef struct qmckl_walker_struct {
  int64_t num;
  qmckl_point_struct point;
 } qmckl_walker;
-  
+
 typedef struct qmckl_electron_struct {
  int64_t        num;
  int64_t        up_num;
@ -287,7 +287,7 @@ qmckl_set_electron_coord(qmckl_context context,
 {

  int32_t mask = 0;
-  
+
  <<pre2>>

  if (transp != 'N' && transp != 'T') {
@ -347,9 +347,9 @@ interface
    implicit none

    integer (c_int64_t) , intent(in)  , value :: context
-    character           , intent(in)  , value :: transp
+    character(c_char)   , intent(in)  , value :: transp
    integer (c_int64_t) , intent(in)  , value :: walk_num
-    double precision    , intent(in)          :: coord(*)
+    real(c_double)      , intent(in)          :: coord(*)
    integer (c_int64_t) , intent(in)  , value :: size_max
  end function
 end interface
@ -723,7 +723,7 @@ qmckl_exit_code qmckl_provide_ee_distance(qmckl_context context)
      free(ctx->electron.ee_distance);
      ctx->electron.ee_distance = NULL;
    }
-    
+
    /* Allocate array */
    if (ctx->electron.ee_distance == NULL) {

--- a/org/qmckl_error.org
+++ b/org/qmckl_error.org
@ -362,7 +362,7 @@ void qmckl_string_of_error_f(const qmckl_exit_code error, char result[<<MAX_STRI
       import
       implicit none
       integer (qmckl_exit_code), intent(in), value :: error
-       character, intent(out) :: string(<<MAX_STRING_LENGTH()>>)
+       character(c_char), intent(out) :: string(<<MAX_STRING_LENGTH()>>)
     end subroutine qmckl_string_of_error
  end interface
  #+end_src
@ -440,7 +440,7 @@ qmckl_set_error(qmckl_context context,

  Upon error, the error type and message can be obtained from the
  context using ~qmckl_get_error~. The message and function name
-  is returned in the variables provided. Therefore, passing a 
+  is returned in the variables provided. Therefore, passing a
  function name and message is mandatory.

  # Header
@ -494,7 +494,7 @@ qmckl_get_error(qmckl_context context,
    #+end_src

 * Failing
-  
+
   To make a function fail, the ~qmckl_failwith~ function should be
   called, such that information about the failure is stored in
   the context. The desired exit code is given as an argument, as
@ -511,7 +511,7 @@ qmckl_failwith(qmckl_context context,
               const char* function,
               const char* message) ;
   #+end_src
-   
+
   #+begin_src c :comments org :tangle (eval c) :exports none
 qmckl_exit_code
 qmckl_failwith(qmckl_context context,
@ -593,7 +593,7 @@ qmckl_last_error(qmckl_context context, char* buffer) {
  return QMCKL_SUCCESS;
 }
    #+end_src
-    
+
 ** Fortran inteface

   #+begin_src f90 :tangle (eval fh_func) :exports none :noweb yes
@ -603,7 +603,7 @@ qmckl_last_error(qmckl_context context, char* buffer) {
       import
       implicit none
       integer (c_int64_t) , intent(in), value :: context
-       character, intent(out) :: string(*)
+       character(c_char), intent(out) :: string(*)
     end subroutine qmckl_last_error
  end interface
   #+end_src
@ -625,7 +625,7 @@ qmckl_check(qmckl_context context, qmckl_exit_code rc);

 qmckl_exit_code
 qmckl_check(qmckl_context context, qmckl_exit_code rc)
-{  
+{

  if (rc != QMCKL_SUCCESS) {
    char fname[QMCKL_MAX_FUN_LEN];
@ -639,7 +639,7 @@ qmckl_check(qmckl_context context, qmckl_exit_code rc)
  return rc;
 }
    #+end_src
-    
+
  It should be used as:

    #+begin_src c
@ -648,7 +648,7 @@ rc = qmckl_check(context,
                 );
 assert (rc == QMCKL_SUCCESS);
     #+end_src
-    
+
 ** Fortran inteface

   #+begin_src f90 :tangle (eval fh_func) :exports none :noweb yes
--- a/org/qmckl_examples.org
+++ b/org/qmckl_examples.org
@ -1,28 +1,72 @@
 #+TITLE: Code examples
 #+SETUPFILE: ../tools/theme.setup
 #+INCLUDE: ../tools/lib.org
-  
-In this section, we present examples of usage of QMCkl.
-For simplicity, we assume that the wave function parameters are stored
-in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file.

-* Python
-** Check numerically that MOs are orthonormal
+In this section, we provide hands-on examples to demonstrate the capabilities
+of the QMCkl library.  We furnish code samples in C, Fortran, and Python,
+serving as exhaustive tutorials for effectively leveraging QMCkl.
+For simplicity, we assume that the wave function parameters are stored in a
+[[https://github.com/TREX-CoE/trexio][TREXIO]] file.
+
+* Overlap matrix in the MO basis
+
+The focal point of this example is the numerical evaluation of the overlap
+matrix in the MO basis. Utilizing QMCkl, we approximate the MOs at
+discrete grid points to compute the overlap matrix \( S_{ij} \) as follows:
+\[
+S_{ij} = \int \phi_i(\mathbf{r})\, \phi_j(\mathbf{r}) \text{d}\mathbf{r} \approx
+         \sum_k \phi_i(\mathbf{r}_k)\, \phi_j(\mathbf{r}_k) \delta\mathbf{r}
+\]
+
+
+The code starts by reading a wave function from a TREXIO file.  This is
+accomplished using the ~qmckl_trexio_read~ function, which populates a
+~qmckl_context~ with the necessary wave function parameters.  The context
+serves as the primary interface for interacting with the QMCkl library,
+encapsulating the state and configurations of the system.
+Subsequently, the code retrieves various attributes such as the number of
+nuclei ~nucl_num~ and coordinates ~nucl_coord~.
+These attributes are essential for setting up the integration grid.
+
+The core of the example lies in the numerical computation of the overlap
+matrix. To achieve this, the code employs a regular grid in three-dimensional
+space, and the grid points are then populated into the ~qmckl_context~ using
+the ~qmckl_set_point~ function.
+
+The MO values at these grid points are computed using the
+~qmckl_get_mo_basis_mo_value~ function.  These values are then used to
+calculate the overlap matrix through a matrix multiplication operation
+facilitated by the ~qmckl_dgemm~ function.
+
+The code is also instrumented to measure the execution time for the MO
+value computation, providing an empirical assessment of the computational
+efficiency. Error handling is robustly implemented at each stage to ensure the
+reliability of the simulation.
+
+In summary, this example serves as a holistic guide for leveraging the QMCkl
+library, demonstrating its ease of use. It provides a concrete starting point
+for researchers and developers interested in integrating QMCkl into their QMC
+code.
+
+** Python
+   :PROPERTIES:
+   :header-args: :tangle mo_ortho.py
+   :END:

   In this example, we will compute numerically the overlap
   between the molecular orbitals:
-   
+
   \[
      S_{ij} = \int \phi_i(\mathbf{r}) \phi_j(\mathbf{r})
   \text{d}\mathbf{r} \sim \sum_{k=1}^{N} \phi_i(\mathbf{r}_k)
-   \phi_j(\mathbf{r}_k) \delta \mathbf{r} 
+   \phi_j(\mathbf{r}_k) \delta \mathbf{r}
   \]
   \[
-      S_{ij} = \langle \phi_i | \phi_j \rangle 
+      S_{ij} = \langle \phi_i | \phi_j \rangle
   \sim \sum_{k=1}^{N} \langle \phi_i | \mathbf{r}_k \rangle
   \langle \mathbf{r}_k | \phi_j \rangle
   \]
-   
+

   #+begin_src python :exports code
 import numpy as np
@ -30,11 +74,11 @@ import qmckl
   #+end_src

   #+RESULTS:
-   
+
   First, we create a context for the QMCkl calculation, and load the
   wave function stored in =h2o_5z.h5= inside it. It is a Hartree-Fock
   determinant for the water molecule in the cc-pV5Z basis set.
-   
+
   #+begin_src python :exports code
 trexio_filename = "..//share/qmckl/test_data/h2o_5z.h5"

@ -49,7 +93,7 @@ qmckl.trexio_read(context, trexio_filename)
   molecule.

   We fetch the nuclear coordinates from the context,
-   
+
   #+begin_src python :exports code
 nucl_num = qmckl.get_nucleus_num(context)

@ -72,7 +116,7 @@ for i in range(nucl_num):
   #+end_example

   and compute the coordinates of the grid points:
-   
+
   #+begin_src python :exports code
 nx = ( 120, 120, 120 )
 shift = np.array([5.,5.,5.])
@ -94,7 +138,7 @@ dr = step[0] * step[1] * step[2]
   #+end_src

   #+RESULTS:
-   
+
   Now the grid is ready, we can create the list of grid points
   $\mathbf{r}_k$ on which the MOs $\phi_i$ will be evaluated, and
   transfer them to the QMCkl context:
@ -113,7 +157,7 @@ qmckl.set_point(context, 'N', point_num, np.reshape(point, (point_num*3)))

   #+RESULTS:
   : None
-   
+
   Then, we evaluate all the MOs at the grid points (and time the execution),
   and thus obtain the matrix $M_{ki} = \langle \mathbf{r}_k | \phi_i \rangle =
   \phi_i(\mathbf{r}_k)$.
@ -127,7 +171,7 @@ before   = time.time()
 mo_value = qmckl.get_mo_basis_mo_value(context, point_num*mo_num)
 after    = time.time()

-mo_value = np.reshape( mo_value, (point_num, mo_num) )
+mo_value = np.reshape( mo_value, (point_num, mo_num) ).T   # Transpose to get mo_num x point_num

 print("Number of MOs: ", mo_num)
 print("Number of grid points: ", point_num)
@ -138,14 +182,14 @@ print("Execution time : ", (after - before), "seconds")
   #+begin_example
 Number of MOs:  201
 Number of grid points:  1728000
-Execution time :  3.511528968811035 seconds
+Execution time :  5.577778577804565 seconds
   #+end_example

   and finally we compute the overlap between all the MOs as
-   $M^\dagger M$.
+   $M.M^\dagger$.

   #+begin_src python :exports code
-overlap = mo_value.T @ mo_value * dr
+overlap = mo_value @ mo_value.T * dr
 print (overlap)
   #+end_src

@ -165,6 +209,477 @@ print (overlap)
      1.18264754e-09  8.97215950e-01]]
   #+end_example

+** C
+   In this example, electron-nucleus cusp fitting is added.
+
+   :PROPERTIES:
+   :header-args: :tangle mo_ortho.c
+   :END:
+
+   In this example, we will compute numerically the overlap
+   between the molecular orbitals:
+
+   \[
+      S_{ij} = \int \phi_i(\mathbf{r}) \phi_j(\mathbf{r})
+   \text{d}\mathbf{r} \sim \sum_{k=1}^{N} \phi_i(\mathbf{r}_k)
+   \phi_j(\mathbf{r}_k) \delta \mathbf{r}
+   \]
+   \[
+      S_{ij} = \langle \phi_i | \phi_j \rangle
+   \sim \sum_{k=1}^{N} \langle \phi_i | \mathbf{r}_k \rangle
+   \langle \mathbf{r}_k | \phi_j \rangle
+   \]
+
+   We apply the cusp fitting procedure, so the MOs might deviate
+   slightly from orthonormality.
+
+   #+begin_src c :exports code
+#include <qmckl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+int main(int argc, char** argv)
+{
+  const char* trexio_filename = "..//share/qmckl/test_data/h2o_5z.h5";
+  qmckl_exit_code rc = QMCKL_SUCCESS;
+   #+end_src
+
+   First, we create a context for the QMCkl calculation, and load the
+   wave function stored in =h2o_5z.h5= inside it. It is a Hartree-Fock
+   determinant for the water molecule in the cc-pV5Z basis set.
+
+   #+begin_src c :exports code
+  qmckl_context context = qmckl_context_create();
+
+  rc = qmckl_trexio_read(context, trexio_filename, strlen(trexio_filename));
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error reading TREXIO file:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+
+   #+end_src
+
+   We impose the electron-nucleus cusp fitting to occur when the
+   electrons are close to the nuclei. The critical distance
+   is 0.5 atomic units for hydrogens and 0.1 for the oxygen.
+   To identify which atom is an oxygen and which are hydrogens, we
+   fetch the nuclear charges from the context.
+
+   #+begin_src c :exports code
+  int64_t nucl_num;
+
+  rc = qmckl_get_nucleus_num(context, &nucl_num);
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error getting nucl_num:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+
+
+  double nucl_charge[nucl_num];
+
+  rc = qmckl_get_nucleus_charge(context, &(nucl_charge[0]), nucl_num);
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error getting nucl_charge:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+
+
+  double r_cusp[nucl_num];
+
+  for (size_t i=0 ; i<nucl_num ; ++i) {
+
+    switch ((int) nucl_charge[i]) {
+
+    case 1:
+      r_cusp[i] = 0.5;
+      break;
+
+    case 8:
+      r_cusp[i] = 0.1;
+      break;
+    }
+
+  }
+
+
+  rc = qmckl_set_mo_basis_r_cusp(context, &(r_cusp[0]), nucl_num);
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error setting r_cusp:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+
+
+   #+end_src
+
+
+   We now define the grid points $\mathbf{r}_k$ as a regular grid around the
+   molecule.
+   We fetch the nuclear coordinates from the context,
+
+   #+begin_src c :exports code
+  double nucl_coord[nucl_num][3];
+
+  rc = qmckl_get_nucleus_coord(context, 'N', &(nucl_coord[0][0]), nucl_num*3);
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error getting nucl_coord:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+
+  for (size_t i=0 ; i<nucl_num ; ++i) {
+    printf("%d  %+f %+f %+f\n",
+           (int32_t) nucl_charge[i],
+           nucl_coord[i][0],
+           nucl_coord[i][1],
+           nucl_coord[i][2]);
+  }
+   #+end_src
+
+   #+begin_example
+8  +0.000000 +0.000000 +0.000000
+1  -1.430429 +0.000000 -1.107157
+1  +1.430429 +0.000000 -1.107157
+   #+end_example
+
+   and compute the coordinates of the grid points:
+
+   #+begin_src c :exports code
+  size_t nx[3] = { 120, 120, 120 };
+  double shift[3] = {5.,5.,5.};
+  int64_t point_num = nx[0] * nx[1] * nx[2];
+
+  double rmin[3] = { nucl_coord[0][0], nucl_coord[0][1], nucl_coord[0][2] } ;
+  double rmax[3] = { nucl_coord[0][0], nucl_coord[0][1], nucl_coord[0][2] } ;
+
+  for (size_t i=0 ; i<nucl_num ; ++i) {
+    for (int j=0 ; j<3 ; ++j) {
+      rmin[j] = nucl_coord[i][j] < rmin[j] ? nucl_coord[i][j] : rmin[j];
+      rmax[j] = nucl_coord[i][j] > rmax[j] ? nucl_coord[i][j] : rmax[j];
+    }
+  }
+
+  rmin[0] -= shift[0]; rmin[1] -= shift[1]; rmin[2] -= shift[2];
+  rmax[0] += shift[0]; rmax[1] += shift[1]; rmax[2] += shift[2];
+
+  double step[3];
+
+  double* linspace[3];
+  for (int i=0 ; i<3 ; ++i) {
+
+    linspace[i] = (double*) calloc( sizeof(double), nx[i] );
+
+    if (linspace[i] == NULL) {
+      fprintf(stderr, "Allocation failed (linspace)\n");
+      exit(1);
+    }
+
+    step[i] = (rmax[i] - rmin[i]) / ((double) (nx[i]-1));
+
+    for (size_t j=0 ; j<nx[i] ; ++j) {
+      linspace[i][j] = rmin[i] + j*step[i];
+    }
+
+  }
+
+  double dr = step[0] * step[1] * step[2];
+   #+end_src
+
+   Now the grid is ready, we can create the list of grid points
+   $\mathbf{r}_k$ on which the MOs $\phi_i$ will be evaluated, and
+   transfer them to the QMCkl context:
+
+   #+begin_src c :exports code
+  double* point = (double*) calloc(sizeof(double), 3*point_num);
+
+  if (point == NULL) {
+    fprintf(stderr, "Allocation failed (point)\n");
+    exit(1);
+  }
+
+  size_t m = 0;
+  for (size_t i=0 ; i<nx[0] ; ++i) {
+    for (size_t j=0 ; j<nx[1] ; ++j) {
+      for (size_t k=0 ; k<nx[2] ; ++k) {
+
+        point[m] = linspace[0][i];
+        m++;
+
+        point[m] = linspace[1][j];
+        m++;
+
+        point[m] = linspace[2][k];
+        m++;
+
+      }
+    }
+  }
+
+  rc = qmckl_set_point(context, 'N', point_num, point, (point_num*3));
+
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error setting points:\n%s\n", qmckl_string_of_error(rc));
+    exit(1);
+  }
+   #+end_src
+
+   #+RESULTS:
+   : None
+
+   Then, we evaluate all the MOs at the grid points (and time the execution),
+   and thus obtain the matrix $M_{ki} = \langle \mathbf{r}_k | \phi_i
+   \rangle = \phi_i(\mathbf{r}_k)$.
+
+   #+begin_src c :exports code
+
+  int64_t mo_num;
+  rc = qmckl_get_mo_basis_mo_num(context, &mo_num);
+
+  long before, after;
+  struct timeval timecheck;
+
+  double* mo_value = (double*) calloc(sizeof(double), point_num*mo_num);
+  if (mo_value == NULL) {
+    fprintf(stderr, "Allocation failed (mo_value)\n");
+    exit(1);
+  }
+
+  gettimeofday(&timecheck, NULL);
+  before = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000;
+
+  rc = qmckl_get_mo_basis_mo_value(context, mo_value, point_num*mo_num);
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error getting mo_value)\n");
+    exit(1);
+  }
+
+  gettimeofday(&timecheck, NULL);
+  after = (long)timecheck.tv_sec * 1000 + (long)timecheck.tv_usec / 1000;
+
+  printf("Number of MOs: %ld\n", mo_num);
+  printf("Number of grid points: %ld\n", point_num);
+  printf("Execution time : %f seconds\n", (after - before)*1.e-3);
+
+   #+end_src
+
+   #+begin_example
+Number of MOs:  201
+Number of grid points:  1728000
+Execution time :  5.608000 seconds
+   #+end_example
+
+   and finally we compute the overlap between all the MOs as
+   $M.M^\dagger$.
+
+   #+begin_src c :exports code
+  double* overlap = (double*) malloc (mo_num*mo_num*sizeof(double));
+
+  rc = qmckl_dgemm(context, 'N', 'T', mo_num, mo_num, point_num, dr,
+                   mo_value, mo_num, mo_value, mo_num, 0.0,
+                   overlap, mo_num);
+
+  for (size_t i=0 ; i<mo_num ; ++i) {
+    printf("%4ld", i);
+    for (size_t j=0 ; j<mo_num ; ++j) {
+      printf(" %f", overlap[i*mo_num+j]);
+    }
+    printf("\n");
+  }
+
+  // Clean-up and exit
+  free(mo_value);
+  free(overlap);
+
+  rc = qmckl_context_destroy(context);
+  if (rc != QMCKL_SUCCESS) {
+    fprintf(stderr, "Error destroying context)\n");
+    exit(1);
+  }
+
+  return 0;
+}
+   #+end_src
+
+   #+begin_example
+   0 0.988765 0.002336 0.000000 -0.000734 0.000000 0.000530 0.000000 0.000446 0.000000 -0.000000 -0.000511 -0.000000 -0.000267 0.000000 0.000000 0.001007 0.000000 0.000168 -0.000000 -0.000000 -0.000670 -0.000000 0.000000 -0.000251 -0.000261 -0.000000 -0.000000 -0.000000 -0.000397 -0.000000 -0.000810 0.000000 0.000231 -0.000000 -0.000000 0.000000 -0.000000
+   ...
+ 200 0.039017 -0.013066 -0.000000 -0.001935 -0.000000 -0.003829 -0.000000 0.000996 -0.000000 0.000000 -0.003733 0.000000 0.000065 -0.000000 -0.000000 -0.002220 -0.000000 -0.001961 0.000000 0.000000 -0.004182 0.000000 -0.000000 -0.000165 -0.002445 0.000000 -0.000000 0.000000 0.001985 0.000000 0.001685 -0.000000 -0.002899 0.000000 0.000000 0.000000 -0.000000 0.002591 0.000000 -0.000000 0.000000 0.002385 0.000000 -0.011086 0.000000 -0.003885 0.000000 -0.000000 0.003602 -0.000000 0.000000 -0.003241 0.000000 0.000000 0.002613 -0.007344 -0.000000 -0.000000 0.000000 0.000017 0.000000 0.000000 0.000000 -0.008719 0.000000 -0.001358 -0.003233 0.000000 -0.000000 -0.000000 -0.000000 0.000000 0.003778 0.000000 0.000000 -0.000000 0.000000 0.000000 -0.001190 0.000000 0.000000 -0.000000 0.005578 -0.000000 -0.001502 0.003899 -0.000000 -0.000000 0.000000 -0.003291 -0.001775 -0.000000 -0.002374 0.000000 -0.000000 -0.000000 -0.000000 -0.001290 -0.000000 0.002178 0.000000 0.000000 0.000000 -0.001252 0.000000 -0.000000 -0.000926 0.000000 -0.000000 -0.013130 -0.000000 0.012124 0.000000 -0.000000 -0.000000 -0.000000 0.000000 0.025194 0.000343 -0.000000 0.000000 -0.000000 -0.004421 0.000000 0.000000 -0.000599 -0.000000 0.005289 0.000000 -0.000000 0.012826 -0.000000 0.000000 0.006190 0.000000 0.000000 -0.000000 0.000000 -0.000321 0.000000 -0.000000 -0.000000 0.000000 -0.000000 0.001499 -0.006629 0.000000 0.000000 0.000000 -0.000000 0.008737 -0.000000 0.006880 0.000000 -0.001851 -0.000000 -0.000000 0.000000 -0.007464 0.000000 0.010298 -0.000000 -0.000000 -0.000000 -0.000000 -0.000000 0.000000 0.000540 0.000000 -0.006616 -0.000000 0.000000 -0.002927 -0.000000 0.000000 0.010352 0.000000 -0.003103 -0.000000 -0.007640 -0.000000 -0.000000 0.005302 0.000000 0.000000 -0.000000 -0.000000 -0.010181 0.000000 -0.001108 0.000000 0.000000 -0.000000 0.000000 0.000000 -0.000998 -0.009754 0.013562 0.000000 -0.000000 0.887510
+   #+end_example
+
+   
+** Fortran
+   Here is the same piece of code translated in Fortran
+   #+begin_src f90
+#include <qmckl_f.F90>
+
+program main
+  use iso_c_binding
+  use qmckl
+  implicit none
+
+  ! Declare variables
+  integer :: argc
+  character(128) :: trexio_filename, err_msg
+  integer(qmckl_exit_code) :: rc
+  integer(qmckl_context) :: context
+  integer*8 :: nucl_num, mo_num, point_num
+  double precision, allocatable :: nucl_coord(:,:)
+  integer*8  :: nx(3)
+  double precision, dimension(3) :: shift, step, rmin, rmax
+  double precision, allocatable :: mo_value(:,:), overlap(:,:), point(:), linspace(:,:)
+  double precision :: before, after, dr
+  integer*8 :: i,j,k,m
+
+  ! Initialize variables
+  err_msg = ' ' 
+  argc = command_argument_count()
+  if (argc /= 1) then
+    print *, "Usage: ./program <TREXIO filename>"
+    stop -1
+  end if
+  call get_command_argument(1, trexio_filename)
+  rc = QMCKL_SUCCESS
+
+  ! Create a QMCkl context
+  context = qmckl_context_create()
+
+  ! Read the TREXIO file into the context
+  rc = qmckl_trexio_read(context, trim(trexio_filename), len(trexio_filename)*1_8)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error reading TREXIO file:", err_msg
+    stop -1
+  end if
+
+  ! Retrieve the number of nuclei
+  rc = qmckl_get_nucleus_num(context, nucl_num)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error getting nucl_num:", err_msg
+    stop -1
+  end if
+
+  ! Retrieve the nuclear coordinates
+  allocate(nucl_coord(3, nucl_num))
+  rc = qmckl_get_nucleus_coord(context, 'N', nucl_coord, nucl_num * 3_8)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error getting nucl_coord:", err_msg
+    stop -1
+  end if
+
+  ! Retrieve the number of MOs
+  rc = qmckl_get_mo_basis_mo_num(context, mo_num)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error getting mo_num:", err_msg
+    stop -1
+  end if
+
+  ! Initialize grid points for the calculation
+  nx = (/ 120, 120, 120 /)
+  shift = (/ 5.d0, 5.d0, 5.d0 /)
+  point_num = nx(1) * nx(2) * nx(3)
+
+  ! Initialize rmin and rmax
+  rmin = nucl_coord(:,1)
+  rmax = nucl_coord(:,1)
+
+  ! Update rmin and rmax based on nucl_coord
+  do i = 1, 3
+    do j = 1, nucl_num
+      rmin(i) = min(nucl_coord(i,j), rmin(i))
+      rmax(i) = max(nucl_coord(i,j), rmax(i))
+    end do
+  end do
+
+  ! Apply shift
+  rmin = rmin - shift
+  rmax = rmax + shift
+
+  ! Initialize linspace and step
+  allocate(linspace(3, maxval(nx)))
+
+  do i = 1, 3
+    step(i) = (rmax(i) - rmin(i)) / real(nx(i) - 1, 8)
+    do j = 1, nx(i)
+      linspace(i, j) = rmin(i) + (j - 1) * step(i)
+    end do
+  end do
+
+  ! Initialize point array
+  allocate(point(3 * point_num))
+  m = 1
+  do i = 1, nx(1)
+    do j = 1, nx(2)
+      do k = 1, nx(3)
+        point(m) = linspace(1, i); m = m + 1
+        point(m) = linspace(2, j); m = m + 1
+        point(m) = linspace(3, k); m = m + 1
+      end do
+    end do
+  end do
+
+  deallocate(linspace)
+
+
+  ! Set points in QMCKL context
+  rc = qmckl_set_point(context, 'N', point_num, point, point_num * 3)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error setting point:", err_msg
+    stop -1
+  end if
+
+
+
+
+  ! Perform the actual calculation and measure the time taken
+  call cpu_time(before)
+  allocate(mo_value(point_num, mo_num))
+  rc = qmckl_get_mo_basis_mo_value(context, mo_value, point_num * mo_num)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error getting mo_value:", err_msg
+    stop
+  end if
+  call cpu_time(after)
+
+  write(*,*) "Number of MOs:", mo_num
+  write(*,*) "Number of grid points:", point_num
+  write(*,*) "Execution time:", (after - before), "seconds"
+
+  ! Compute the overlap matrix
+  dr = step(1) * step(2) * step(3)
+
+  allocate(overlap(mo_num, mo_num))
+  rc = qmckl_dgemm(context, 'N', 'T', mo_num, mo_num, point_num, dr, &
+                   mo_value, mo_num, mo_value, mo_num, 0.d0, overlap, mo_num)
+
+  ! Print the overlap matrix
+  do i = 1, mo_num
+    write(*,'(i4)', advance='no') i
+    do j = 1, mo_num
+      write(*,'(f8.4)', advance='no') overlap(i, j)
+    end do
+    write(*,*)
+  end do
+
+  ! Clean-up and exit
+  deallocate(mo_value, overlap)
+  rc = qmckl_context_destroy(context)
+  if (rc /= QMCKL_SUCCESS) then
+    call qmckl_string_of_error(rc, err_msg)
+    write(*,*) "Error destroying context:", err_msg
+    stop -1
+  end if
+
+end program main
+   #+end_src
 * Fortran
 ** Checking errors

@ -173,7 +688,7 @@ print (overlap)
   error in text format and exits the program.

   #+NAME: qmckl_check_error
-   #+begin_src f90 
+   #+begin_src f90
 subroutine qmckl_check_error(rc, message)
  use qmckl
  implicit none
@ -188,7 +703,7 @@ subroutine qmckl_check_error(rc, message)
  end if
 end subroutine qmckl_check_error
   #+end_src
-  
+
 ** Computing an atomic orbital on a grid
   :PROPERTIES:
   :header-args: :tangle ao_grid.f90
@ -200,14 +715,14 @@ end subroutine qmckl_check_error
   atomic units in the borders.

   This program uses the ~qmckl_check_error~ function defined above.
-  
+
   To use this program, run
-  
+
   #+begin_src bash :tangle no :exports code
 $ ao_grid <trexio_file> <AO_id> <point_num>
   #+end_src

-  
+
   #+begin_src f90  :noweb yes
 <<qmckl_check_error>>

@ -237,7 +752,7 @@ program ao_grid

   Start by fetching the command-line arguments:

-   #+begin_src f90 
+   #+begin_src f90
  if (iargc() /= 3) then
     print *, 'Syntax: ao_grid <trexio_file> <AO_id> <point_num>'
     call exit(-1)
@ -257,7 +772,7 @@ program ao_grid
   Create the QMCkl context and initialize it with the wave function
   present in the TREXIO file:

-   #+begin_src f90 
+   #+begin_src f90
  qmckl_ctx = qmckl_context_create()
  rc  = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename)))
  call qmckl_check_error(rc, 'Read TREXIO')
@ -265,8 +780,8 @@ program ao_grid

   We need to check that ~ao_id~ is in the range, so we get the total
   number of AOs from QMCkl:
-  
-   #+begin_src f90 
+
+   #+begin_src f90
  rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num)
  call qmckl_check_error(rc, 'Getting ao_num')

@ -279,7 +794,7 @@ program ao_grid
   Now we will compute the limits of the box in which the molecule fits.
   For that, we first need to ask QMCkl the coordinates of nuclei.

-   #+begin_src f90 
+   #+begin_src f90
  rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num)
  call qmckl_check_error(rc, 'Get nucleus num')

@ -291,11 +806,11 @@ program ao_grid
   We now compute the coordinates of opposite points of the box, and
   the distance between points along the 3 directions:

-   #+begin_src f90 
+   #+begin_src f90
  rmin(1) = minval( nucl_coord(1,:) ) - 5.d0
  rmin(2) = minval( nucl_coord(2,:) ) - 5.d0
  rmin(3) = minval( nucl_coord(3,:) ) - 5.d0
-     
+
  rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0
  rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0
  rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0
@ -305,8 +820,8 @@ program ao_grid

   We now produce the list of point coordinates where the AO will be
   evaluated:
-  
-   #+begin_src f90 
+
+   #+begin_src f90
  point_num = point_num_x**3
  allocate( points(point_num, 3) )
  ipoint=0
@ -327,19 +842,19 @@ program ao_grid
     z = z + dr(3)
  end do
   #+end_src
-  
+
   We give the points to QMCkl:

-   #+begin_src f90 
+   #+begin_src f90
  rc = qmckl_set_point(qmckl_ctx, 'T', point_num, points, size(points)*1_8 )
  call qmckl_check_error(rc, 'Setting points')
   #+end_src

   We allocate the space required to retrieve the values, gradients and
   Laplacian of all AOs, and ask to retrieve the values of the
-   AOs computed at the point positions. 
-  
-   #+begin_src f90 
+   AOs computed at the point positions.
+
+   #+begin_src f90
  allocate( ao_vgl(ao_num, 5, point_num) )
  rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num)
  call qmckl_check_error(rc, 'Setting points')
@ -347,13 +862,13 @@ program ao_grid

   We finally print the value and Laplacian of the AO:

-   #+begin_src f90 
+   #+begin_src f90
  do ipoint=1, point_num
     print '(3(F10.6,X),2(E20.10,X))', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint), ao_vgl(ao_id,5,ipoint)
  end do
   #+end_src

-   #+begin_src f90 
+   #+begin_src f90
  deallocate( nucl_coord, points, ao_vgl )
 end program ao_grid
   #+end_src
--- a/org/qmckl_jastrow_champ.org
+++ b/org/qmckl_jastrow_champ.org
--- a/org/qmckl_mo.org
+++ b/org/qmckl_mo.org
@ -48,7 +48,7 @@ distance is smaller than a certain radius $r_{\text{cusp}}$. At
 distances smaller than $r_{\text{cusp}}$, the MOs are replaced by
 functions that have the correct electron-nucleus cusp condition and
 that ensure that the values and the gradients of the MOs are
-continuous at $r_{\text{cusp}}$. 
+continuous at $r_{\text{cusp}}$.

 A radius $r_{\text{cusp}\, A}$ is given for each nucleus $A$, default is
 zero. If an electron is closer to the nucleus $A$ than
@ -282,7 +282,7 @@ qmckl_exit_code  qmckl_set_mo_basis_coefficient(qmckl_context context,
                             "qmckl_set_mo_basis_coefficient",
                             "Array too small");
  }
-  
+
  qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
  mem_info.size = ctx->ao_basis.ao_num * ctx->mo_basis.mo_num * sizeof(double);
  double* new_array = (double*) qmckl_malloc(context, mem_info);
@ -350,7 +350,7 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {

  ctx->mo_basis.coefficient_t = new_array;

-  qmckl_exit_code rc; 
+  qmckl_exit_code rc;
  if (ctx->mo_basis.mo_vgl != NULL) {
    rc = qmckl_free(context, ctx->mo_basis.mo_vgl);
    if (rc != QMCKL_SUCCESS) return rc;
@ -373,11 +373,11 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {

   To activate the cusp adjustment, the user must enter the radius of
   the fitting function for each atom.
-   
+
   This function requires the computation of the value and gradients
   of the $s$ AOs at the distance equal to the radius, and the values
   of the non-$s$ AOs at the center.
-   
+
   #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
 qmckl_exit_code
 qmckl_set_mo_basis_r_cusp(qmckl_context context,
@ -391,19 +391,19 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
  qmckl_exit_code rc;

  qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
-  
+
  if (r_cusp == NULL) {
      return qmckl_failwith( context, QMCKL_INVALID_ARG_2,
                             "qmckl_set_mo_basis_r_cusp",
                             "r_cusp: Null pointer");
  }
-        
+
  if (size_max < ctx->nucleus.num) {
    return qmckl_failwith( context, QMCKL_INVALID_ARG_3,
                             "qmckl_set_mo_basis_r_cusp",
                             "Array too small");
  }
-  
+

  // Nullify r_cusp
  if (ctx->mo_basis.r_cusp != NULL) {
@ -422,7 +422,7 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
    qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
    mem_info.size = old_point_num * 3 * sizeof(double);
    old_coord = (double*) qmckl_malloc(context, mem_info);
-    
+
    rc = qmckl_get_point(context, 'T', old_coord,  (old_point_num * 3));
    if (rc != QMCKL_SUCCESS) {
      return qmckl_failwith( context,
@ -450,25 +450,25 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
  }


-  
+
  // Allocate cusp parameters and set them to zero
  {
    if (ctx->mo_basis.cusp_param.size[0] != 0) {
      rc = qmckl_tensor_free(context, &(ctx->mo_basis.cusp_param));
      if (rc != QMCKL_SUCCESS) return rc;
    }
-    
+
    int64_t sze[3] = { ctx->mo_basis.mo_num, 4, ctx->nucleus.num };
    ctx->mo_basis.cusp_param = qmckl_tensor_alloc(context, 3, &(sze[0]));
    ctx->mo_basis.cusp_param = qmckl_tensor_set(ctx->mo_basis.cusp_param, 0.);
  }

-  
+
  // Evaluate MO value at nucleus without s components
  qmckl_matrix mo_value_at_nucl_no_s;
  {
    mo_value_at_nucl_no_s = qmckl_matrix_alloc(context, ctx->mo_basis.mo_num, ctx->nucleus.num);
-    
+
    rc = qmckl_double_of_matrix(context, ctx->nucleus.coord, coord, ctx->nucleus.num * 3);
    if (rc != QMCKL_SUCCESS) return rc;

@ -479,26 +479,26 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
                             "qmckl_set_mo_basis_r_cusp",
                             "Unable to set coordinates at the nuclei");
    }
-    
+
    rc = qmckl_get_mo_basis_mo_value(context,
-                                     &(qmckl_mat(mo_value_at_nucl_no_s,0,0)), 
+                                     &(qmckl_mat(mo_value_at_nucl_no_s,0,0)),
                                     ctx->mo_basis.mo_num * ctx->nucleus.num);
    if (rc != QMCKL_SUCCESS) return rc;
  }

-  
+
  // Evaluate MO vgl at r_cusp without s components
  qmckl_tensor mo_vgl_at_r_cusp_s;
  {
    int64_t sze[3] = { ctx->mo_basis.mo_num, 3, ctx->nucleus.num };
    mo_vgl_at_r_cusp_s = qmckl_tensor_alloc(context, 3, &(sze[0]));
  }
-    
+
  {
    qmckl_tensor ao_vgl_at_r_cusp_s;
    int64_t sze[3] = { ctx->ao_basis.ao_num, 5, ctx->nucleus.num };
    ao_vgl_at_r_cusp_s = qmckl_tensor_alloc(context, 3, &(sze[0]));
-    
+
    rc = qmckl_double_of_matrix(context, ctx->nucleus.coord, coord, ctx->nucleus.num * 3);
    if (rc != QMCKL_SUCCESS) return rc;

@ -513,11 +513,11 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
                             "qmckl_set_mo_basis_r_cusp",
                             "Unable to set coordinates at r_cusp");
    }
-    
+
    rc = qmckl_get_ao_basis_ao_vgl(context,
-                                   &(qmckl_ten3(ao_vgl_at_r_cusp_s,0,0,0)), 
+                                   &(qmckl_ten3(ao_vgl_at_r_cusp_s,0,0,0)),
                                   ctx->ao_basis.ao_num * 5 * ctx->point.num);
-    
+
    for (int64_t inucl=0 ; inucl<ctx->nucleus.num ; ++inucl) {
      for (int64_t i=0 ; i<ctx->mo_basis.mo_num ; ++i) {
        qmckl_ten3(mo_vgl_at_r_cusp_s,i,0,inucl) = 0.;
@ -565,11 +565,11 @@ qmckl_set_mo_basis_r_cusp(qmckl_context context,
       }
     }
  }
-  
+
  free(coord);
  qmckl_matrix_free(context, &mo_value_at_nucl_no_s);
  qmckl_tensor_free(context, &mo_vgl_at_r_cusp_s);
-  
+
  // Restore old points
  if (old_point_num > 0) {
    rc = qmckl_set_point(context, 'T', old_point_num, old_coord, (old_point_num * 3));
@ -688,7 +688,7 @@ qmckl_get_mo_basis_coefficient (const qmckl_context context,
   #+begin_src c :comments org :tangle (eval h_func)
 bool qmckl_mo_basis_provided (const qmckl_context context);
   #+end_src
-   
+
   #+begin_src c :comments org :tangle (eval c) :exports none
 bool qmckl_mo_basis_provided(const qmckl_context context) {

@ -726,7 +726,7 @@ interface
    import
    implicit none
    integer (c_int64_t) , intent(in), value   :: context
-    double precision    , intent(out)         :: coefficient(*)
+    real(c_double)      , intent(out)         :: coefficient(*)
    integer (c_int64_t) , intent(in), value   :: size_max
  end function qmckl_get_mo_basis_coefficient
 end interface
@ -738,7 +738,7 @@ interface
    import
    implicit none
    integer (c_int64_t) , intent(in), value   :: context
-    double precision    , intent(in)          :: r_cusp(*)
+    real(c_double)      , intent(in)          :: r_cusp(*)
    integer (c_int64_t) , intent(in), value   :: size_max
  end function qmckl_set_mo_basis_r_cusp
 end interface
@ -757,7 +757,7 @@ end interface
   size ~mo_num~ can be created. If the integer corresponding to an MO is
   zero, that MO is dropped and will not be included in the
   calculation. If the integer is non-zero, the MO will be kept.
-   
+

   #+begin_src c :comments org :tangle (eval h_func)
 qmckl_exit_code
@ -765,7 +765,7 @@ qmckl_mo_basis_select_mo (const qmckl_context context,
                          const int32_t* keep,
                          const int64_t size_max);
   #+end_src
-   
+
   #+begin_src c :comments org :tangle (eval c) :exports none
 qmckl_exit_code
 qmckl_mo_basis_select_mo (const qmckl_context context,
@ -796,8 +796,8 @@ qmckl_mo_basis_select_mo (const qmckl_context context,
                           "NULL pointer");
  }

-  const int64_t mo_num = ctx->mo_basis.mo_num; 
-  const int64_t ao_num = ctx->ao_basis.ao_num; 
+  const int64_t mo_num = ctx->mo_basis.mo_num;
+  const int64_t ao_num = ctx->ao_basis.ao_num;

  if (size_max < mo_num) {
    return qmckl_failwith( context,
@ -906,7 +906,7 @@ qmckl_get_mo_basis_mo_value(qmckl_context context,
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: mo_value(*)
+       real(c_double),       intent(out)         :: mo_value(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_mo_basis_mo_value
  end interface
@ -972,7 +972,7 @@ qmckl_get_mo_basis_mo_value_inplace (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: mo_value(*)
+       real(c_double),       intent(out)         :: mo_value(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_mo_basis_mo_value_inplace
  end interface
@ -1049,12 +1049,12 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)

 #+end_src

-    
+
    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
    if (ctx->mo_basis.mo_vgl_date == ctx->point.date) {

      // mo_vgl has been computed at this step: Just copy the data.
-      
+
      double * v = &(ctx->mo_basis.mo_value[0]);
      double * vgl = &(ctx->mo_basis.mo_vgl[0]);
      for (int i=0 ; i<ctx->point.num ; ++i) {
@ -1099,7 +1099,7 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
                                                  ctx->ao_basis.ao_num,
                                                  ctx->mo_basis.mo_num,
                                                  ctx->point.num,
-                                                  ctx->ao_basis.ao_nucl, 
+                                                  ctx->ao_basis.ao_nucl,
                                                  ctx->ao_basis.ao_ang_mom,
                                                  ctx->electron.en_distance,
                                                  ctx->mo_basis.r_cusp,
@ -1177,7 +1177,7 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
        mo_value(:,j) = mo_value(:,j) + coefficient_t(:,k) * ao_value(k,j)
     end do
  end do
-     
+
 end function qmckl_compute_mo_basis_mo_value_doc_f
    #+end_src

@ -1394,7 +1394,7 @@ qmckl_get_mo_basis_mo_vgl(qmckl_context context,
       implicit none

       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: mo_vgl(*)
+       real(c_double),       intent(out)         :: mo_vgl(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_mo_basis_mo_vgl
  end interface
@ -1460,7 +1460,7 @@ qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context,
       import
       implicit none
       integer (c_int64_t) , intent(in)  , value :: context
-       double precision,     intent(out)         :: mo_vgl(*)
+       real(c_double),       intent(out)         :: mo_vgl(*)
       integer (c_int64_t) , intent(in)  , value :: size_max
     end function qmckl_get_mo_basis_mo_vgl_inplace
  end interface
@ -1568,7 +1568,7 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_vgl(qmckl_context context)
                                              ctx->ao_basis.ao_num,
                                              ctx->mo_basis.mo_num,
                                              ctx->point.num,
-                                              ctx->ao_basis.ao_nucl, 
+                                              ctx->ao_basis.ao_nucl,
                                              ctx->ao_basis.ao_ang_mom,
                                              ctx->electron.en_distance,
                                              ctx->nucleus.coord,
@ -1941,7 +1941,7 @@ integer function qmckl_compute_mo_basis_mo_value_cusp_doc_f(context, &
        if ( (en_distance(inucl,i) < r_cusp(inucl)) .and. (ao_ang_mom(k) == 0) ) cycle
        mo_value(:,i) = mo_value(:,i) + coefficient_t(:,k) * ao_value(k,i)
     end do ! k
-     
+
     do inucl=1,nucl_num
        r = en_distance(inucl,i)
        if (r > r_cusp(inucl)) cycle
@ -1974,7 +1974,7 @@ end function qmckl_compute_mo_basis_mo_value_cusp_doc_f
         const qmckl_tensor cusp_param,
         const double* coefficient_t,
         const double* ao_value,
-         double* const mo_value ); 
+         double* const mo_value );
   #+end_src

   #+CALL: generate_c_header(table=qmckl_mo_basis_mo_value_cusp_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_value_cusp_doc"))
@ -1994,7 +1994,7 @@ end function qmckl_compute_mo_basis_mo_value_cusp_doc_f
         const double* cusp_param,
         const double* coefficient_t,
         const double* ao_value,
-         double* const mo_value ); 
+         double* const mo_value );
   #+end_src

   #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_value_cusp_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_value_cusp_doc"))
@ -2070,17 +2070,17 @@ qmckl_compute_mo_basis_mo_value_cusp (const qmckl_context context,
                                      double* const mo_value )
 {
  qmckl_exit_code rc;
-  
+
 #ifdef HAVE_HPC
-  rc = qmckl_compute_mo_basis_mo_value_cusp_hpc (context, nucl_num, ao_num, mo_num, point_num, 
+  rc = qmckl_compute_mo_basis_mo_value_cusp_hpc (context, nucl_num, ao_num, mo_num, point_num,
                                                 ao_nucl, ao_ang_mom, en_distance, r_cusp,
-                                                 cusp_param_tensor, coefficient_t, ao_value, mo_value ); 
+                                                 cusp_param_tensor, coefficient_t, ao_value, mo_value );
 #else
  double * cusp_param  = qmckl_alloc_double_of_tensor(context, cusp_param_tensor);

-  rc = qmckl_compute_mo_basis_mo_value_cusp_doc (context, nucl_num, ao_num, mo_num, point_num, 
+  rc = qmckl_compute_mo_basis_mo_value_cusp_doc (context, nucl_num, ao_num, mo_num, point_num,
                                                 ao_nucl, ao_ang_mom, en_distance, r_cusp,
-                                                 cusp_param, coefficient_t, ao_value, mo_value ); 
+                                                 cusp_param, coefficient_t, ao_value, mo_value );

  qmckl_free(context, cusp_param);
 #endif
@ -2106,7 +2106,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
                                          const qmckl_tensor cusp_param,
                                          const double* coefficient_t,
                                          const double* ao_value,
-                                          double* const mo_value ); 
+                                          double* const mo_value );
 #endif
    #+end_src

@ -2179,7 +2179,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
    for (int64_t m=n ; m < nidx ; m+=1) {
      const double* restrict ck = coefficient_t + idx[m]*mo_num;
      const double a1 = av1[m];
-      
+
 #ifdef HAVE_OPENMP
 #pragma omp simd
 #endif
@ -2283,7 +2283,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_cusp_doc_f(context, &
           mo_vgl(i,5,j) = mo_vgl(i,5,j) + coefficient_t(i,k) * c5
        end do
     end do
-     
+
     ! Cusp adjustment
     do inucl=1,nucl_num
        r = en_distance(inucl,j)
@ -2299,7 +2299,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_cusp_doc_f(context, &
                cusp_param(i,3,inucl) + r* cusp_param(i,4,inucl)     ))

           c1 = r_inv * cusp_param(i,2,inucl) + 2.d0*cusp_param(i,3,inucl) +  &
-                r * 3.d0 * cusp_param(i,4,inucl) 
+                r * 3.d0 * cusp_param(i,4,inucl)

           mo_vgl(i,2,j) = mo_vgl(i,2,j) + r_vec(1) * c1
           mo_vgl(i,3,j) = mo_vgl(i,3,j) + r_vec(2) * c1
@ -2319,7 +2319,7 @@ end function qmckl_compute_mo_basis_mo_vgl_cusp_doc_f
    #+end_src

 #   #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_cusp_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_cusp"))
-    
+
   #+RESULTS:
   #+begin_src c :tangle (eval h_private_func) :comments org
   qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_cusp (
@ -2337,7 +2337,7 @@ end function qmckl_compute_mo_basis_mo_vgl_cusp_doc_f
         const qmckl_tensor cusp_param,
         const double* coefficient_t,
         const double* ao_vgl,
-         double* const mo_vgl ); 
+         double* const mo_vgl );
   #+end_src

   #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_cusp_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_cusp_doc"))
@ -2359,7 +2359,7 @@ end function qmckl_compute_mo_basis_mo_vgl_cusp_doc_f
         const double* cusp_param,
         const double* coefficient_t,
         const double* ao_vgl,
-         double* const mo_vgl ); 
+         double* const mo_vgl );
   #+end_src

   #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_vgl_cusp_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_cusp_doc"))
@ -2443,7 +2443,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp (const qmckl_context context,
                                    double* const mo_vgl )
 {
  qmckl_exit_code rc;
-  
+
 #ifdef HAVE_HPC
  rc = qmckl_compute_mo_basis_mo_vgl_cusp_hpc (context, nucl_num, ao_num, mo_num, point_num,
                                               ao_nucl, ao_ang_mom, en_distance, nucl_coord_matrix,
@ -2487,7 +2487,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
                                        const qmckl_tensor cusp_param,
                                        const double* coefficient_t,
                                        const double* ao_vgl,
-                                        double* const mo_vgl ); 
+                                        double* const mo_vgl );
 #endif
    #+end_src

@ -2670,7 +2670,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
   may get out of the range of double precision. A simple fix is to
   rescale the MO coefficients to put back the determinants in the
   correct range.
-   
+
    #+begin_src c :comments org :tangle (eval h_func) :noweb yes
 qmckl_exit_code
 qmckl_mo_basis_rescale(qmckl_context context,
@ -2817,7 +2817,7 @@ const double*   nucl_charge   = chbrclf_charge;
 const double*   nucl_coord    = &(chbrclf_nucl_coord[0][0]);

 const int64_t point_num = walk_num*elec_num;
- 
+
 rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num);
 assert (rc == QMCKL_SUCCESS);

@ -2945,7 +2945,7 @@ for (int i=0 ; i< point_num; ++i) {

 rc = qmckl_context_touch(context);
 assert (rc == QMCKL_SUCCESS);
- 
+
 rc = qmckl_get_mo_basis_mo_value(context, &(mo_value[0][0]), point_num*chbrclf_mo_num);
 assert (rc == QMCKL_SUCCESS);

@ -3054,7 +3054,7 @@ printf("\n");
    assert(mo_coefficient_new[0][i] == mo_coefficient[i + ao_num*2]);
    assert(mo_coefficient_new[1][i] == mo_coefficient[i + ao_num*5]);
 }
- 
+

 }

--- a/org/qmckl_numprec.org
+++ b/org/qmckl_numprec.org
@ -40,6 +40,42 @@ int main() {
 #include <stdlib.h>
 #include <string.h>

+#ifdef HAVE_FPE
+#define _GNU_SOURCE
+#include <fenv.h>
+#include <signal.h>
+#include <stdio.h>
+#include <execinfo.h>
+
+
+#define MAX_BACKTRACE_SIZE 100
+
+void floatingPointExceptionHandler(int signal) {
+    void* backtraceArray[MAX_BACKTRACE_SIZE];
+    int backtraceSize = backtrace(backtraceArray, MAX_BACKTRACE_SIZE);
+    char** backtraceSymbols = backtrace_symbols(backtraceArray, backtraceSize);
+
+    // Print the backtrace
+    for (int i = 0; i < backtraceSize; ++i) {
+            printf("[%d] %s\n", i, backtraceSymbols[i]);
+    }
+
+    // Clean up the memory used by backtrace_symbols
+    free(backtraceSymbols);
+
+    exit(EXIT_FAILURE);
+}
+
+static void __attribute__ ((constructor))
+trapfpe ()
+{
+  /* Enable some exceptions.  At startup all exceptions are masked.  */
+
+  feenableexcept (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
+  signal(SIGFPE, floatingPointExceptionHandler);
+}
+#endif
+
 #include "qmckl.h"
 #include "qmckl_context_private_type.h"
  #+end_src
--- a/org/qmckl_sherman_morrison_woodbury.org
+++ b/org/qmckl_sherman_morrison_woodbury.org
--- a/org/qmckl_tests.org
+++ b/org/qmckl_tests.org
@ -60114,8 +60114,8 @@ double n2_elec_coord[n2_walk_num][n2_elec_num][3] = { {
 #define n2_dim_c_vec        ((int64_t) 23)

 int64_t n2_type_nucl_vector[n2_nucl_num] = {
-  1,
-  1};
+  0,
+  0};

 double n2_a_vector[n2_aord_num + 1][n2_type_nucl_num] = {
  { 0.      },
--- a/python/setup.py
+++ b/python/setup.py
@ -46,7 +46,7 @@ qmckl_module   =  Extension(name                 = "._" + MODULE_NAME,


 setup(name             = MODULE_NAME,
-      version          = "0.3.1",
+      version          = "0.5.1",
      author           = "TREX-CoE",
      author_email     = "posenitskiy@irsamc.ups-tlse.fr",
      description      = """Python API of the QMCkl library""",
--- a/python/src/qmckl.i
+++ b/python/src/qmckl.i
@ -58,6 +58,11 @@ import_array();
 %apply ( double* ARGOUT_ARRAY1 , int64_t DIM1 ) { ( double* const C, const int64_t size_max_C) };
 %apply ( double* ARGOUT_ARRAY1 , int64_t DIM1 ) { ( double* const B, const int64_t size_max_B) };

+%apply ( int64_t* IN_ARRAY1 , int64_t DIM1 ) { ( const int64_t* A, const int64_t size_max_A) };
+%apply ( int64_t* IN_ARRAY1 , int64_t DIM1 ) { ( const int64_t* B, const int64_t size_max_B) };
+%apply ( int64_t* ARGOUT_ARRAY1 , int64_t DIM1 ) { ( int64_t* const C, const int64_t size_max_C) };
+%apply ( int64_t* ARGOUT_ARRAY1 , int64_t DIM1 ) { ( int64_t* const B, const int64_t size_max_B) };
+
 /* Handle properly get_point */


--- a/share/qmckl/test_data/sm_test.h
+++ b/share/qmckl/test_data/sm_test.h
--- a/tools/lib.org
+++ b/tools/lib.org
@ -46,7 +46,7 @@ f_of_c_d = { '' : ''
     , 'uint64_t'        : 'integer (c_int64_t)'
     , 'float'           : 'real    (c_float  )'
     , 'double'          : 'real    (c_double )'
-     , 'char'            : 'character'
+     , 'char'            : 'character(c_char  )'
 }
    #+END_SRC

--- a/tools/tangle.sh
+++ b/tools/tangle.sh
@ -18,6 +18,8 @@ if [[ -z ${top_builddir} ]] ; then
   exit 1
 fi

+EMACS="${VARIABLE:=emacs}" 
+
 EXTENSIONS="_f.F90 _fh_func.F90 _fh_type.F90 .c _func.h _type.h _private_type.h _private_func.h"

 function tangle()
@ -42,7 +44,7 @@ function tangle()
    done

    ${srcdir}/tools/missing \
-        emacs --no-init-file --no-site-lisp --quick --batch ${org_file} \
+        $EMACS --no-init-file --no-site-lisp --quick --batch ${org_file} \
         --load=${srcdir}/tools/config_tangle.el \
        -f org-babel-tangle