Merge branch 'master' into jastrow_c

2025-05-02 05:14:47 +02:00 · 2022-04-04 11:22:17 +02:00 · 2022-04-04 11:22:17 +02:00 · 35e15205df
commit 35e15205df
parent 3ce162a384 91811079d3
9 changed files with 1135 additions and 484 deletions
--- a/configure.ac
+++ b/configure.ac
@ -49,12 +49,12 @@ AS_IF([test -d ${srcdir}/.git],
 AC_ARG_WITH(ifort, [AS_HELP_STRING([--with-ifort],[Use Intel Fortran compiler])], with_ifort=$withval, with_ifort=no)
 AS_IF([test "$with_ifort" == "yes"], [
  FC=ifort
-  FCFLAGS="-xHost -ip -Ofast -ftz -finline -g -mkl=sequential" ])
+  FCFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])

 AC_ARG_WITH(icc, [AS_HELP_STRING([--with-icc],[Use Intel C compiler])], with_icc=$withval, with_icc=no)
 AS_IF([test "$with_icc" == "yes"], [
  CC=icc
-  CFLAGS="-xHost -ip -Ofast -ftz -finline -g -mkl=sequential" ])
+  CFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])

 AS_IF([test "$with_icc"."$with_ifort" == "yes.yes"], [
  ax_blas_ok="yes"
--- a/org/examples.org
+++ b/org/examples.org
@ -0,0 +1,199 @@
+#+TITLE: Code examples
+#+SETUPFILE: ../tools/theme.setup
+#+INCLUDE: ../tools/lib.org
+  
+In this section, we present examples of usage of QMCkl.
+For simplicity, we assume that the wave function parameters are stores
+in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file.
+
+* Checking errors
+
+  All QMCkl functions return an error code. A convenient way to handle
+  errors is to write an error-checking function that displays the
+  error in text format and exits the program.
+
+  #+NAME: qmckl_check_error
+  #+begin_src f90 
+subroutine qmckl_check_error(rc, message)
+  use qmckl
+  implicit none
+  integer(qmckl_exit_code), intent(in) :: rc
+  character(len=*)        , intent(in) :: message
+  character(len=128)                   :: str_buffer
+  if (rc /= QMCKL_SUCCESS) then
+     print *, message
+     call qmckl_string_of_error(rc, str_buffer)
+     print *, str_buffer
+     call exit(rc)
+  end if
+end subroutine qmckl_check_error
+  #+end_src
+  
+* Computing an atomic orbital on a grid
+  :PROPERTIES:
+  :header-args: :tangle ao_grid.f90
+  :END:
+
+  The following program, in Fortran, computes the values of an atomic
+  orbital on a regular 3-dimensional grid. The 100^3 grid points are
+  automatically defined, such that the molecule fits in a box with 5
+  atomic units in the borders.
+
+  This program uses the ~qmckl_check_error~ function defined above.
+  
+  To use this program, run
+  
+  #+begin_src bash :tangle no
+$ ao_grid <trexio_file> <AO_id> <point_num>
+  #+end_src
+
+  
+  #+begin_src f90  :noweb yes
+<<qmckl_check_error>>
+
+program ao_grid
+  use qmckl
+  implicit none
+
+  integer(qmckl_context)    :: qmckl_ctx  ! QMCkl context
+  integer(qmckl_exit_code)  :: rc         ! Exit code of QMCkl functions
+
+  character(len=128)            :: trexio_filename
+  character(len=128)            :: str_buffer
+  integer                       :: ao_id
+  integer                       :: point_num_x
+
+  integer(c_int64_t)            :: nucl_num
+  double precision, allocatable :: nucl_coord(:,:)
+
+  integer(c_int64_t)            :: point_num
+  integer(c_int64_t)            :: ao_num
+  integer(c_int64_t)            :: ipoint, i, j, k
+  double precision              :: x, y, z, dr(3)
+  double precision              :: rmin(3), rmax(3)
+  double precision, allocatable :: points(:,:)
+  double precision, allocatable :: ao_vgl(:,:,:)
+  #+end_src
+
+  Start by fetching the command-line arguments:
+
+  #+begin_src f90 
+  if (iargc() /= 3) then
+     print *, 'Syntax: ao_grid <trexio_file> <AO_id> <point_num>'
+     call exit(-1)
+  end if
+  call getarg(1, trexio_filename)
+  call getarg(2, str_buffer)
+  read(str_buffer, *) ao_id
+  call getarg(3, str_buffer)
+  read(str_buffer, *) point_num_x
+
+  if (point_num_x < 0 .or. point_num_x > 300) then
+     print *, 'Error: 0 < point_num < 300'
+     call exit(-1)
+  end if
+  #+end_src
+
+  Create the QMCkl context and initialize it with the wave function
+  present in the TREXIO file:
+
+  #+begin_src f90 
+  qmckl_ctx = qmckl_context_create()
+  rc  = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename)))
+  call qmckl_check_error(rc, 'Read TREXIO')
+  #+end_src
+
+  We need to check that ~ao_id~ is in the range, so we get the total
+  number of AOs from QMCkl:
+  
+  #+begin_src f90 
+  rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num)
+  call qmckl_check_error(rc, 'Getting ao_num')
+
+  if (ao_id < 0 .or. ao_id > ao_num) then
+     print *, 'Error: 0 < ao_id < ', ao_num
+     call exit(-1)
+  end if
+  #+end_src
+
+  Now we will compute the limits of the box in which the molecule fits.
+  For that, we first need to ask QMCkl the coordinates of nuclei.
+
+  #+begin_src f90 
+  rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num)
+  call qmckl_check_error(rc, 'Get nucleus num')
+
+  allocate( nucl_coord(3, nucl_num) )
+  rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num)
+  call qmckl_check_error(rc, 'Get nucleus coord')
+  #+end_src
+
+  We now compute the coordinates of opposite points of the box, and
+  the distance between points along the 3 directions:
+
+  #+begin_src f90 
+  rmin(1) = minval( nucl_coord(1,:) ) - 5.d0
+  rmin(2) = minval( nucl_coord(2,:) ) - 5.d0
+  rmin(3) = minval( nucl_coord(3,:) ) - 5.d0
+     
+  rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0
+  rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0
+  rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0
+
+  dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1)
+  #+end_src
+
+  We now produce the list of point coordinates where the AO will be
+  evaluated:
+  
+  #+begin_src f90 
+  point_num = point_num_x**3
+  allocate( points(point_num, 3) )
+  ipoint=0
+  z = rmin(3)
+  do k=1,point_num_x
+     y = rmin(2)
+     do j=1,point_num_x
+        x = rmin(1)
+        do i=1,point_num_x
+           ipoint = ipoint+1
+           points(ipoint,1) = x
+           points(ipoint,2) = y
+           points(ipoint,3) = z
+           x = x + dr(1)
+        end do
+        y = y + dr(2)
+     end do
+     z = z + dr(3)
+  end do
+  #+end_src
+  
+  We give the points to QMCkl:
+
+  #+begin_src f90 
+  rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num)
+  call qmckl_check_error(rc, 'Setting points')
+  #+end_src
+
+  We allocate the space required to retrieve the values, gradients and
+  Laplacian of all AOs, and ask to retrieve the values of the
+  AOs computed at the point positions. 
+  
+  #+begin_src f90 
+  allocate( ao_vgl(ao_num, 5, point_num) )
+  rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num)
+  call qmckl_check_error(rc, 'Setting points')
+  #+end_src
+
+  We finally print the value of the AO:
+
+  #+begin_src f90 
+  do ipoint=1, point_num
+     print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint)
+  end do
+  #+end_src
+
+  #+begin_src f90 
+  deallocate( nucl_coord, points, ao_vgl )
+end program ao_grid
+  #+end_src
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
--- a/org/qmckl_blas.org
+++ b/org/qmckl_blas.org
@ -85,7 +85,7 @@ are not intended to be passed to external codes.
   #+begin_src c :comments org :tangle (eval h_private_type) :exports none
 typedef struct qmckl_vector {
  int64_t size;
-  double* data;
+  double* restrict data;
 } qmckl_vector;
   #+end_src

@ -161,7 +161,7 @@ qmckl_vector_free( qmckl_context context,
   #+begin_src c :comments org :tangle (eval h_private_type) :exports none
 typedef struct qmckl_matrix {
  int64_t size[2];
-  double* data;
+  double* restrict data;
 } qmckl_matrix;
   #+end_src

@ -247,7 +247,7 @@ qmckl_matrix_free( qmckl_context context,
 typedef struct qmckl_tensor {
  int64_t order;
  int64_t size[QMCKL_TENSOR_ORDER_MAX];
-  double* data;
+  double* restrict data;
 } qmckl_tensor;
   #+end_src

--- a/org/qmckl_determinant.org
+++ b/org/qmckl_determinant.org
@ -1239,9 +1239,9 @@ assert(rc == QMCKL_SUCCESS);
 assert(qmckl_ao_basis_provided(context));


-double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num];
+double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num];

-rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
+rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
 assert (rc == QMCKL_SUCCESS);

 /* Set up MO data */
@ -1256,8 +1256,8 @@ assert (rc == QMCKL_SUCCESS);

 assert(qmckl_mo_basis_provided(context));

-double mo_vgl[5][elec_num][chbrclf_mo_num];
-rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
+double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
+rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num);
 assert (rc == QMCKL_SUCCESS);

 /* Set up determinant data */
--- a/org/qmckl_local_energy.org
+++ b/org/qmckl_local_energy.org
@ -655,9 +655,9 @@ assert(rc == QMCKL_SUCCESS);
 assert(qmckl_ao_basis_provided(context));


-double ao_vgl[5][walk_num][elec_num][chbrclf_ao_num];
+double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num];

-rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0][0]),
+rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]),
                               (int64_t) 5*walk_num*elec_num*chbrclf_ao_num);
 assert (rc == QMCKL_SUCCESS);

@ -673,8 +673,8 @@ assert (rc == QMCKL_SUCCESS);

 assert(qmckl_mo_basis_provided(context));

-double mo_vgl[5][elec_num][chbrclf_mo_num];
-rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
+double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
+rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num);
 assert (rc == QMCKL_SUCCESS);

 /* Set up determinant data */
--- a/org/qmckl_mo.org
+++ b/org/qmckl_mo.org
@ -84,13 +84,14 @@ int main() {
  The following arrays are stored in the context:


-  |---------------+--------------------+----------------------|
-  | ~mo_num~      |                    | Number of MOs        |
-  | ~coefficient~ | ~[mo_num][ao_num]~ | Orbital coefficients |
+  |-----------------+--------------------+----------------------------------------|
+  | ~mo_num~        |                    | Number of MOs                          |
+  | ~coefficient~   | ~[mo_num][ao_num]~ | Orbital coefficients                   |
+  | ~coefficient_t~ | ~[ao_num][mo_num]~ | Transposed of the Orbital coefficients |
+  |-----------------+--------------------+----------------------------------------|

  Computed data:

-  |---------------+--------------------------+-------------------------------------------------------------------------------------|
  |---------------+--------------------------+-------------------------------------------------------------------------------------|
  | ~mo_vgl~      | ~[point_num][5][mo_num]~ | Value, gradients, Laplacian of the MOs at point positions                           |
  | ~mo_vgl_date~ | ~uint64_t~               | Late modification date of Value, gradients, Laplacian of the MOs at point positions |
@ -101,9 +102,10 @@ int main() {
   #+begin_src c :comments org :tangle (eval h_private_type)
 typedef struct qmckl_mo_basis_struct {
  int64_t   mo_num;
-  double  * coefficient;
+  double * restrict coefficient;
+  double * restrict coefficient_t;

-  double  * mo_vgl;
+  double * restrict mo_vgl;
  uint64_t  mo_vgl_date;

  int32_t   uninitialized;
@ -355,6 +357,34 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {
  qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
  assert (ctx != NULL);

+  qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
+  mem_info.size = ctx->ao_basis.ao_num * ctx->mo_basis.mo_num * sizeof(double);
+  double* new_array = (double*) qmckl_malloc(context, mem_info);
+  if (new_array == NULL) {
+    return qmckl_failwith( context,
+                           QMCKL_ALLOCATION_FAILED,
+                           "qmckl_finalize_mo_basis",
+                           NULL);
+  }
+
+  assert (ctx->mo_basis.coefficient != NULL);
+  
+  if (ctx->mo_basis.coefficient_t != NULL) {
+    qmckl_exit_code rc = qmckl_free(context, ctx->mo_basis.coefficient);
+    if (rc != QMCKL_SUCCESS) {
+      return qmckl_failwith( context, rc,
+                             "qmckl_finalize_mo_basis",
+                             NULL);
+    }
+  }
+
+  for (int64_t i=0 ; i<ctx->ao_basis.ao_num ; ++i) {
+    for (int64_t j=0 ; j<ctx->mo_basis.mo_num ; ++j) {
+      new_array[i*ctx->mo_basis.mo_num + j] = ctx->mo_basis.coefficient[j*ctx->ao_basis.ao_num + i];
+    }
+  }
+
+  ctx->mo_basis.coefficient_t = new_array;
  qmckl_exit_code rc = QMCKL_SUCCESS;
  return rc;
 }
@ -367,11 +397,18 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) {
 *** Get

    #+begin_src c :comments org :tangle (eval h_func) :noweb yes
-qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl);
+qmckl_exit_code
+qmckl_get_mo_basis_mo_vgl(qmckl_context context,
+                          double* const mo_vgl,
+                          const int64_t size_max);
    #+end_src

    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
-qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_vgl) {
+qmckl_exit_code
+qmckl_get_mo_basis_mo_vgl(qmckl_context context,
+                          double* const mo_vgl,
+                          const int64_t size_max)
+{

  if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    return QMCKL_NULL_CONTEXT;
@ -388,7 +425,13 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v
  qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
  assert (ctx != NULL);

-  size_t sze = 5 * ctx->point.num * ctx->mo_basis.mo_num;
+  const int64_t sze = ctx->point.num * 5 * ctx->mo_basis.mo_num;
+  if (size_max < sze) {
+    return qmckl_failwith( context,
+                           QMCKL_INVALID_ARG_3,
+                           "qmckl_get_mo_basis_mo_vgl",
+                           "input array too small");
+  }
  memcpy(mo_vgl, ctx->mo_basis.mo_vgl, sze * sizeof(double));

  return QMCKL_SUCCESS;
@ -396,17 +439,84 @@ qmckl_exit_code qmckl_get_mo_basis_vgl(qmckl_context context, double* const mo_v
    #+end_src

    #+begin_src f90 :tangle (eval fh_func) :comments org :exports none
-    interface
-      integer(c_int32_t) function qmckl_get_mo_basis_vgl (context, mo_vgl) &
-          bind(C)
-        use, intrinsic :: iso_c_binding
-        import
-        implicit none
+  interface
+     integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl (context, &
+          mo_vgl, size_max) bind(C)
+       use, intrinsic :: iso_c_binding
+       import
+       implicit none
       
-        integer (c_int64_t) , intent(in)  , value :: context
-        double precision,     intent(out)         :: mo_vgl(*)
-      end function
-    end interface
+       integer (c_int64_t) , intent(in)  , value :: context
+       double precision,     intent(out)         :: mo_vgl(*)
+       integer (c_int64_t) , intent(in)  , value :: size_max
+     end function qmckl_get_mo_basis_mo_vgl
+  end interface
+    #+end_src
+
+    Uses the given array to compute the VGL.
+
+    #+begin_src c :comments org :tangle (eval h_func) :noweb yes
+qmckl_exit_code
+qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context,
+                                   double* const mo_vgl,
+                                   const int64_t size_max);
+    #+end_src
+
+    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
+qmckl_exit_code
+qmckl_get_mo_basis_mo_vgl_inplace (qmckl_context context,
+                                   double* const mo_vgl,
+                                   const int64_t size_max)
+{
+
+  if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
+    return qmckl_failwith( context,
+                           QMCKL_INVALID_CONTEXT,
+                           "qmckl_get_mo_basis_mo_vgl",
+                           NULL);
+  }
+
+  qmckl_exit_code rc;
+
+  qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
+  assert (ctx != NULL);
+
+  const int64_t sze = ctx->mo_basis.mo_num * 5 * ctx->point.num;
+  if (size_max < sze) {
+    return qmckl_failwith( context,
+                           QMCKL_INVALID_ARG_3,
+                           "qmckl_get_mo_basis_mo_vgl",
+                           "input array too small");
+  }
+
+  rc = qmckl_context_touch(context);
+  if (rc != QMCKL_SUCCESS) return rc;
+
+  double* old_array = ctx->mo_basis.mo_vgl;
+
+  ctx->mo_basis.mo_vgl = mo_vgl;
+
+  rc = qmckl_provide_mo_vgl(context);
+  if (rc != QMCKL_SUCCESS) return rc;
+
+  ctx->mo_basis.mo_vgl = old_array;
+
+  return QMCKL_SUCCESS;
+}
+    #+end_src
+
+    #+begin_src f90 :tangle (eval fh_func) :comments org :exports none
+  interface
+     integer(c_int32_t) function qmckl_get_mo_basis_mo_vgl_inplace (context, &
+          mo_vgl, size_max) bind(C)
+       use, intrinsic :: iso_c_binding
+       import
+       implicit none
+       integer (c_int64_t) , intent(in)  , value :: context
+       double precision,     intent(out)         :: mo_vgl(*)
+       integer (c_int64_t) , intent(in)  , value :: size_max
+     end function qmckl_get_mo_basis_mo_vgl_inplace
+  end interface
    #+end_src

 *** Provide
@ -462,19 +572,19 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context)
      if (mo_vgl == NULL) {
        return qmckl_failwith( context,
                               QMCKL_ALLOCATION_FAILED,
-                               "qmckl_mo_basis_vgl",
+                               "qmckl_mo_basis_mo_vgl",
                               NULL);
      }
      ctx->mo_basis.mo_vgl = mo_vgl;
    }

-    rc = qmckl_compute_mo_basis_vgl(context,
-                                    ctx->ao_basis.ao_num,
-                                    ctx->mo_basis.mo_num,
-                                    ctx->point.num,
-                                    ctx->mo_basis.coefficient,
-                                    ctx->ao_basis.ao_vgl,
-                                    ctx->mo_basis.mo_vgl);
+    rc = qmckl_compute_mo_basis_mo_vgl(context,
+                                       ctx->ao_basis.ao_num,
+                                       ctx->mo_basis.mo_num,
+                                       ctx->point.num,
+                                       ctx->mo_basis.coefficient_t,
+                                       ctx->ao_basis.ao_vgl,
+                                       ctx->mo_basis.mo_vgl);
    if (rc != QMCKL_SUCCESS) {
      return rc;
    }
@ -488,25 +598,33 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context)

 *** Compute
   :PROPERTIES:
-   :Name:     qmckl_compute_mo_basis_vgl
+   :Name:     qmckl_compute_mo_basis_mo_vgl
   :CRetType: qmckl_exit_code
   :FRetType: qmckl_exit_code
   :END:

-    #+NAME: qmckl_mo_basis_vgl_args
-   | ~qmckl_context~ | ~context~                         | in  | Global state                              |
-   | ~int64_t~       | ~ao_num~                          | in  | Number of AOs                             |
-   | ~int64_t~       | ~mo_num~                          | in  | Number of MOs                             |
-   | ~int64_t~       | ~point_num~                       | in  | Number of points                          |
-   | ~double~        | ~coef_normalized[mo_num][ao_num]~ | in  | AO to MO transformation matrix            |
-   | ~double~        | ~ao_vgl[point_num][5][ao_num]~    | in  | Value, gradients and Laplacian of the AOs |
-   | ~double~        | ~mo_vgl[point_num][5][mo_num]~    | out | Value, gradients and Laplacian of the MOs |
+    #+NAME: qmckl_mo_basis_mo_vgl_args
+    | Variable            | Type                           | In/Out | Description                                     |
+    |---------------------+--------------------------------+--------+-------------------------------------------------|
+    | ~context~           | ~qmckl_context~                | in     | Global state                                    |
+    | ~ao_num~            | ~int64_t~                      | in     | Number of AOs                                   |
+    | ~mo_num~            | ~int64_t~                      | in     | Number of MOs                                   |
+    | ~point_num~         | ~int64_t~                      | in     | Number of points                                |
+    | ~coef_normalized_t~ | ~double[mo_num][ao_num]~       | in     | Transpose of the AO to MO transformation matrix |
+    | ~ao_vgl~            | ~double[point_num][5][ao_num]~ | in     | Value, gradients and Laplacian of the AOs       |
+    | ~mo_vgl~            | ~double[point_num][5][mo_num]~ | out    | Value, gradients and Laplacian of the MOs       |
+
+
+    The matrix of AO values is very sparse, so we use a sparse-dense
+    matrix multiplication instead of a dgemm, as exposed in
+    https://dx.doi.org/10.1007/978-3-642-38718-0_14.
+
    
    
    #+begin_src f90 :comments org :tangle (eval f) :noweb yes
-integer function qmckl_compute_mo_basis_vgl_f(context, &
+integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, &
     ao_num, mo_num, point_num, &
-     coef_normalized, ao_vgl, mo_vgl) &
+     coef_normalized_t, ao_vgl, mo_vgl) &
     result(info)
  use qmckl
  implicit none
@ -514,55 +632,69 @@ integer function qmckl_compute_mo_basis_vgl_f(context, &
  integer*8             , intent(in)  :: ao_num, mo_num
  integer*8             , intent(in)  :: point_num
  double precision      , intent(in)  :: ao_vgl(ao_num,5,point_num)
-  double precision      , intent(in)  :: coef_normalized(ao_num,mo_num)
+  double precision      , intent(in)  :: coef_normalized_t(mo_num,ao_num)
  double precision      , intent(out) :: mo_vgl(mo_num,5,point_num)
-  character                     :: TransA, TransB
-  double precision              :: alpha, beta
-  integer*8                     :: M, N, K, LDA, LDB, LDC, i,j
+  integer*8 :: i,j,k
+  double precision :: c1, c2, c3, c4, c5

-  TransA = 'T'
-  TransB = 'N'
-  M = mo_num
-  N = point_num*5_8
-  K = int(ao_num,8)
-  alpha = 1.d0
-  beta  = 0.d0
-  LDA = size(coef_normalized,1)
-  LDB = size(ao_vgl,1)
-  LDC = size(mo_vgl,1)
+  do j=1,point_num
+     mo_vgl(:,:,j) = 0.d0
+     do k=1,ao_num
+        if (ao_vgl(k,1,j) /= 0.d0) then
+           c1 = ao_vgl(k,1,j)
+           c2 = ao_vgl(k,2,j)
+           c3 = ao_vgl(k,3,j)
+           c4 = ao_vgl(k,4,j)
+           c5 = ao_vgl(k,5,j)
+           do i=1,mo_num
+              mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1
+              mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2
+              mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3
+              mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4
+              mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5
+           end do
+        end if
+     end do
+  end do

-  info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, &
-       coef_normalized, int(size(coef_normalized,1),8),      &
-       ao_vgl, int(size(ao_vgl,1),8), beta,                  &
-       mo_vgl,LDC)
-
-  info = QMCKL_SUCCESS
-
-end function qmckl_compute_mo_basis_vgl_f
+end function qmckl_compute_mo_basis_mo_vgl_doc_f
    #+end_src

-   #+CALL: generate_c_header(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl"))
+    #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl"))

   #+RESULTS:
   #+begin_src c :tangle (eval h_func) :comments org
-   qmckl_exit_code qmckl_compute_mo_basis_vgl (
-	 const qmckl_context context,
-	 const int64_t ao_num,
-	 const int64_t mo_num,
-	 const int64_t point_num,
-	 const double* coef_normalized,
-	 const double* ao_vgl,
-	 double* const mo_vgl );
+   qmckl_exit_code qmckl_compute_mo_basis_mo_vgl (
+         const qmckl_context context,
+         const int64_t ao_num,
+         const int64_t mo_num,
+         const int64_t point_num,
+         const double* coef_normalized_t,
+         const double* ao_vgl,
+         double* const mo_vgl ); 
   #+end_src

+   #+CALL: generate_c_header(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc"))

-    #+CALL: generate_c_interface(table=qmckl_mo_basis_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_vgl"))
+   #+RESULTS:
+   #+begin_src c :tangle (eval h_func) :comments org
+   qmckl_exit_code qmckl_compute_mo_basis_mo_vgl_doc (
+         const qmckl_context context,
+         const int64_t ao_num,
+         const int64_t mo_num,
+         const int64_t point_num,
+         const double* coef_normalized_t,
+         const double* ao_vgl,
+         double* const mo_vgl ); 
+   #+end_src
+
+   #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_vgl_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_vgl_doc"))
   
    #+RESULTS:
    #+begin_src f90 :tangle (eval f) :comments org :exports none
-    integer(c_int32_t) function qmckl_compute_mo_basis_vgl &
-	(context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl) &
-	bind(C) result(info)
+    integer(c_int32_t) function qmckl_compute_mo_basis_mo_vgl_doc &
+        (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) &
+        bind(C) result(info)

      use, intrinsic :: iso_c_binding
      implicit none
@ -571,15 +703,176 @@ end function qmckl_compute_mo_basis_vgl_f
      integer (c_int64_t) , intent(in)  , value :: ao_num
      integer (c_int64_t) , intent(in)  , value :: mo_num
      integer (c_int64_t) , intent(in)  , value :: point_num
-      real    (c_double ) , intent(in)          :: coef_normalized(ao_num,mo_num)
+      real    (c_double ) , intent(in)          :: coef_normalized_t(ao_num,mo_num)
      real    (c_double ) , intent(in)          :: ao_vgl(ao_num,5,point_num)
      real    (c_double ) , intent(out)         :: mo_vgl(mo_num,5,point_num)

-      integer(c_int32_t), external :: qmckl_compute_mo_basis_vgl_f
-      info = qmckl_compute_mo_basis_vgl_f &
-	     (context, ao_num, mo_num, point_num, coef_normalized, ao_vgl, mo_vgl)
+      integer(c_int32_t), external :: qmckl_compute_mo_basis_mo_vgl_doc_f
+      info = qmckl_compute_mo_basis_mo_vgl_doc_f &
+             (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl)

-    end function qmckl_compute_mo_basis_vgl
+    end function qmckl_compute_mo_basis_mo_vgl_doc
+    #+end_src
+
+    #+begin_src c :tangle (eval c) :comments org                                                                             
+qmckl_exit_code
+qmckl_compute_mo_basis_mo_vgl (const qmckl_context context,
+                            const int64_t ao_num,
+                            const int64_t mo_num,
+                            const int64_t point_num,
+                            const double* coef_normalized_t,
+                            const double* ao_vgl,
+                            double* const mo_vgl )
+{
+#ifdef HAVE_HPC
+  return qmckl_compute_mo_basis_mo_vgl_hpc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl);
+#else
+  return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl);
+#endif
+}
+    #+end_src
+    
+    
+*** HPC version
+    
+
+    #+begin_src c :tangle (eval h_func) :comments org                                                                             
+#ifdef HAVE_HPC
+qmckl_exit_code
+qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
+                                   const int64_t ao_num,
+                                   const int64_t mo_num,
+                                   const int64_t point_num,
+                                   const double* coef_normalized_t,
+                                   const double* ao_vgl,
+                                   double* const mo_vgl );
+#endif
+    #+end_src
+
+    #+begin_src c :tangle (eval c) :comments org                                                                             
+#ifdef HAVE_HPC
+qmckl_exit_code
+qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
+                                   const int64_t ao_num,
+                                   const int64_t mo_num,
+                                   const int64_t point_num,
+                                   const double* restrict coef_normalized_t,
+                                   const double* restrict ao_vgl,
+                                   double* restrict const mo_vgl )
+{
+#ifdef HAVE_OPENMP
+  #pragma omp parallel for
+#endif
+  for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
+    double* restrict const vgl1 = &(mo_vgl[ipoint*5*mo_num]);
+    const double* restrict avgl1 = &(ao_vgl[ipoint*5*ao_num]);
+    double* restrict const vgl2 =  vgl1 + mo_num;
+    double* restrict const vgl3 =  vgl1 + (mo_num << 1);
+    double* restrict const vgl4 =  vgl1 + (mo_num << 1) + mo_num;
+    double* restrict const vgl5 =  vgl1 + (mo_num << 2);
+    const double* restrict avgl2 = avgl1 + ao_num;
+    const double* restrict avgl3 = avgl1 + (ao_num << 1);
+    const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num;
+    const double* restrict avgl5 = avgl1 + (ao_num << 2);
+
+    for (int64_t i=0 ; i<mo_num ; ++i) {
+      vgl1[i] = 0.;
+      vgl2[i] = 0.;
+      vgl3[i] = 0.;
+      vgl4[i] = 0.;
+      vgl5[i] = 0.;
+    }
+
+    int64_t nidx=0;
+    int64_t idx[ao_num];
+    double  av1[ao_num];
+    double  av2[ao_num];
+    double  av3[ao_num];
+    double  av4[ao_num];
+    double  av5[ao_num];
+    for (int64_t k=0 ; k<ao_num ; ++k) {
+      const double* restrict ck1 = coef_normalized_t + k*mo_num;
+      if (avgl1[k] != 0.) {
+        idx[nidx] = k;
+        av1[nidx] = avgl1[k];
+        av2[nidx] = avgl2[k];
+        av3[nidx] = avgl3[k];
+        av4[nidx] = avgl4[k];
+        av5[nidx] = avgl5[k];
+        ++nidx;
+      }
+    }
+
+    int64_t n;
+    for (n=0 ; n < nidx-4 ; n+=4) {
+      int64_t k = idx[n];
+      const double* restrict ck1 = coef_normalized_t + idx[n  ]*mo_num;
+      const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
+      const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
+      const double* restrict ck4 = coef_normalized_t + idx[n+3]*mo_num;
+
+      const double a11 = av1[n  ];
+      const double a21 = av1[n+1];
+      const double a31 = av1[n+2];
+      const double a41 = av1[n+3];
+      
+      const double a12 = av2[n  ];
+      const double a22 = av2[n+1];
+      const double a32 = av2[n+2];
+      const double a42 = av2[n+3];
+      
+      const double a13 = av3[n  ];
+      const double a23 = av3[n+1];
+      const double a33 = av3[n+2];
+      const double a43 = av3[n+3];
+      
+      const double a14 = av4[n  ];
+      const double a24 = av4[n+1];
+      const double a34 = av4[n+2];
+      const double a44 = av4[n+3];
+      
+      const double a15 = av5[n  ];
+      const double a25 = av5[n+1];
+      const double a35 = av5[n+2];
+      const double a45 = av5[n+3];
+
+#ifdef HAVE_OPENMP
+#pragma omp simd
+#endif
+      for (int64_t i=0 ; i<mo_num ; ++i) {
+        vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
+        vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
+        vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
+        vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
+        vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
+      }
+    }
+
+    int64_t n0 = nidx-4;
+    n0 = n0 < 0 ? 0 : n0;
+    for (int64_t n=n0 ; n < nidx ; n+=1) {
+      const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
+      const double a1 = av1[n];
+      const double a2 = av2[n];
+      const double a3 = av3[n];
+      const double a4 = av4[n];
+      const double a5 = av5[n];
+
+#ifdef HAVE_OPENMP
+  #pragma omp simd
+#endif
+      for (int64_t i=0 ; i<mo_num ; ++i) {
+        vgl1[i] += ck[i] * a1;
+        vgl2[i] += ck[i] * a2;
+        vgl3[i] += ck[i] * a3;
+        vgl4[i] += ck[i] * a4;
+        vgl5[i] += ck[i] * a5;
+      }
+    }
+  }
+  return QMCKL_SUCCESS;
+}
+#endif
    #+end_src

 *** Test
@ -772,7 +1065,7 @@ assert (rc == QMCKL_SUCCESS);
 assert(qmckl_mo_basis_provided(context));

 double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num];
-rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0]));
+rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), walk_num*elec_num*5*chbrclf_mo_num);
 assert (rc == QMCKL_SUCCESS);

 // Test overlap of MO
@ -807,7 +1100,7 @@ assert (rc == QMCKL_SUCCESS);
 //
 //      // Calculate value of MO (1st electron)
 //      double mo_vgl[5][walk_num][elec_num][chbrclf_mo_num];
-//      rc = qmckl_get_mo_basis_vgl(context, &(mo_vgl[0][0][0][0]));
+//      rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0][0]));
 //      assert (rc == QMCKL_SUCCESS);
 //      ovlmo1 += mo_vgl[0][0][0][0]*mo_vgl[0][0][0][0]*dr3;
 //    }
--- a/org/qmckl_point.org
+++ b/org/qmckl_point.org
@ -257,7 +257,8 @@ end interface
   enough, we overwrite the data contained in them.

   To set the data relative to the points in the context, one of the
-   following functions need to be called.
+   following functions need to be called. Here, ~num~ is the number of
+   points to set.

   #+begin_src c :comments org :tangle (eval h_func)
 qmckl_exit_code qmckl_set_point (qmckl_context context,
@ -348,7 +349,7 @@ qmckl_set_point (qmckl_context context,
   #+begin_src f90 :comments org :tangle (eval fh_func) :noweb yes
 interface
  integer(c_int32_t) function qmckl_set_point(context, &
-       transp, coord, size_max) bind(C)
+       transp, coord, num) bind(C)
    use, intrinsic :: iso_c_binding
    import
    implicit none
@ -356,7 +357,7 @@ interface
    integer (c_int64_t) , intent(in)  , value :: context
    character(c_char)   , intent(in)  , value :: transp
    real    (c_double ) , intent(in)          :: coord(*)
-    integer (c_int64_t) , intent(in)  , value :: size_max
+    integer (c_int64_t) , intent(in)  , value :: num
  end function
 end interface
   #+end_src
--- a/tools/theme.setup
+++ b/tools/theme.setup
@ -6,7 +6,9 @@
 #+INFOJS_OPT: toc:t mouse:underline path:org-info.js
 #+HTML_HEAD: <link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />

-#+STARTUP:    align fold nodlcheck hidestars oddeven lognotestate latexpreview
+# STARTUP: align fold nodlcheck hidestars oddeven lognotestate latexpreview
+#+STARTUP: showeverything
+
 #+AUTHOR:     TREX CoE
 #+LANGUAGE:   en