Working on HPC version of AOs

2024-12-22 20:36:01 +01:00 · 2022-02-25 20:39:20 +01:00 · 2022-02-25 20:39:20 +01:00 · ad86cb7d67
commit ad86cb7d67
parent 9eef4e8c12
2 changed files with 179 additions and 22 deletions
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@ -268,7 +268,7 @@ end interface

 *** Data structure                                                 :noexport:

-    #+begin_src c :comments org :tangle (eval h_private_type)
+    #+begin_src c :comments org :tangle (eval h_private_type) :noweb yes
 typedef struct qmckl_ao_basis_struct {
  int64_t   shell_num;
  int64_t   prim_num;
@ -299,6 +299,9 @@ typedef struct qmckl_ao_basis_struct {
  bool      provided;
  bool      ao_cartesian;
  char      type;
+#ifdef HAVE_HPC
+  <<HPC_struct>>
+#endif
 } qmckl_ao_basis_struct;
    #+end_src

@ -2485,7 +2488,6 @@ free(ao_factor_test);
   | ~ao_vgl~             | ~double[point_num][5][ao_num]~    | Value, gradients, Laplacian of the primitives at current  positions                          |
   | ~ao_vgl_date~        | ~uint64_t~                        | Last modification date of Value, gradients, Laplacian of the AOs at current positions        |

-
 *** After initialization

    When the basis set is completely entered, extra data structures may be
@ -2625,9 +2627,107 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
      }
    }
  }
- /* TODO : sort the basis set here */
+
+  rc = QMCKL_SUCCESS;
+#ifdef HAVE_HPC
+  rc = qmckl_finalize_basis_hpc(context);
+#endif
+
+  return rc;
+}
+      #+end_src
+
+*** HPC-specific data structures
+
+    For faster access, we provide 3D tensors for the shell information
+    as:
+    - $C_{psa}$ = =coef_per_nucleus (iprim, ishell, inucl)=
+    - $\gamma_{pa}$ =expo_per_nucleus (iprim, inucl)=
+
+    such that the computation of the radial parts can be vectorized.
+    Exponents are sorted in increasing order to exit loops quickly,
+    and only unique exponents are kept.
+
+    The computation of the radial part is made as
+    \[
+      R_{sa} = \sum_p C_{psa} \gamma_{pa}
+    \]
+    which is a matrix-vector product.
+    
+    #+NAME:HPC_struct 
+    #+begin_src c :comments org :tangle (eval h_private_type) :exports none
+ /* HPC specific data structures */
+ qmckl_tensor coef_per_nucleus;
+ qmckl_matrix expo_per_nucleus;
+    #+end_src
+
+    #+begin_src c :comments org :tangle (eval h_private_func) :exports none
+qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context);
+    #+end_src
+    
+    #+begin_src c :comments org :tangle (eval c) :exports none
+qmckl_exit_code qmckl_finalize_basis_hpc (qmckl_context context)
+{
+  qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
+
+  int64_t shell_max = 0;
+  int64_t prim_max  = 0;
+  int64_t nucl_num = ctx->nucleus.num;
+  for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
+    shell_max = ctx->ao_basis.nucleus_shell_num[inucl] > shell_max ?
+                ctx->ao_basis.nucleus_shell_num[inucl] : shell_max;
+
+    int64_t prim_num = 0;
+    const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl];
+    const int64_t ishell_end   = ctx->ao_basis.nucleus_index[inucl] + ctx->ao_basis.nucleus_shell_num[inucl];
+    for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
+      prim_num += ctx->ao_basis.shell_prim_num[ishell];
+    }
+    prim_max = prim_num > prim_max ?
+               prim_num : prim_max;
+  }
+
+  int64_t size[3] = { prim_max, shell_max, nucl_num };
+  ctx->ao_basis.coef_per_nucleus = qmckl_tensor_alloc( context, 3, size );
+  ctx->ao_basis.coef_per_nucleus = qmckl_tensor_set(ctx->ao_basis.coef_per_nucleus, 0.);
+
+  ctx->ao_basis.expo_per_nucleus = qmckl_matrix_alloc( context, prim_max, nucl_num );
+  ctx->ao_basis.expo_per_nucleus = qmckl_matrix_set(ctx->ao_basis.expo_per_nucleus, 0.);
+
+  double expo[prim_max];
+  double coef[shell_max][prim_max];
+  for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
+    memset(expo, 0, sizeof(expo));
+    memset(coef, 0, sizeof(expo));
+
+    int64_t idx = 0;
+    const int64_t ishell_start = ctx->ao_basis.nucleus_index[inucl];
+    const int64_t ishell_end   = ctx->ao_basis.nucleus_index[inucl] + ctx->ao_basis.nucleus_shell_num[inucl];
+    for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
+      
+      const int64_t iprim_start = ctx->ao_basis.shell_prim_index[ishell];
+      const int64_t iprim_end   = ctx->ao_basis.shell_prim_index[ishell] + ctx->ao_basis.shell_prim_num[ishell];
+      for (int64_t iprim = iprim_start ; iprim < iprim_end ; ++iprim) {
+        expo[idx] = ctx->ao_basis.coefficient_normalized[iprim];
+        coef[ishell - ishell_start][idx] = ctx->ao_basis.coefficient_normalized[iprim];
+        idx += 1;
+      }
+    }
+
+    for (int64_t i=0 ; i<prim_max ; ++i) {
+      qmckl_mat( ctx->ao_basis.expo_per_nucleus, i, inucl ) = expo[i];
+    }
+
+    for (int64_t j=0 ; j<shell_max ; ++j) {
+      for (int64_t i=0 ; i<prim_max ; ++i) {
+        qmckl_ten3( ctx->ao_basis.coef_per_nucleus, i, j, inucl ) = coef[j][i];
+      }
+    }
+
+  }
  return QMCKL_SUCCESS;
 }
+  
    #+end_src

 *** Access functions
@ -5477,6 +5577,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
                   poly_vgl_2 = &(poly_vgl[1][idx]);
                   poly_vgl_3 = &(poly_vgl[2][idx]);
                   poly_vgl_4 = &(poly_vgl[3][idx]);
+                   poly_vgl_5 = &(poly_vgl[4][idx]);
                 }
                 switch (n) {
                 case(1):
@ -5525,7 +5626,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
                       ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
                       ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
                       ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
-                       ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
+                       ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
                                       2.0*(poly_vgl_2[il] * s2 +
                                             poly_vgl_3[il] * s3 +
                                             poly_vgl_4[il] * s4 )) * f[il];
--- a/org/qmckl_blas.org
+++ b/org/qmckl_blas.org
@ -523,13 +523,71 @@ double qmckl_ten5(qmckl_tensor t, int64_t i, int64_t j, int64_t k, int64_t l, in
 #define qmckl_vec(v, i) v.data[i]
 #define qmckl_mat(m, i, j) m.data[(i) + (j)*m.size[0]]

-#define qmckl_ten3(t, i, j, k) t.data[(i) + m.size[0]*((j) + size[1]*(k))]
-#define qmckl_ten4(t, i, j, k, l) t.data[(i) + m.size[0]*((j) + size[1]*((k) + size[2]*(l)))]
-#define qmckl_ten5(t, i, j, k, l, m) t.data[(i) + m.size[0]*((j) + size[1]*((k) + size[2]*((l) + size[3]*(m))))]
+#define qmckl_ten3(t, i, j, k) t.data[(i) + t.size[0]*((j) + t.size[1]*(k))]
+#define qmckl_ten4(t, i, j, k, l) t.data[(i) + t.size[0]*((j) + t.size[1]*((k) + t.size[2]*(l)))]
+#define qmckl_ten5(t, i, j, k, l, m) t.data[(i) + t.size[0]*((j) + t.size[1]*((k) + t.size[2]*((l) + t.size[3]*(m))))]
    #+end_src
  
    For example:
    
+** Set all elements
+   
+*** Vector
+
+    #+begin_src c :comments org :tangle (eval h_private_func)
+qmckl_vector
+qmckl_vector_set(qmckl_vector vector, double value);
+    #+end_src
+
+    #+begin_src c :comments org :tangle (eval c) :exports none
+qmckl_vector
+qmckl_vector_set(qmckl_vector vector, double value)
+{
+  for (int64_t i=0 ; i<vector.size ; ++i) {
+    qmckl_vec(vector, i) = value;
+  }
+  return vector;
+}
+    #+end_src
+
+*** Matrix
+
+    #+begin_src c :comments org :tangle (eval h_private_func)
+qmckl_matrix
+qmckl_matrix_set(qmckl_matrix matrix, double value);
+    #+end_src
+
+    #+begin_src c :comments org :tangle (eval c) :exports none
+qmckl_matrix
+qmckl_matrix_set(qmckl_matrix matrix, double value)
+{
+  qmckl_vector vector = qmckl_vector_of_matrix(matrix);
+  for (int64_t i=0 ; i<vector.size ; ++i) {
+    qmckl_vec(vector, i) = value;
+  }
+  return qmckl_matrix_of_vector(vector, matrix.size[0], matrix.size[1]);
+}
+    #+end_src
+    
+*** Tensor
+
+    #+begin_src c :comments org :tangle (eval h_private_func)
+qmckl_tensor
+qmckl_tensor_set(qmckl_tensor tensor, double value);
+    #+end_src
+
+    #+begin_src c :comments org :tangle (eval c) :exports none
+qmckl_tensor
+qmckl_tensor_set(qmckl_tensor tensor, double value)
+{
+  qmckl_vector vector = qmckl_vector_of_tensor(tensor);
+  for (int64_t i=0 ; i<vector.size ; ++i) {
+    qmckl_vec(vector, i) = value;
+  }
+  return qmckl_tensor_of_vector(vector, tensor.order, tensor.size);
+}
+    #+end_src
+    
 ** Copy to/from to ~double*~

    #+begin_src c :comments org :tangle (eval h_private_func)
@ -707,8 +765,6 @@ qmckl_tensor_of_double(const qmckl_context context,
 }
    #+end_src

-
-
 ** Tests                                                           :noexport:

    #+begin_src c :comments link :tangle (eval c_test) :exports none