Reduced OMP stack size

2025-04-28 19:34:46 +02:00 · 2023-09-13 13:23:16 +02:00 · 2023-09-13 13:23:16 +02:00 · 3db1765cdb
commit 3db1765cdb
parent 5303bf88b3
4 changed files with 19 additions and 7 deletions
--- a/configure.ac
+++ b/configure.ac
@@ -35,7 +35,7 @@

 AC_PREREQ([2.69])

-AC_INIT([qmckl],[0.4.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html])
+AC_INIT([qmckl],[0.5.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html])
 AC_CONFIG_AUX_DIR([tools])
 AM_INIT_AUTOMAKE([subdir-objects color-tests parallel-tests silent-rules 1.11])

--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@@ -5766,8 +5766,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
    double exp_mat[prim_max] __attribute__((aligned(64)));
    double ce_mat[shell_max] __attribute__((aligned(64)));

-    int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
-    double  coef_mat_sparse    [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
+    int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = 
+      malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
+
+    double  (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
+      malloc(nucl_num * sizeof (*coef_mat_sparse));
+
    for (int i=0 ; i<nucl_num ; ++i) {
      for (int j=0 ; j<shell_max; ++j) {
        int l=1;
@@ -5935,6 +5939,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
        }
      }
    }
+    free(coef_mat_sparse_idx);
+    free(coef_mat_sparse);
  }

  return QMCKL_SUCCESS;
@@ -6559,8 +6565,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
    double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
    double ce_mat[shell_max][8] __attribute__((aligned(64))) ;

-    int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
-    double  coef_mat_sparse    [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64)));
+    int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = 
+      malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
+
+    double  (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
+      malloc(nucl_num * sizeof (*coef_mat_sparse));
+
    for (int i=0 ; i<nucl_num ; ++i) {
      for (int j=0 ; j<shell_max; ++j) {
        int l=1;
@@ -6840,6 +6850,8 @@ qmckl_compute_ao_vgl_hpc_gaussian (
        }
      }
    }
+    free(coef_mat_sparse_idx);
+    free(coef_mat_sparse);
  }

  return QMCKL_SUCCESS;
--- a/org/qmckl_jastrow_champ.org
+++ b/org/qmckl_jastrow_champ.org
@@ -2869,7 +2869,7 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_gl_hpc(
        factor_ee_gl_3[i] -= f*grad_c2*invdenom*2.0 * b_vector[1];


-        double xk[bord_num+1];
+        double xk[bord_num+1];  // Nvidia C 23.1-0 compiler crashes here (skylake avx512) nvc nvfortran --enable-hpc
        xk[0] = 1.0;
        for (int k=1 ; k<= bord_num ; ++k) {
          xk[k] = xk[k-1]*x;
--- a/python/setup.py
+++ b/python/setup.py
@@ -46,7 +46,7 @@ qmckl_module   =  Extension(name                 = "._" + MODULE_NAME,


 setup(name             = MODULE_NAME,
-      version          = "0.4.1",
+      version          = "0.5.1",
      author           = "TREX-CoE",
      author_email     = "posenitskiy@irsamc.ups-tlse.fr",
      description      = """Python API of the QMCkl library""",