1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 10:06:09 +01:00

Reduced OMP stack size

This commit is contained in:
Anthony Scemama 2023-09-13 13:23:16 +02:00
parent 5303bf88b3
commit 3db1765cdb
4 changed files with 19 additions and 7 deletions

View File

@ -35,7 +35,7 @@
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([qmckl],[0.4.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html]) AC_INIT([qmckl],[0.5.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html])
AC_CONFIG_AUX_DIR([tools]) AC_CONFIG_AUX_DIR([tools])
AM_INIT_AUTOMAKE([subdir-objects color-tests parallel-tests silent-rules 1.11]) AM_INIT_AUTOMAKE([subdir-objects color-tests parallel-tests silent-rules 1.11])

View File

@ -5766,8 +5766,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
double exp_mat[prim_max] __attribute__((aligned(64))); double exp_mat[prim_max] __attribute__((aligned(64)));
double ce_mat[shell_max] __attribute__((aligned(64))); double ce_mat[shell_max] __attribute__((aligned(64)));
int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64))); int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
double coef_mat_sparse [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64))); malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
double (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
malloc(nucl_num * sizeof (*coef_mat_sparse));
for (int i=0 ; i<nucl_num ; ++i) { for (int i=0 ; i<nucl_num ; ++i) {
for (int j=0 ; j<shell_max; ++j) { for (int j=0 ; j<shell_max; ++j) {
int l=1; int l=1;
@ -5935,6 +5939,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
} }
} }
} }
free(coef_mat_sparse_idx);
free(coef_mat_sparse);
} }
return QMCKL_SUCCESS; return QMCKL_SUCCESS;
@ -6559,8 +6565,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double exp_mat[prim_max][8] __attribute__((aligned(64))) ; double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
double ce_mat[shell_max][8] __attribute__((aligned(64))) ; double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
int32_t coef_mat_sparse_idx[nucl_num][shell_max][prim_max+1] __attribute__((aligned(64))); int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
double coef_mat_sparse [nucl_num][shell_max][prim_max+1] __attribute__((aligned(64))); malloc(nucl_num * sizeof (*coef_mat_sparse_idx));
double (*coef_mat_sparse)[shell_max][prim_max+1] __attribute__((aligned(64))) =
malloc(nucl_num * sizeof (*coef_mat_sparse));
for (int i=0 ; i<nucl_num ; ++i) { for (int i=0 ; i<nucl_num ; ++i) {
for (int j=0 ; j<shell_max; ++j) { for (int j=0 ; j<shell_max; ++j) {
int l=1; int l=1;
@ -6840,6 +6850,8 @@ qmckl_compute_ao_vgl_hpc_gaussian (
} }
} }
} }
free(coef_mat_sparse_idx);
free(coef_mat_sparse);
} }
return QMCKL_SUCCESS; return QMCKL_SUCCESS;

View File

@ -2869,7 +2869,7 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_gl_hpc(
factor_ee_gl_3[i] -= f*grad_c2*invdenom*2.0 * b_vector[1]; factor_ee_gl_3[i] -= f*grad_c2*invdenom*2.0 * b_vector[1];
double xk[bord_num+1]; double xk[bord_num+1]; // Nvidia C 23.1-0 compiler crashes here (skylake avx512) nvc nvfortran --enable-hpc
xk[0] = 1.0; xk[0] = 1.0;
for (int k=1 ; k<= bord_num ; ++k) { for (int k=1 ; k<= bord_num ; ++k) {
xk[k] = xk[k-1]*x; xk[k] = xk[k-1]*x;

View File

@ -46,7 +46,7 @@ qmckl_module = Extension(name = "._" + MODULE_NAME,
setup(name = MODULE_NAME, setup(name = MODULE_NAME,
version = "0.4.1", version = "0.5.1",
author = "TREX-CoE", author = "TREX-CoE",
author_email = "posenitskiy@irsamc.ups-tlse.fr", author_email = "posenitskiy@irsamc.ups-tlse.fr",
description = """Python API of the QMCkl library""", description = """Python API of the QMCkl library""",