From e07b8bfa55e5d380ed5e47e85acf6b6c00d573c3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 20 Feb 2024 23:59:28 +0100 Subject: [PATCH] Avoid memset in Jastrow --- org/qmckl_jastrow_champ.org | 51 +++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/org/qmckl_jastrow_champ.org b/org/qmckl_jastrow_champ.org index 2eddedf..ca4455e 100644 --- a/org/qmckl_jastrow_champ.org +++ b/org/qmckl_jastrow_champ.org @@ -2968,7 +2968,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context, #endif for (int nw = 0; nw < walk_num; ++nw) { double xk[bord_num+1]; - memset(&(factor_ee_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double)); + bool touched = false; for (int j = 0; j < elec_num; ++j) { const double* dxj = &ee_distance_rescaled_gl[4*elec_num*(j+nw*elec_num)]; @@ -2999,13 +2999,20 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context, f *= 0.5; } - factor_ee_gl_0[i] = factor_ee_gl_0[i] + f*dx[0]; - factor_ee_gl_1[i] = factor_ee_gl_1[i] + f*dx[1]; - factor_ee_gl_2[i] = factor_ee_gl_2[i] + f*dx[2]; - factor_ee_gl_3[i] = factor_ee_gl_3[i] + f*dx[3]; + if (touched) { + factor_ee_gl_0[i] = factor_ee_gl_0[i] + f*dx[0]; + factor_ee_gl_1[i] = factor_ee_gl_1[i] + f*dx[1]; + factor_ee_gl_2[i] = factor_ee_gl_2[i] + f*dx[2]; + factor_ee_gl_3[i] = factor_ee_gl_3[i] + f*dx[3]; + } else { + touched = true; + factor_ee_gl_0[i] = f*dx[0]; + factor_ee_gl_1[i] = f*dx[1]; + factor_ee_gl_2[i] = f*dx[2]; + factor_ee_gl_3[i] = f*dx[3]; + } factor_ee_gl_3[i] = factor_ee_gl_3[i] - f*grad_c2*invdenom*2.0 * b_vector[1]; - xk[0] = 1.0; for (int k=1 ; k<= bord_num ; ++k) { xk[k] = xk[k-1]*x; @@ -3022,6 +3029,9 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context, } } } + if (!touched) { + memset(&(factor_ee_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double)); + } } return QMCKL_SUCCESS; @@ -4869,7 +4879,7 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context, #pragma omp parallel for #endif for (int64_t nw = 0; nw < walk_num; ++nw) { - memset(&(factor_en_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double)); + bool touched = false; for (int64_t a = 0; a < nucl_num; ++a) { const double* dxj = &en_distance_rescaled_gl[4*elec_num*(a+nw*nucl_num)]; @@ -4896,12 +4906,20 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context, double f = a_vec[0] * invdenom2; - factor_en_gl_0[i] = factor_en_gl_0[i] + f*dx[0]; - factor_en_gl_1[i] = factor_en_gl_1[i] + f*dx[1]; - factor_en_gl_2[i] = factor_en_gl_2[i] + f*dx[2]; - factor_en_gl_3[i] = factor_en_gl_3[i] + f*dx[3]; - factor_en_gl_3[i] = factor_en_gl_3[i] - f*grad_c2*invdenom*2.0 * a_vec[1]; + if (touched) { + factor_en_gl_0[i] = factor_en_gl_0[i] + f*dx[0]; + factor_en_gl_1[i] = factor_en_gl_1[i] + f*dx[1]; + factor_en_gl_2[i] = factor_en_gl_2[i] + f*dx[2]; + factor_en_gl_3[i] = factor_en_gl_3[i] + f*dx[3]; + } else { + touched = true; + factor_en_gl_0[i] = f*dx[0]; + factor_en_gl_1[i] = f*dx[1]; + factor_en_gl_2[i] = f*dx[2]; + factor_en_gl_3[i] = f*dx[3]; + } + factor_en_gl_3[i] = factor_en_gl_3[i] - f*grad_c2*invdenom*2.0 * a_vec[1]; double xk[aord_num+1]; xk[0] = 1.0; @@ -4920,6 +4938,9 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context, } } } + if (!touched) { + memset(&(factor_en_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double)); + } } return QMCKL_SUCCESS; @@ -6051,9 +6072,7 @@ integer function qmckl_compute_een_rescaled_e_doc_f( & allocate(een_rescaled_e_ij(elec_num * (elec_num - 1) / 2, cord_num + 1)) ! Prepare table of exponentiated distances raised to appropriate power - een_rescaled_e = 0.0d0 do nw = 1, walk_num - een_rescaled_e_ij = 0.0d0 een_rescaled_e_ij(:, 1) = 1.0d0 @@ -6191,8 +6210,6 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( #endif for (size_t nw = 0; nw < (size_t) walk_num; ++nw) { - memset(&een_rescaled_e[nw*(cord_num+1)*elec_num*elec_num],0,(cord_num+1)*elec_num*elec_num*sizeof(double)); - size_t kk = 0; for (size_t i = 0; i < (size_t) elec_num; ++i) { double* restrict ee1 = &een_rescaled_e_ij[kk + elec_pairs]; @@ -6231,7 +6248,9 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } } + double* restrict const een_rescaled_e_ = &(een_rescaled_e[nw*(cord_num+1)*e2]); + // prepare the actual een table #ifdef HAVE_OPENMP #pragma omp simd