Avoid memset in Jastrow

This commit is contained in:
Anthony Scemama 2024-02-20 23:59:28 +01:00
parent 41615ba14b
commit e07b8bfa55
1 changed files with 35 additions and 16 deletions

View File

@ -2968,7 +2968,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
#endif
for (int nw = 0; nw < walk_num; ++nw) {
double xk[bord_num+1];
memset(&(factor_ee_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double));
bool touched = false;
for (int j = 0; j < elec_num; ++j) {
const double* dxj = &ee_distance_rescaled_gl[4*elec_num*(j+nw*elec_num)];
@ -2999,13 +2999,20 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
f *= 0.5;
}
factor_ee_gl_0[i] = factor_ee_gl_0[i] + f*dx[0];
factor_ee_gl_1[i] = factor_ee_gl_1[i] + f*dx[1];
factor_ee_gl_2[i] = factor_ee_gl_2[i] + f*dx[2];
factor_ee_gl_3[i] = factor_ee_gl_3[i] + f*dx[3];
if (touched) {
factor_ee_gl_0[i] = factor_ee_gl_0[i] + f*dx[0];
factor_ee_gl_1[i] = factor_ee_gl_1[i] + f*dx[1];
factor_ee_gl_2[i] = factor_ee_gl_2[i] + f*dx[2];
factor_ee_gl_3[i] = factor_ee_gl_3[i] + f*dx[3];
} else {
touched = true;
factor_ee_gl_0[i] = f*dx[0];
factor_ee_gl_1[i] = f*dx[1];
factor_ee_gl_2[i] = f*dx[2];
factor_ee_gl_3[i] = f*dx[3];
}
factor_ee_gl_3[i] = factor_ee_gl_3[i] - f*grad_c2*invdenom*2.0 * b_vector[1];
xk[0] = 1.0;
for (int k=1 ; k<= bord_num ; ++k) {
xk[k] = xk[k-1]*x;
@ -3022,6 +3029,9 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
}
}
}
if (!touched) {
memset(&(factor_ee_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double));
}
}
return QMCKL_SUCCESS;
@ -4869,7 +4879,7 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context,
#pragma omp parallel for
#endif
for (int64_t nw = 0; nw < walk_num; ++nw) {
memset(&(factor_en_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double));
bool touched = false;
for (int64_t a = 0; a < nucl_num; ++a) {
const double* dxj = &en_distance_rescaled_gl[4*elec_num*(a+nw*nucl_num)];
@ -4896,12 +4906,20 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context,
double f = a_vec[0] * invdenom2;
factor_en_gl_0[i] = factor_en_gl_0[i] + f*dx[0];
factor_en_gl_1[i] = factor_en_gl_1[i] + f*dx[1];
factor_en_gl_2[i] = factor_en_gl_2[i] + f*dx[2];
factor_en_gl_3[i] = factor_en_gl_3[i] + f*dx[3];
factor_en_gl_3[i] = factor_en_gl_3[i] - f*grad_c2*invdenom*2.0 * a_vec[1];
if (touched) {
factor_en_gl_0[i] = factor_en_gl_0[i] + f*dx[0];
factor_en_gl_1[i] = factor_en_gl_1[i] + f*dx[1];
factor_en_gl_2[i] = factor_en_gl_2[i] + f*dx[2];
factor_en_gl_3[i] = factor_en_gl_3[i] + f*dx[3];
} else {
touched = true;
factor_en_gl_0[i] = f*dx[0];
factor_en_gl_1[i] = f*dx[1];
factor_en_gl_2[i] = f*dx[2];
factor_en_gl_3[i] = f*dx[3];
}
factor_en_gl_3[i] = factor_en_gl_3[i] - f*grad_c2*invdenom*2.0 * a_vec[1];
double xk[aord_num+1];
xk[0] = 1.0;
@ -4920,6 +4938,9 @@ qmckl_compute_jastrow_champ_factor_en_gl_hpc (const qmckl_context context,
}
}
}
if (!touched) {
memset(&(factor_en_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double));
}
}
return QMCKL_SUCCESS;
@ -6051,9 +6072,7 @@ integer function qmckl_compute_een_rescaled_e_doc_f( &
allocate(een_rescaled_e_ij(elec_num * (elec_num - 1) / 2, cord_num + 1))
! Prepare table of exponentiated distances raised to appropriate power
een_rescaled_e = 0.0d0
do nw = 1, walk_num
een_rescaled_e_ij = 0.0d0
een_rescaled_e_ij(:, 1) = 1.0d0
@ -6191,8 +6210,6 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
#endif
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
memset(&een_rescaled_e[nw*(cord_num+1)*elec_num*elec_num],0,(cord_num+1)*elec_num*elec_num*sizeof(double));
size_t kk = 0;
for (size_t i = 0; i < (size_t) elec_num; ++i) {
double* restrict ee1 = &een_rescaled_e_ij[kk + elec_pairs];
@ -6231,7 +6248,9 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
}
}
double* restrict const een_rescaled_e_ = &(een_rescaled_e[nw*(cord_num+1)*e2]);
// prepare the actual een table
#ifdef HAVE_OPENMP
#pragma omp simd