Vectorization

This commit is contained in:
Anthony Scemama 2024-02-14 10:42:55 +01:00
parent 48b80f68f1
commit 2228ab23c5
1 changed files with 63 additions and 31 deletions

View File

@ -2943,7 +2943,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
const double* b_vector,
const double* ee_distance_rescaled,
const double* ee_distance_rescaled_gl,
const int32_t spin_independent,
const int32_t spin_independent,
double* const factor_ee_gl )
{
@ -3041,7 +3041,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl (const qmckl_context context,
const double* b_vector,
const double* ee_distance_rescaled,
const double* ee_distance_rescaled_gl,
const int32_t spin_independent,
const int32_t spin_independent,
double* const factor_ee_gl );
#+end_src
@ -3056,7 +3056,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc (const qmckl_context context,
const double* b_vector,
const double* ee_distance_rescaled,
const double* ee_distance_rescaled_gl,
const int32_t spin_independent,
const int32_t spin_independent,
double* const factor_ee_gl );
#+end_src
@ -3071,7 +3071,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_doc (const qmckl_context context,
const double* b_vector,
const double* ee_distance_rescaled,
const double* ee_distance_rescaled_gl,
const int32_t spin_independent,
const int32_t spin_independent,
double* const factor_ee_gl );
#+end_src
@ -3086,7 +3086,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl (const qmckl_context context,
const double* b_vector,
const double* ee_distance_rescaled,
const double* ee_distance_rescaled_gl,
const int32_t spin_independent,
const int32_t spin_independent,
double* const factor_ee_gl )
{
#ifdef HAVE_HPC
@ -6219,7 +6219,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
for (size_t k = 0; k < elec_pairs; ++k) {
// een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2)
// een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] *
// een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] *
// een_rescaled_e_ij[k + elec_pairs];
ee1[k] = ee2[k] * ee3[k];
}
@ -6709,46 +6709,75 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
double* restrict elec_dist_gl = (double*) calloc(elec_num * 4 * elec_num, sizeof(double));
assert (elec_dist_gl != NULL);
double* restrict elec_dist_gl0 = (double*) calloc(elec_num * elec_num, sizeof(double));
double* restrict elec_dist_gl1 = (double*) calloc(elec_num * elec_num, sizeof(double));
double* restrict elec_dist_gl2 = (double*) calloc(elec_num * elec_num, sizeof(double));
double* restrict elec_dist_gl3 = (double*) calloc(elec_num * elec_num, sizeof(double));
assert (elec_dist_gl0 != NULL);
assert (elec_dist_gl1 != NULL);
assert (elec_dist_gl2 != NULL);
assert (elec_dist_gl3 != NULL);
#pragma omp parallel for
for (int64_t nw = 0; nw < walk_num; ++nw) {
double rij_inv[elec_num];
for (int64_t j = 0; j < elec_num; ++j) {
for (int64_t i = 0; i < j ; ++i) {
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
for (int64_t ii = 0; ii < 3; ++ii) {
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
}
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = ee_distance[i + j * elec_num + nw * elec_num * elec_num] + 1.e-30;
}
for (int64_t i = j+1; i < elec_num; ++i) {
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
for (int64_t ii = 0; ii < 3; ++ii) {
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
}
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = 1.0/rij_inv[i];
}
rij_inv[j] = 0.;
const double xj = coord_ee[j + nw * elec_num * 3];
const double yj = coord_ee[j + elec_num + nw * elec_num * 3];
const double zj = coord_ee[j + 2 * elec_num + nw * elec_num * 3];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
const double xi = coord_ee[i + nw * elec_num * 3];
const double yi = coord_ee[i + elec_num + nw * elec_num * 3];
const double zi = coord_ee[i + 2 * elec_num + nw * elec_num * 3];
elec_dist_gl0[i + j * elec_num] = rij_inv[i] * (xi-xj);
elec_dist_gl1[i + j * elec_num] = rij_inv[i] * (yi-yj);
elec_dist_gl2[i + j * elec_num] = rij_inv[i] * (zi-zj);
elec_dist_gl3[i + j * elec_num] = rij_inv[i] + rij_inv[i];
}
}
for (int64_t l = 1; l <= cord_num; ++l) {
double kappa_l = - (double)l * rescale_factor_ee;
for (int64_t j = 0; j < elec_num; ++j) {
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
for (int64_t k = 0; k < 4; ++k) {
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i + elec_num * k] = kappa_l * elec_dist_gl[i + k * elec_num + j * 4 * elec_num];
}
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i ] = kappa_l * elec_dist_gl0[i + j * elec_num];
eegl[i + elec_num ] = kappa_l * elec_dist_gl1[i + j * elec_num];
eegl[i + elec_num * 2] = kappa_l * elec_dist_gl2[i + j * elec_num];
eegl[i + elec_num * 3] = kappa_l * elec_dist_gl3[i + j * elec_num];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
eegl[i] * eegl[i] +
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
eegl[i + elec_num*2] * eegl[i + elec_num*2];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i ] *= ee[i];
eegl[i + elec_num * 1] *= ee[i];
@ -6759,12 +6788,15 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
}
}
free(elec_dist_gl);
free(elec_dist_gl0);
free(elec_dist_gl1);
free(elec_dist_gl2);
free(elec_dist_gl3);
return QMCKL_SUCCESS;
}
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes
qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl (
const qmckl_context context,
@ -6778,7 +6810,7 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl (
double* const een_rescaled_e_gl )
{
#ifdef HAVE_HPC
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc
#else
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_doc
#endif