mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-04 13:14:02 +01:00
Vectorization
This commit is contained in:
parent
48b80f68f1
commit
2228ab23c5
@ -2943,7 +2943,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context,
|
||||
const double* b_vector,
|
||||
const double* ee_distance_rescaled,
|
||||
const double* ee_distance_rescaled_gl,
|
||||
const int32_t spin_independent,
|
||||
const int32_t spin_independent,
|
||||
double* const factor_ee_gl )
|
||||
{
|
||||
|
||||
@ -3041,7 +3041,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl (const qmckl_context context,
|
||||
const double* b_vector,
|
||||
const double* ee_distance_rescaled,
|
||||
const double* ee_distance_rescaled_gl,
|
||||
const int32_t spin_independent,
|
||||
const int32_t spin_independent,
|
||||
double* const factor_ee_gl );
|
||||
#+end_src
|
||||
|
||||
@ -3056,7 +3056,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc (const qmckl_context context,
|
||||
const double* b_vector,
|
||||
const double* ee_distance_rescaled,
|
||||
const double* ee_distance_rescaled_gl,
|
||||
const int32_t spin_independent,
|
||||
const int32_t spin_independent,
|
||||
double* const factor_ee_gl );
|
||||
|
||||
#+end_src
|
||||
@ -3071,7 +3071,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl_doc (const qmckl_context context,
|
||||
const double* b_vector,
|
||||
const double* ee_distance_rescaled,
|
||||
const double* ee_distance_rescaled_gl,
|
||||
const int32_t spin_independent,
|
||||
const int32_t spin_independent,
|
||||
double* const factor_ee_gl );
|
||||
#+end_src
|
||||
|
||||
@ -3086,7 +3086,7 @@ qmckl_compute_jastrow_champ_factor_ee_gl (const qmckl_context context,
|
||||
const double* b_vector,
|
||||
const double* ee_distance_rescaled,
|
||||
const double* ee_distance_rescaled_gl,
|
||||
const int32_t spin_independent,
|
||||
const int32_t spin_independent,
|
||||
double* const factor_ee_gl )
|
||||
{
|
||||
#ifdef HAVE_HPC
|
||||
@ -6219,7 +6219,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
|
||||
for (size_t k = 0; k < elec_pairs; ++k) {
|
||||
// een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2)
|
||||
|
||||
// een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] *
|
||||
// een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] *
|
||||
// een_rescaled_e_ij[k + elec_pairs];
|
||||
ee1[k] = ee2[k] * ee3[k];
|
||||
}
|
||||
@ -6709,46 +6709,75 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
|
||||
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
|
||||
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
|
||||
|
||||
double* restrict elec_dist_gl = (double*) calloc(elec_num * 4 * elec_num, sizeof(double));
|
||||
assert (elec_dist_gl != NULL);
|
||||
double* restrict elec_dist_gl0 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||
double* restrict elec_dist_gl1 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||
double* restrict elec_dist_gl2 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||
double* restrict elec_dist_gl3 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||
assert (elec_dist_gl0 != NULL);
|
||||
assert (elec_dist_gl1 != NULL);
|
||||
assert (elec_dist_gl2 != NULL);
|
||||
assert (elec_dist_gl3 != NULL);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int64_t nw = 0; nw < walk_num; ++nw) {
|
||||
double rij_inv[elec_num];
|
||||
for (int64_t j = 0; j < elec_num; ++j) {
|
||||
for (int64_t i = 0; i < j ; ++i) {
|
||||
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
|
||||
for (int64_t ii = 0; ii < 3; ++ii) {
|
||||
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
|
||||
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
|
||||
}
|
||||
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||
rij_inv[i] = ee_distance[i + j * elec_num + nw * elec_num * elec_num] + 1.e-30;
|
||||
}
|
||||
for (int64_t i = j+1; i < elec_num; ++i) {
|
||||
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
|
||||
for (int64_t ii = 0; ii < 3; ++ii) {
|
||||
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
|
||||
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
|
||||
}
|
||||
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||
rij_inv[i] = 1.0/rij_inv[i];
|
||||
}
|
||||
rij_inv[j] = 0.;
|
||||
const double xj = coord_ee[j + nw * elec_num * 3];
|
||||
const double yj = coord_ee[j + elec_num + nw * elec_num * 3];
|
||||
const double zj = coord_ee[j + 2 * elec_num + nw * elec_num * 3];
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||
const double xi = coord_ee[i + nw * elec_num * 3];
|
||||
const double yi = coord_ee[i + elec_num + nw * elec_num * 3];
|
||||
const double zi = coord_ee[i + 2 * elec_num + nw * elec_num * 3];
|
||||
elec_dist_gl0[i + j * elec_num] = rij_inv[i] * (xi-xj);
|
||||
elec_dist_gl1[i + j * elec_num] = rij_inv[i] * (yi-yj);
|
||||
elec_dist_gl2[i + j * elec_num] = rij_inv[i] * (zi-zj);
|
||||
elec_dist_gl3[i + j * elec_num] = rij_inv[i] + rij_inv[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int64_t l = 1; l <= cord_num; ++l) {
|
||||
double kappa_l = - (double)l * rescale_factor_ee;
|
||||
for (int64_t j = 0; j < elec_num; ++j) {
|
||||
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||
for (int64_t k = 0; k < 4; ++k) {
|
||||
for (int64_t i = 0; i < elec_num; ++i) {
|
||||
eegl[i + elec_num * k] = kappa_l * elec_dist_gl[i + k * elec_num + j * 4 * elec_num];
|
||||
}
|
||||
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num; ++i) {
|
||||
eegl[i ] = kappa_l * elec_dist_gl0[i + j * elec_num];
|
||||
eegl[i + elec_num ] = kappa_l * elec_dist_gl1[i + j * elec_num];
|
||||
eegl[i + elec_num * 2] = kappa_l * elec_dist_gl2[i + j * elec_num];
|
||||
eegl[i + elec_num * 3] = kappa_l * elec_dist_gl3[i + j * elec_num];
|
||||
}
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num; ++i) {
|
||||
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
|
||||
eegl[i] * eegl[i] +
|
||||
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
|
||||
eegl[i + elec_num*2] * eegl[i + elec_num*2];
|
||||
}
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i = 0; i < elec_num; ++i) {
|
||||
eegl[i ] *= ee[i];
|
||||
eegl[i + elec_num * 1] *= ee[i];
|
||||
@ -6759,12 +6788,15 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
|
||||
}
|
||||
}
|
||||
|
||||
free(elec_dist_gl);
|
||||
free(elec_dist_gl0);
|
||||
free(elec_dist_gl1);
|
||||
free(elec_dist_gl2);
|
||||
free(elec_dist_gl3);
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl (
|
||||
const qmckl_context context,
|
||||
@ -6778,7 +6810,7 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl (
|
||||
double* const een_rescaled_e_gl )
|
||||
{
|
||||
#ifdef HAVE_HPC
|
||||
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc
|
||||
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc
|
||||
#else
|
||||
return qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_doc
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user