mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-05 11:00:36 +01:00
Vectorization
This commit is contained in:
parent
48b80f68f1
commit
2228ab23c5
@ -6709,27 +6709,46 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
|
|||||||
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
|
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
|
||||||
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
|
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
|
||||||
|
|
||||||
double* restrict elec_dist_gl = (double*) calloc(elec_num * 4 * elec_num, sizeof(double));
|
double* restrict elec_dist_gl0 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||||
assert (elec_dist_gl != NULL);
|
double* restrict elec_dist_gl1 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||||
|
double* restrict elec_dist_gl2 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||||
|
double* restrict elec_dist_gl3 = (double*) calloc(elec_num * elec_num, sizeof(double));
|
||||||
|
assert (elec_dist_gl0 != NULL);
|
||||||
|
assert (elec_dist_gl1 != NULL);
|
||||||
|
assert (elec_dist_gl2 != NULL);
|
||||||
|
assert (elec_dist_gl3 != NULL);
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int64_t nw = 0; nw < walk_num; ++nw) {
|
for (int64_t nw = 0; nw < walk_num; ++nw) {
|
||||||
|
double rij_inv[elec_num];
|
||||||
for (int64_t j = 0; j < elec_num; ++j) {
|
for (int64_t j = 0; j < elec_num; ++j) {
|
||||||
for (int64_t i = 0; i < j ; ++i) {
|
#ifdef HAVE_OPENMP
|
||||||
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
|
#pragma omp simd
|
||||||
for (int64_t ii = 0; ii < 3; ++ii) {
|
#endif
|
||||||
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
|
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||||
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
|
rij_inv[i] = ee_distance[i + j * elec_num + nw * elec_num * elec_num] + 1.e-30;
|
||||||
}
|
|
||||||
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
|
|
||||||
}
|
}
|
||||||
for (int64_t i = j+1; i < elec_num; ++i) {
|
#ifdef HAVE_OPENMP
|
||||||
double rij_inv = 1.0 / ee_distance[i + j * elec_num + nw * elec_num * elec_num];
|
#pragma omp simd
|
||||||
for (int64_t ii = 0; ii < 3; ++ii) {
|
#endif
|
||||||
elec_dist_gl[i + ii * elec_num + j * 4 * elec_num] =
|
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||||
(coord_ee[i + ii * elec_num + nw * elec_num * 3] - coord_ee[j + ii * elec_num + nw * elec_num * 3]) * rij_inv;
|
rij_inv[i] = 1.0/rij_inv[i];
|
||||||
}
|
}
|
||||||
elec_dist_gl[i + 3 * elec_num + j * 4 * elec_num] = 2.0 * rij_inv;
|
rij_inv[j] = 0.;
|
||||||
|
const double xj = coord_ee[j + nw * elec_num * 3];
|
||||||
|
const double yj = coord_ee[j + elec_num + nw * elec_num * 3];
|
||||||
|
const double zj = coord_ee[j + 2 * elec_num + nw * elec_num * 3];
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
|
for (int64_t i = 0; i < elec_num ; ++i) {
|
||||||
|
const double xi = coord_ee[i + nw * elec_num * 3];
|
||||||
|
const double yi = coord_ee[i + elec_num + nw * elec_num * 3];
|
||||||
|
const double zi = coord_ee[i + 2 * elec_num + nw * elec_num * 3];
|
||||||
|
elec_dist_gl0[i + j * elec_num] = rij_inv[i] * (xi-xj);
|
||||||
|
elec_dist_gl1[i + j * elec_num] = rij_inv[i] * (yi-yj);
|
||||||
|
elec_dist_gl2[i + j * elec_num] = rij_inv[i] * (zi-zj);
|
||||||
|
elec_dist_gl3[i + j * elec_num] = rij_inv[i] + rij_inv[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6738,17 +6757,27 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
|
|||||||
for (int64_t j = 0; j < elec_num; ++j) {
|
for (int64_t j = 0; j < elec_num; ++j) {
|
||||||
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
|
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||||
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
|
const double* restrict ee = &een_rescaled_e [ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
|
||||||
for (int64_t k = 0; k < 4; ++k) {
|
#ifdef HAVE_OPENMP
|
||||||
for (int64_t i = 0; i < elec_num; ++i) {
|
#pragma omp simd
|
||||||
eegl[i + elec_num * k] = kappa_l * elec_dist_gl[i + k * elec_num + j * 4 * elec_num];
|
#endif
|
||||||
}
|
for (int64_t i = 0; i < elec_num; ++i) {
|
||||||
|
eegl[i ] = kappa_l * elec_dist_gl0[i + j * elec_num];
|
||||||
|
eegl[i + elec_num ] = kappa_l * elec_dist_gl1[i + j * elec_num];
|
||||||
|
eegl[i + elec_num * 2] = kappa_l * elec_dist_gl2[i + j * elec_num];
|
||||||
|
eegl[i + elec_num * 3] = kappa_l * elec_dist_gl3[i + j * elec_num];
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
for (int64_t i = 0; i < elec_num; ++i) {
|
for (int64_t i = 0; i < elec_num; ++i) {
|
||||||
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
|
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
|
||||||
eegl[i] * eegl[i] +
|
eegl[i] * eegl[i] +
|
||||||
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
|
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
|
||||||
eegl[i + elec_num*2] * eegl[i + elec_num*2];
|
eegl[i + elec_num*2] * eegl[i + elec_num*2];
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
for (int64_t i = 0; i < elec_num; ++i) {
|
for (int64_t i = 0; i < elec_num; ++i) {
|
||||||
eegl[i ] *= ee[i];
|
eegl[i ] *= ee[i];
|
||||||
eegl[i + elec_num * 1] *= ee[i];
|
eegl[i + elec_num * 1] *= ee[i];
|
||||||
@ -6759,7 +6788,10 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free(elec_dist_gl);
|
free(elec_dist_gl0);
|
||||||
|
free(elec_dist_gl1);
|
||||||
|
free(elec_dist_gl2);
|
||||||
|
free(elec_dist_gl3);
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user