mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Avoid memset in Jastrow
This commit is contained in:
parent
2f0ca9f674
commit
41615ba14b
@ -6219,6 +6219,9 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
|
||||
for (size_t l = 2; l < (size_t) (cord_num+1); ++l) {
|
||||
double* restrict ee1 = &een_rescaled_e_ij[l*elec_pairs];
|
||||
const double* restrict ee2 = &een_rescaled_e_ij[(l-1)*elec_pairs];
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t k = 0; k < elec_pairs; ++k) {
|
||||
// een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2)
|
||||
|
||||
@ -10337,10 +10340,10 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
const size_t elec_num3 = elec_num * 3;
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel for schedule(guided)
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
|
||||
memset(&factor_een_gl[elec_num*4*nw], 0, elec_num*4*sizeof(double));
|
||||
bool touched = false;
|
||||
double* const restrict factor_een_gl_0nw = &(factor_een_gl[elec_num*4*nw]);
|
||||
for (size_t n = 0; n < (size_t) dim_c_vector; ++n) {
|
||||
const size_t l = lkpm_combined_index[n];
|
||||
@ -10397,62 +10400,125 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
|
||||
double tmp3[elec_num];
|
||||
|
||||
if (touched) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
tmp3[j] =
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
tmp3[j] =
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||
}
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||
}
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
||||
tmp3[j]*2.0);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
touched = true;
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
||||
tmp3[j]*2.0);
|
||||
}
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_0nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
tmp3[j] =
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_1nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_2nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_3nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
||||
tmp3[j]*2.0);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!touched) {
|
||||
memset(factor_een_gl_0nw, 0, elec_num*4*sizeof(double));
|
||||
}
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user