1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 01:56:18 +01:00

Fixed HPC

This commit is contained in:
Anthony Scemama 2024-12-14 14:50:20 +01:00
parent bae64e75ab
commit 6ad40e0cf2

View File

@ -11298,6 +11298,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
const double* restrict dtmp_c_2amlknw = dtmp_c_0amlknw + elec_num2;
const double* restrict dtmp_c_3amknw = dtmp_c_0amknw + elec_num3;
const double* restrict dtmp_c_3amlknw = dtmp_c_0amlknw + elec_num3;
const double* restrict een_rescaled_n_gl_1amnw = een_rescaled_n_gl_0amnw + elec_num;
const double* restrict een_rescaled_n_gl_1amlnw = een_rescaled_n_gl_0amlnw + elec_num;
const double* restrict een_rescaled_n_gl_2amnw = een_rescaled_n_gl_0amnw + elec_num2;
@ -11738,18 +11739,18 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc (
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code
qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
const int64_t walk_num,
const int64_t elec_num,
const int64_t nucl_num,
const int64_t cord_num,
const int64_t dim_c_vector,
const double *c_vector_full,
const int64_t *lkpm_combined_index,
const double *tmp_c,
const double *dtmp_c,
const double *een_rescaled_n,
const double *een_rescaled_n_gl,
double* const factor_een_grad)
const int64_t walk_num,
const int64_t elec_num,
const int64_t nucl_num,
const int64_t cord_num,
const int64_t dim_c_vector,
const double *c_vector_full,
const int64_t *lkpm_combined_index,
const double *tmp_c,
const double *dtmp_c,
const double *een_rescaled_n,
const double *een_rescaled_n_gl,
double* const factor_een_grad)
{
int64_t info = QMCKL_SUCCESS;
@ -11771,13 +11772,12 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
return QMCKL_SUCCESS;
}
const size_t elec_num2 = elec_num << 1;
const size_t elec_num2 = elec_num + elec_num;
#ifdef HAVE_OPENMP
#pragma omp parallel for
#endif
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
bool touched = false;
double* const restrict factor_een_grad_0nw = &(factor_een_grad[elec_num*3*nw]);
for (size_t n = 0; n < (size_t) dim_c_vector; ++n) {
const size_t l = lkpm_combined_index[n];
@ -11786,11 +11786,11 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
const size_t en = elec_num*nucl_num;
const size_t len = l*en;
const size_t len4 = len << 2;
const size_t len4 = len*4;
const size_t cn = cord_num*nw;
const size_t c1 = cord_num+1;
const size_t addr0 = en*(m+c1*(k+cn));
const size_t addr1 = en*(m+cn);
const size_t addr1 = en*(m+c1*nw);
const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
@ -11800,12 +11800,13 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]);
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
for (size_t a = 0; a < (size_t) nucl_num; a++) {
double cn = c_vector_full[a+n*nucl_num];
if (cn == 0.0) continue;
const size_t ishift = elec_num*a;
const size_t ishift4 = ishift << 2;
const size_t ishift4 = ishift*4;
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
@ -11827,83 +11828,41 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
double* const restrict factor_een_grad_1nw = factor_een_grad_0nw + elec_num;
double* const restrict factor_een_grad_2nw = factor_een_grad_0nw + elec_num2;
if (touched) {
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
}
} else {
touched = true;
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_0nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_1nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_2nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
}
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
}
}
}
if (!touched) {
memset(factor_een_grad_0nw, 0, elec_num*3*sizeof(double));
}
}
return info;
}