mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 01:56:18 +01:00
Fixed HPC
This commit is contained in:
parent
bae64e75ab
commit
6ad40e0cf2
@ -11298,6 +11298,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
const double* restrict dtmp_c_2amlknw = dtmp_c_0amlknw + elec_num2;
|
||||
const double* restrict dtmp_c_3amknw = dtmp_c_0amknw + elec_num3;
|
||||
const double* restrict dtmp_c_3amlknw = dtmp_c_0amlknw + elec_num3;
|
||||
|
||||
const double* restrict een_rescaled_n_gl_1amnw = een_rescaled_n_gl_0amnw + elec_num;
|
||||
const double* restrict een_rescaled_n_gl_1amlnw = een_rescaled_n_gl_0amlnw + elec_num;
|
||||
const double* restrict een_rescaled_n_gl_2amnw = een_rescaled_n_gl_0amnw + elec_num2;
|
||||
@ -11738,18 +11739,18 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc (
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
qmckl_exit_code
|
||||
qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
|
||||
const int64_t walk_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t cord_num,
|
||||
const int64_t dim_c_vector,
|
||||
const double *c_vector_full,
|
||||
const int64_t *lkpm_combined_index,
|
||||
const double *tmp_c,
|
||||
const double *dtmp_c,
|
||||
const double *een_rescaled_n,
|
||||
const double *een_rescaled_n_gl,
|
||||
double* const factor_een_grad)
|
||||
const int64_t walk_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t cord_num,
|
||||
const int64_t dim_c_vector,
|
||||
const double *c_vector_full,
|
||||
const int64_t *lkpm_combined_index,
|
||||
const double *tmp_c,
|
||||
const double *dtmp_c,
|
||||
const double *een_rescaled_n,
|
||||
const double *een_rescaled_n_gl,
|
||||
double* const factor_een_grad)
|
||||
{
|
||||
|
||||
int64_t info = QMCKL_SUCCESS;
|
||||
@ -11771,13 +11772,12 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
const size_t elec_num2 = elec_num << 1;
|
||||
const size_t elec_num2 = elec_num + elec_num;
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
|
||||
bool touched = false;
|
||||
double* const restrict factor_een_grad_0nw = &(factor_een_grad[elec_num*3*nw]);
|
||||
for (size_t n = 0; n < (size_t) dim_c_vector; ++n) {
|
||||
const size_t l = lkpm_combined_index[n];
|
||||
@ -11786,11 +11786,11 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
|
||||
|
||||
const size_t en = elec_num*nucl_num;
|
||||
const size_t len = l*en;
|
||||
const size_t len4 = len << 2;
|
||||
const size_t len4 = len*4;
|
||||
const size_t cn = cord_num*nw;
|
||||
const size_t c1 = cord_num+1;
|
||||
const size_t addr0 = en*(m+c1*(k+cn));
|
||||
const size_t addr1 = en*(m+cn);
|
||||
const size_t addr1 = en*(m+c1*nw);
|
||||
|
||||
const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
|
||||
const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
|
||||
@ -11800,12 +11800,13 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
|
||||
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
|
||||
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]);
|
||||
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
|
||||
|
||||
for (size_t a = 0; a < (size_t) nucl_num; a++) {
|
||||
double cn = c_vector_full[a+n*nucl_num];
|
||||
if (cn == 0.0) continue;
|
||||
|
||||
const size_t ishift = elec_num*a;
|
||||
const size_t ishift4 = ishift << 2;
|
||||
const size_t ishift4 = ishift*4;
|
||||
|
||||
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
|
||||
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
|
||||
@ -11827,82 +11828,40 @@ qmckl_compute_jastrow_champ_factor_een_grad_hpc(const qmckl_context context,
|
||||
double* const restrict factor_een_grad_1nw = factor_een_grad_0nw + elec_num;
|
||||
double* const restrict factor_een_grad_2nw = factor_een_grad_0nw + elec_num2;
|
||||
|
||||
if (touched) {
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
touched = true;
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_0nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_1nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_2nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_0nw[j] = factor_een_grad_0nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!touched) {
|
||||
memset(factor_een_grad_0nw, 0, elec_num*3*sizeof(double));
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_1nw[j] = factor_een_grad_1nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_grad_2nw[j] = factor_een_grad_2nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
return info;
|
||||
|
Loading…
Reference in New Issue
Block a user