From 949cfb6f82cf56e44cf127a36e43b97c7d4d8bcd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 13 Feb 2024 16:51:13 +0100 Subject: [PATCH] Accelerated Jastrow (OpenMP) --- org/qmckl_jastrow_champ.org | 134 +++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 54 deletions(-) diff --git a/org/qmckl_jastrow_champ.org b/org/qmckl_jastrow_champ.org index 4f36b9f..4b5686f 100644 --- a/org/qmckl_jastrow_champ.org +++ b/org/qmckl_jastrow_champ.org @@ -2481,8 +2481,12 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context, const int64_t dn_num = elec_num - up_num; const double fshift = 0.5 * (double) ((dn_num-1)*dn_num + (up_num-1)*up_num) * asymp_jasb[0] + (float) (up_num*dn_num) * asymp_jasb[1]; + +#ifdef HAVE_OPENMP + #pragma omp parallel +#endif for (int nw = 0; nw < walk_num; ++nw) { - factor_ee[nw] = 0.; + double result = 0.; size_t ishift = nw * elec_num * elec_num; @@ -2491,7 +2495,7 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context, for (int j = 0; j < elec_num; ++j ) { const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]); for (int i = 0; i < j ; ++i) { - factor_ee[nw] = factor_ee[nw] + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); + result = result + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); } } @@ -2500,23 +2504,23 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context, for (int j = 0; j < up_num; ++j ) { const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]); for (int i = 0; i < j ; ++i) { - factor_ee[nw] = factor_ee[nw] + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); + result = result + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); } } for (int j = up_num ; j < elec_num; ++j ) { const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]); for (int i = 0; i < up_num; ++i) { - factor_ee[nw] = factor_ee[nw] + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); + result = result + b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); } for (int i = up_num ; i < j ; ++i) { - factor_ee[nw] = factor_ee[nw] + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); + result = result + 0.5 * b_vector[0]*xj[i] / (1. + b_vector[1]*xj[i]); } } } - factor_ee[nw] = factor_ee[nw] - fshift; + result = result - fshift; for (int j=0; j < elec_num; ++j ) { const double* xj = &(ee_distance_rescaled[j * elec_num + ishift]); @@ -2525,11 +2529,12 @@ qmckl_compute_jastrow_champ_factor_ee_hpc (const qmckl_context context, double xk = x; for (int k = 2; k <= bord_num; ++k) { xk *= x; - factor_ee[nw] = factor_ee[nw] + b_vector[k] * xk; + result = result + b_vector[k] * xk; } } } + factor_ee[nw] = result; } return QMCKL_SUCCESS; @@ -2958,8 +2963,11 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context, kf[k] = (double) k; } +#ifdef HAVE_OPENMP #pragma omp parallel for +#endif for (int nw = 0; nw < walk_num; ++nw) { + double xk[bord_num+1]; memset(&(factor_ee_gl[nw*4*elec_num]), 0, elec_num*4*sizeof(double)); for (int j = 0; j < elec_num; ++j) { @@ -2998,7 +3006,6 @@ qmckl_compute_jastrow_champ_factor_ee_gl_hpc(const qmckl_context context, factor_ee_gl_3[i] = factor_ee_gl_3[i] - f*grad_c2*invdenom*2.0 * b_vector[1]; - double xk[bord_num+1]; // Nvidia C 23.1-0 compiler crashes here (skylake avx512) nvc nvfoftran --enable-hpc xk[0] = 1.0; for (int k=1 ; k<= bord_num ; ++k) { xk[k] = xk[k-1]*x; @@ -3471,8 +3478,10 @@ qmckl_exit_code qmckl_compute_ee_distance_rescaled_hpc ( qmckl_exit_code result = QMCKL_SUCCESS; +#ifdef HAVE_OPENMP #pragma omp parallel { +#endif qmckl_exit_code rc = QMCKL_SUCCESS; #pragma omp for for (int64_t k=0 ; k