diff --git a/org/qmckl_jastrow_champ.org b/org/qmckl_jastrow_champ.org index 4b5686f..24beab7 100644 --- a/org/qmckl_jastrow_champ.org +++ b/org/qmckl_jastrow_champ.org @@ -6579,8 +6579,9 @@ integer function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f( & endif ! Prepare table of exponentiated distances raised to appropriate power - een_rescaled_e_gl = 0.0d0 +!$OMP PARALLEL DO do nw = 1, walk_num + een_rescaled_e_gl(:,:,:,:,nw) = 0.d0 do j = 1, elec_num do i = 1, j-1 rij_inv = 1.0d0 / ee_distance(i, j, nw) @@ -6624,6 +6625,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f( & end do end do end do +!$OMP END PARALLEL DO end function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f #+end_src @@ -10177,7 +10179,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, const size_t elec_num3 = elec_num * 3; #ifdef HAVE_OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(guided) #endif for (size_t nw = 0; nw < (size_t) walk_num; ++nw) { memset(&factor_een_gl[elec_num*4*nw], 0, elec_num*4*sizeof(double)); @@ -10241,44 +10243,54 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context, #pragma omp simd #endif for (size_t j = 0; j < (size_t) elec_num; ++j) { - - const double v1 = - tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]; - - const double v2 = - tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]; - - const double v3 = - tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + - tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]; - + factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * + (tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + + dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); tmp3[j] = dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + - dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j] + + dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j]; + } + +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (size_t j = 0; j < (size_t) elec_num; ++j) { + factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * + (tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + + dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); + tmp3[j] = tmp3[j] + dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + - dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j] + + dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j]; + } + +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (size_t j = 0; j < (size_t) elec_num; ++j) { + factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * + (tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + + dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + + dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + + tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); + tmp3[j] = tmp3[j] + dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + - dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; + dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; + } - const double v4 = - tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + - dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + - dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (size_t j = 0; j < (size_t) elec_num; ++j) { + factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * + (tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + + dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + + dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + - tmp3[j]*2.0; - - factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * v1; - factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * v2; - factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * v3; - factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * v4; + tmp3[j]*2.0); } }