1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 12:23:56 +01:00

OpenMP in Fortran function

This commit is contained in:
Anthony Scemama 2024-02-13 17:21:53 +01:00
parent 949cfb6f82
commit 24e3f8dd11

View File

@ -6579,8 +6579,9 @@ integer function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f( &
endif endif
! Prepare table of exponentiated distances raised to appropriate power ! Prepare table of exponentiated distances raised to appropriate power
een_rescaled_e_gl = 0.0d0 !$OMP PARALLEL DO
do nw = 1, walk_num do nw = 1, walk_num
een_rescaled_e_gl(:,:,:,:,nw) = 0.d0
do j = 1, elec_num do j = 1, elec_num
do i = 1, j-1 do i = 1, j-1
rij_inv = 1.0d0 / ee_distance(i, j, nw) rij_inv = 1.0d0 / ee_distance(i, j, nw)
@ -6624,6 +6625,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f( &
end do end do
end do end do
end do end do
!$OMP END PARALLEL DO
end function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f end function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f
#+end_src #+end_src
@ -10177,7 +10179,7 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
const size_t elec_num3 = elec_num * 3; const size_t elec_num3 = elec_num * 3;
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp parallel for #pragma omp parallel for schedule(guided)
#endif #endif
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) { for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
memset(&factor_een_gl[elec_num*4*nw], 0, elec_num*4*sizeof(double)); memset(&factor_een_gl[elec_num*4*nw], 0, elec_num*4*sizeof(double));
@ -10241,44 +10243,54 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
const double v1 = (tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j];
const double v2 =
tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j];
const double v3 =
tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j];
tmp3[j] = tmp3[j] =
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j] + dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
tmp3[j] = tmp3[j] +
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j] + dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
tmp3[j] = tmp3[j] +
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
}
const double v4 = #ifdef HAVE_OPENMP
tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + #pragma omp simd
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + #endif
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
tmp3[j]*2.0; tmp3[j]*2.0);
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * v1;
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * v2;
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * v3;
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * v4;
} }
} }