mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-04-30 04:15:00 +02:00
restrict
This commit is contained in:
parent
04a01add99
commit
fcacc6823a
@ -3352,8 +3352,8 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_delta_p_gl_doc( &
|
||||
|
||||
do nw=1, walk_num
|
||||
do m=1, cord_num-1
|
||||
do j = 1, elec_num
|
||||
do k = 1, 4
|
||||
do k = 1, 4
|
||||
do j = 1, elec_num
|
||||
delta_e_gl(j,k) = een_rescaled_single_e_gl(k,j,m,nw) - een_rescaled_e_gl(num, k, j, m, nw)
|
||||
end do
|
||||
end do
|
||||
@ -3371,7 +3371,7 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_delta_p_gl_doc( &
|
||||
cummu = 0.0d0
|
||||
do i = 1, elec_num
|
||||
|
||||
delta_p_gl(i,a,k,l,m,nw) = -een_rescaled_e_gl(i,k,num,m,nw) * een_re_n&
|
||||
delta_p_gl(i,a,k,l,m,nw) = -een_rescaled_e_gl(i,k,num,m,nw) * een_re_n &
|
||||
- een_rescaled_single_e_gl(k,i,m,nw) * een_re_single_n
|
||||
|
||||
cummu = cummu + delta_e_gl(i,k) * een_rescaled_n(i,a,l,nw)
|
||||
@ -3790,27 +3790,12 @@ integer(qmckl_exit_code) function qmckl_compute_jastrow_champ_factor_single_een_
|
||||
do a = 1, nucl_num
|
||||
cn = c_vector_full(a, n)
|
||||
if(cn == 0.d0) cycle
|
||||
!do i = 1, elec_num
|
||||
! delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + ( &
|
||||
! delta_p_gl(i,a,kk,m ,k,nw) * een_rescaled_n(i,a,m+l,nw) + &
|
||||
! delta_p_gl(i,a,kk,m+l,k,nw) * een_rescaled_n(i,a,m ,nw) + &
|
||||
! delta_p(i,a,m ,k,nw) * een_rescaled_n_gl(i,kk,a,m+l,nw) + &
|
||||
! delta_p(i,a,m+l,k,nw) * een_rescaled_n_gl(i,kk,a,m ,nw) ) * cn
|
||||
!end do
|
||||
do i = 1, elec_num
|
||||
! Cache repeated accesses
|
||||
dpg1_m = delta_p_gl(i,a,kk,m ,k,nw)
|
||||
dpg1_ml = delta_p_gl(i,a,kk,m+l,k,nw)
|
||||
dp_m = delta_p(i,a,m ,k,nw)
|
||||
dp_ml = delta_p(i,a,m+l,k,nw)
|
||||
|
||||
een_r_m = een_rescaled_n(i,a,m ,nw)
|
||||
een_r_ml = een_rescaled_n(i,a,m+l,nw)
|
||||
een_r_gl_m = een_rescaled_n_gl(i,kk,a,m ,nw)
|
||||
een_r_gl_ml = een_rescaled_n_gl(i,kk,a,m+l,nw)
|
||||
|
||||
delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + cn * &
|
||||
(dpg1_m * een_r_ml + dpg1_ml * een_r_m + dp_m * een_r_gl_ml + dp_ml * een_r_gl_m)
|
||||
delta_een_gl(i,kk,nw) = delta_een_gl(i,kk,nw) + ( &
|
||||
delta_p_gl(i,a,kk,m ,k,nw) * een_rescaled_n(i,a,m+l,nw) + &
|
||||
delta_p_gl(i,a,kk,m+l,k,nw) * een_rescaled_n(i,a,m ,nw) + &
|
||||
delta_p(i,a,m ,k,nw) * een_rescaled_n_gl(i,kk,a,m+l,nw) + &
|
||||
delta_p(i,a,m+l,k,nw) * een_rescaled_n_gl(i,kk,a,m ,nw) ) * cn
|
||||
end do
|
||||
|
||||
delta_een_gl(num,kk,nw) = delta_een_gl(num,kk,nw) + ( &
|
||||
@ -3856,17 +3841,17 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
|
||||
const int64_t nucl_num,
|
||||
const int64_t cord_num,
|
||||
const int64_t dim_c_vector,
|
||||
const double* c_vector_full,
|
||||
const int64_t* lkpm_combined_index,
|
||||
const double* tmp_c,
|
||||
const double* dtmp_c,
|
||||
const double* delta_p,
|
||||
const double* delta_p_gl,
|
||||
const double* een_rescaled_n,
|
||||
const double* een_rescaled_single_n,
|
||||
const double* een_rescaled_n_gl,
|
||||
const double* een_rescaled_single_n_gl,
|
||||
double* const delta_een_gl )
|
||||
const double* restrict c_vector_full,
|
||||
const int64_t* restrict lkpm_combined_index,
|
||||
const double* restrict tmp_c,
|
||||
const double* restrict dtmp_c,
|
||||
const double* restrict delta_p,
|
||||
const double* restrict delta_p_gl,
|
||||
const double* restrict een_rescaled_n,
|
||||
const double* restrict een_rescaled_single_n,
|
||||
const double* restrict een_rescaled_n_gl,
|
||||
const double* restrict een_rescaled_single_n_gl,
|
||||
double* restrict const delta_een_gl )
|
||||
{
|
||||
|
||||
|
||||
@ -3884,7 +3869,9 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int64_t nw=0 ; nw<walk_num ; nw++) {
|
||||
for (size_t i=0 ; i<4*elec_num ; ++i) {
|
||||
delta_een_gl[i+nw*4*elec_num] = 0.;
|
||||
@ -3905,21 +3892,24 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
|
||||
const int64_t m = lkpm_combined_index[n+3*dim_c_vector];
|
||||
|
||||
for (int64_t kk=0 ; kk<4 ; ++kk) {
|
||||
double* dgl = &delta_een_gl[elec_num*(kk+4*nw)];
|
||||
double* restrict dgl = &delta_een_gl[elec_num*(kk+4*nw)];
|
||||
|
||||
for (int64_t a=0 ; a<nucl_num ; ++a) {
|
||||
const double cn = c_vector_full[a+n*nucl_num];
|
||||
if (cn == 0.) continue;
|
||||
|
||||
const double* dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dp_m = &delta_p[elec_num*(a+nucl_num*(m+(cord_num+1)*(k+cord_num*nw)))];
|
||||
const double* dp_ml = &delta_p[elec_num*(a+nucl_num*(m+l+(cord_num+1)*(k+cord_num*nw)))];
|
||||
const double* een_r_m = &een_rescaled_n[elec_num*(a+nucl_num*(m+(cord_num+1)*nw))];
|
||||
const double* een_r_ml = &een_rescaled_n[elec_num*(a+nucl_num*(m+l+(cord_num+1)*nw))];
|
||||
const double* een_r_gl_m = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl_ml = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* restrict dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(kk+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dp_m = &delta_p[elec_num*(a+nucl_num*(m+(cord_num+1)*(k+cord_num*nw)))];
|
||||
const double* restrict dp_ml = &delta_p[elec_num*(a+nucl_num*(m+l+(cord_num+1)*(k+cord_num*nw)))];
|
||||
const double* restrict een_r_m = &een_rescaled_n[elec_num*(a+nucl_num*(m+(cord_num+1)*nw))];
|
||||
const double* restrict een_r_ml = &een_rescaled_n[elec_num*(a+nucl_num*(m+l+(cord_num+1)*nw))];
|
||||
const double* restrict een_r_gl_m = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl_ml = &een_rescaled_n_gl[elec_num*(kk+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<elec_num ; ++i) {
|
||||
dgl[i] += cn * (dpg1_m[i] * een_r_ml[i] + dpg1_ml[i] * een_r_m[i] +
|
||||
dp_m[i] * een_r_gl_ml[i] + dp_ml[i] * een_r_gl_m[i]);
|
||||
@ -3944,21 +3934,24 @@ qmckl_compute_jastrow_champ_factor_single_een_gl_hpc (const qmckl_context contex
|
||||
const double cn = 2. * c_vector_full[a+n*nucl_num];
|
||||
if (cn == 0.) continue;
|
||||
|
||||
double* dgl4 = &delta_een_gl[elec_num*(3+4*nw)];
|
||||
double* restrict dgl4 = &delta_een_gl[elec_num*(3+4*nw)];
|
||||
|
||||
const double* dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg2_m = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg3_m = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg2_ml = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* dpg3_ml = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* een_r_gl1_m = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl2_m = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl3_m = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl1_ml = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl2_ml = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* een_r_gl3_ml = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* restrict dpg1_m = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg2_m = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg3_m = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg1_ml = &delta_p_gl[elec_num*(a+nucl_num*(0+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg2_ml = &delta_p_gl[elec_num*(a+nucl_num*(1+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict dpg3_ml = &delta_p_gl[elec_num*(a+nucl_num*(2+4*(m+l+(cord_num+1)*(k+cord_num*nw))))];
|
||||
const double* restrict een_r_gl1_m = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl2_m = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl3_m = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl1_ml = &een_rescaled_n_gl[elec_num*(0+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl2_ml = &een_rescaled_n_gl[elec_num*(1+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
const double* restrict een_r_gl3_ml = &een_rescaled_n_gl[elec_num*(2+4*(a+nucl_num*(m+l+(cord_num+1)*nw)))];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<elec_num ; ++i) {
|
||||
dgl4[i] += (dpg1_m[i] * een_r_gl1_ml[i] + dpg1_ml[i] * een_r_gl1_m[i] +
|
||||
dpg2_m[i] * een_r_gl2_ml[i] + dpg2_ml[i] * een_r_gl2_m[i] +
|
||||
|
Loading…
x
Reference in New Issue
Block a user