1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 20:36:01 +01:00

Fixed HPC Jastrow

This commit is contained in:
Anthony Scemama 2024-12-13 17:46:06 +01:00
parent fb86c01008
commit 2402b064b2

View File

@ -10436,20 +10436,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een(qmckl_context context)
ctx->jastrow_champ.factor_een = factor_een; ctx->jastrow_champ.factor_een = factor_een;
} }
/*
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.een_rescaled_e,
ctx->jastrow_champ.een_rescaled_n,
ctx->jastrow_champ.factor_een);
*/
rc = qmckl_compute_jastrow_champ_factor_een(context, rc = qmckl_compute_jastrow_champ_factor_een(context,
ctx->electron.walker.num, ctx->electron.walker.num,
ctx->electron.num, ctx->electron.num,
@ -10522,44 +10508,16 @@ integer function qmckl_compute_jastrow_champ_factor_een_naive_f( &
if (cord_num < 0) info = QMCKL_INVALID_ARG_5 if (cord_num < 0) info = QMCKL_INVALID_ARG_5
if (info /= QMCKL_SUCCESS) return if (info /= QMCKL_SUCCESS) return
! do nw =1, walk_num
! factor_een(nw) = 0.0d0
! do n = 1, dim_c_vector
! l = lkpm_combined_index(n, 1)
! k = lkpm_combined_index(n, 2)
! p = lkpm_combined_index(n, 3)
! m = lkpm_combined_index(n, 4)
! do a = 1, nucl_num
! accu2 = 0.0d0
! cn = c_vector_full(a, n)
! do j = 1, elec_num
! accu = 0.0d0
! do i = 1, elec_num
! accu = accu + een_rescaled_e(i,j,k,nw) * &
! een_rescaled_n(i,a,m,nw)
! end do
! accu2 = accu2 + accu * een_rescaled_n(j,a,m + l,nw)
! end do
! factor_een(nw) = factor_een(nw) + accu2 * cn
! end do
! end do
! end do
do nw =1, walk_num do nw =1, walk_num
factor_een(nw) = 0.d0 factor_een(nw) = 0.d0
do n = 1, dim_c_vector do n = 1, dim_c_vector
l = lkpm_combined_index(n, 1) l = lkpm_combined_index(n, 1)
k = lkpm_combined_index(n, 2) k = lkpm_combined_index(n, 2)
p = lkpm_combined_index(n, 3)
m = lkpm_combined_index(n, 4) m = lkpm_combined_index(n, 4)
do a = 1, nucl_num do a = 1, nucl_num
accu2 = 0.0d0 accu2 = 0.0d0
cn = c_vector_full(a, n) cn = c_vector_full(a, n)
print *, a, l, k, p, cn
do j = 1, elec_num do j = 1, elec_num
accu = 0.0d0 accu = 0.0d0
do i = 1, j-1 do i = 1, j-1
@ -10683,7 +10641,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num)
double precision , intent(out) :: factor_een(walk_num) double precision , intent(out) :: factor_een(walk_num)
integer*8 :: i, a, j, l, k, p, m, n, nw integer*8 :: i, a, j, l, k, m, n, nw
double precision :: accu, accu2, cn double precision :: accu, accu2, cn
info = QMCKL_SUCCESS info = QMCKL_SUCCESS
@ -10703,7 +10661,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
do n = 1, dim_c_vector do n = 1, dim_c_vector
l = lkpm_combined_index(n, 1) l = lkpm_combined_index(n, 1)
k = lkpm_combined_index(n, 2) k = lkpm_combined_index(n, 2)
p = lkpm_combined_index(n, 3)
m = lkpm_combined_index(n, 4) m = lkpm_combined_index(n, 4)
do a = 1, nucl_num do a = 1, nucl_num
@ -10872,6 +10829,32 @@ double factor_een[walk_num];
rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num); rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num);
assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12); assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12);
{
double factor_een_naive[walk_num];
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
assert (ctx != NULL);
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.een_rescaled_e,
ctx->jastrow_champ.een_rescaled_n,
factor_een_naive);
for (int64_t i = 0; i < walk_num; i++) {
if (fabs(factor_een[i] - factor_een_naive[i]) > 1e-12) {
printf("factor_een[%ld] = %e\n", i, factor_een[i]);
printf("factor_een_naive[%ld] = %e\n", i, factor_een_naive[i]);
}
assert(fabs(factor_een[i] - factor_een_naive[i]) < 1e-12);
}
}
#+end_src #+end_src
*** Electron-electron-nucleus Jastrow $f_{een}$ derivative *** Electron-electron-nucleus Jastrow $f_{een}$ derivative
@ -11030,22 +11013,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een_gl(qmckl_context context)
ctx->jastrow_champ.factor_een_gl = factor_een_gl; ctx->jastrow_champ.factor_een_gl = factor_een_gl;
} }
/*
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive (context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.een_rescaled_e,
ctx->jastrow_champ.een_rescaled_n,
ctx->jastrow_champ.een_rescaled_e_gl,
ctx->jastrow_champ.een_rescaled_n_gl,
ctx->jastrow_champ.factor_een_gl);
,*/
rc = qmckl_compute_jastrow_champ_factor_een_gl(context, rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
ctx->electron.walker.num, ctx->electron.walker.num,
ctx->electron.num, ctx->electron.num,
@ -11113,7 +11080,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num) double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num)
double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num) double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num)
integer*8 :: i, a, j, l, k, p, m, n, nw integer*8 :: i, a, j, l, k, m, n, nw
double precision :: accu, accu2, cn double precision :: accu, accu2, cn
double precision :: daccu(1:4), daccu2(1:4) double precision :: daccu(1:4), daccu2(1:4)
@ -11132,7 +11099,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
do n = 1, dim_c_vector do n = 1, dim_c_vector
l = lkpm_combined_index(n, 1) l = lkpm_combined_index(n, 1)
k = lkpm_combined_index(n, 2) k = lkpm_combined_index(n, 2)
p = lkpm_combined_index(n, 3)
m = lkpm_combined_index(n, 4) m = lkpm_combined_index(n, 4)
do a = 1, nucl_num do a = 1, nucl_num
@ -11303,11 +11269,13 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
if (cord_num < 0) info = QMCKL_INVALID_ARG_5 if (cord_num < 0) info = QMCKL_INVALID_ARG_5
if (info /= QMCKL_SUCCESS) return if (info /= QMCKL_SUCCESS) return
factor_een_gl = 0.0d0 if (cord_num == 0) then
factor_een_gl = 0.0d0
if (cord_num == 0) return return
end if
do nw =1, walk_num do nw =1, walk_num
factor_een_gl(:,:,nw) = 0.0d0
do n = 1, dim_c_vector do n = 1, dim_c_vector
l = lkpm_combined_index(n, 1) l = lkpm_combined_index(n, 1)
k = lkpm_combined_index(n, 2) k = lkpm_combined_index(n, 2)
@ -11319,24 +11287,24 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
do ii = 1, 4 do ii = 1, 4
do j = 1, elec_num do j = 1, elec_num
factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( & factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( &
tmp_c(j,a,m,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + & tmp_c (j, a,m ,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + &
(dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & tmp_c (j, a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m ,nw) + &
(dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & dtmp_c(j,ii,a,m ,k,nw) * een_rescaled_n (j, a,m+l,nw) + &
tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m,nw) & dtmp_c(j,ii,a,m+l,k,nw) * een_rescaled_n (j, a,m ,nw) &
) * cn ) * cn
end do end do
end do end do
cn = cn + cn cn = cn + cn
do j = 1, elec_num do j = 1, elec_num
factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( & factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( &
(dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_gl(j,1,a,m+l,nw) + & dtmp_c(j,1,a,m ,k,nw) * een_rescaled_n_gl(j,1,a,m+l,nw) + &
(dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_gl(j,2,a,m+l,nw) + & dtmp_c(j,2,a,m ,k,nw) * een_rescaled_n_gl(j,2,a,m+l,nw) + &
(dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_gl(j,3,a,m+l,nw) + & dtmp_c(j,3,a,m ,k,nw) * een_rescaled_n_gl(j,3,a,m+l,nw) + &
(dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_gl(j,1,a,m ,nw) + & dtmp_c(j,1,a,m+l,k,nw) * een_rescaled_n_gl(j,1,a,m ,nw) + &
(dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_gl(j,2,a,m ,nw) + & dtmp_c(j,2,a,m+l,k,nw) * een_rescaled_n_gl(j,2,a,m ,nw) + &
(dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_gl(j,3,a,m ,nw) & dtmp_c(j,3,a,m+l,k,nw) * een_rescaled_n_gl(j,3,a,m ,nw) &
) * cn ) * cn
end do end do
end do end do
@ -11528,8 +11496,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
return QMCKL_SUCCESS; return QMCKL_SUCCESS;
} }
const size_t elec_num2 = elec_num << 1; const size_t elec_num2 = elec_num + elec_num;
const size_t elec_num3 = elec_num * 3; const size_t elec_num3 = elec_num2 + elec_num;
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp parallel for #pragma omp parallel for
@ -11544,34 +11512,35 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
const size_t en = elec_num*nucl_num; const size_t en = elec_num*nucl_num;
const size_t len = l*en; const size_t len = l*en;
const size_t len4 = len << 2; const size_t len4 = len*4;
const size_t cn = cord_num*nw;
const size_t c1 = cord_num+1; const size_t c1 = cord_num+1;
const size_t cn = cord_num*nw;
const size_t addr0 = en*(m+c1*(k+cn)); const size_t addr0 = en*(m+c1*(k+cn));
const size_t addr1 = en*(m+cn); const size_t addr1 = en*(m+c1*nw);
const double* restrict tmp_c_mkn = &(tmp_c[addr0]); const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
const double* restrict tmp_c_mlkn = tmp_c_mkn + len; const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]); const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]);
const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len; const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len;
const double* restrict dtmp_c_mknw = &(dtmp_c[addr0 << 2]); const double* restrict dtmp_c_mknw = &(dtmp_c[addr0*4]);
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4; const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]); const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1*4]); // ?
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4; const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
for (size_t a = 0; a < (size_t) nucl_num; a++) { for (size_t a = 0; a < (size_t) nucl_num; a++) {
double cn = c_vector_full[a+n*nucl_num]; double cn = c_vector_full[a+n*nucl_num];
if (cn == 0.0) continue; if (cn == 0.0) continue;
const size_t ishift = elec_num*a; const size_t ishift = elec_num*a;
const size_t ishift4 = ishift << 2; const size_t ishift4 = ishift*4;
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift; const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift; const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift;
const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift;
const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift; const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift;
const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4; const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4;
const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4; const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4;
const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4; const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4;
const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4; const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4;
const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num; const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num;
@ -11597,53 +11566,59 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * (
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j] );
tmp3[j] = tmp3[j] =
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j]; dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
} }
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * (
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
tmp3[j] = tmp3[j] + tmp3[j] = tmp3[j] +
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j]; dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
} }
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * (
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
tmp3[j] = tmp3[j] + tmp3[j] = tmp3[j] +
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + dtmp_c_2amknw [j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw [j];
} }
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * (
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
tmp3[j]*2.0); tmp3[j]*2.0);
} }
@ -11655,39 +11630,45 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_0nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] + factor_een_gl_0nw[j] = cn * (
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j]);
tmp3[j] = tmp3[j] =
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] + dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j]; dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
} }
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_1nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] + factor_een_gl_1nw[j] = cn * (
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
tmp3[j] = tmp3[j] + tmp3[j] = tmp3[j] +
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] + dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j]; dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
} }
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_2nw[j] = cn *
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] + factor_een_gl_2nw[j] = cn * (
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] + dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]); tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
tmp3[j] = tmp3[j] + tmp3[j] = tmp3[j] +
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] + dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j]; dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
@ -11697,11 +11678,11 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp simd #pragma omp simd
#endif #endif
for (size_t j = 0; j < (size_t) elec_num; ++j) { for (size_t j = 0; j < (size_t) elec_num; ++j) {
factor_een_gl_3nw[j] = cn * factor_een_gl_3nw[j] = cn * (
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] + dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] + dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] + tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] + tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
tmp3[j]*2.0); tmp3[j]*2.0);
} }
@ -11797,25 +11778,8 @@ TODO
printf("factor_een_gl_hpc\n"); printf("factor_een_gl_hpc\n");
qmckl_context_struct* const ctx = (qmckl_context_struct*) context; qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
assert (ctx != NULL); assert (ctx != NULL);
double factor_een_gl_doc[walk_num*4*elec_num];
memset(&(factor_een_gl_doc[0]), 0, walk_num*4*elec_num*sizeof(double));
rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.tmp_c,
ctx->jastrow_champ.dtmp_c,
ctx->jastrow_champ.een_rescaled_n,
ctx->jastrow_champ.een_rescaled_n_gl,
factor_een_gl_doc);
assert(rc == QMCKL_SUCCESS);
assert(walk_num == 2); assert(walk_num == 2);
assert(elec_num == 10); assert(elec_num == 10);
assert(nucl_num == 2); assert(nucl_num == 2);
@ -11828,21 +11792,68 @@ TODO
assert(ctx->jastrow_champ.cord_num == cord_num); assert(ctx->jastrow_champ.cord_num == cord_num);
assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector); assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector);
double factor_een_gl_naive[walk_num*4*elec_num];
memset(&(factor_een_gl_naive[0]), 0, sizeof(factor_een_gl_naive));
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive(context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.een_rescaled_e,
ctx->jastrow_champ.een_rescaled_n,
ctx->jastrow_champ.een_rescaled_e_gl,
ctx->jastrow_champ.een_rescaled_n_gl,
factor_een_gl_naive);
assert(rc == QMCKL_SUCCESS);
double factor_een_gl_doc[walk_num*4*elec_num];
memset(&(factor_een_gl_doc[0]), 0, sizeof(factor_een_gl_doc));
rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context,
ctx->electron.walker.num,
ctx->electron.num,
ctx->nucleus.num,
ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.tmp_c,
ctx->jastrow_champ.dtmp_c,
ctx->jastrow_champ.een_rescaled_n,
ctx->jastrow_champ.een_rescaled_n_gl,
factor_een_gl_doc);
assert(rc == QMCKL_SUCCESS);
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
if (fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) > 1e-12) {
printf("i = %ld\n", i);
printf("factor_een_gl_naive = %20.15e\n", factor_een_gl_naive[i]);
printf("factor_een_gl_doc = %20.15e\n", factor_een_gl_doc[i]);
fflush(stdout);
}
assert(fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) < 1e-8);
}
double factor_een_gl_hpc[walk_num*4*elec_num]; double factor_een_gl_hpc[walk_num*4*elec_num];
memset(&(factor_een_gl_hpc[0]), 0, walk_num*4*elec_num*sizeof(double)); memset(&(factor_een_gl_hpc[0]), 0, sizeof(factor_een_gl_hpc));
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
ctx->electron.walker.num, rc = qmckl_compute_jastrow_champ_factor_een_gl_hpc(context,
ctx->electron.num, ctx->electron.walker.num,
ctx->nucleus.num, ctx->electron.num,
ctx->jastrow_champ.cord_num, ctx->nucleus.num,
ctx->jastrow_champ.dim_c_vector, ctx->jastrow_champ.cord_num,
ctx->jastrow_champ.c_vector_full, ctx->jastrow_champ.dim_c_vector,
ctx->jastrow_champ.lkpm_combined_index, ctx->jastrow_champ.c_vector_full,
ctx->jastrow_champ.tmp_c, ctx->jastrow_champ.lkpm_combined_index,
ctx->jastrow_champ.dtmp_c, ctx->jastrow_champ.tmp_c,
ctx->jastrow_champ.een_rescaled_n, ctx->jastrow_champ.dtmp_c,
ctx->jastrow_champ.een_rescaled_n_gl, ctx->jastrow_champ.een_rescaled_n,
factor_een_gl_hpc); ctx->jastrow_champ.een_rescaled_n_gl,
factor_een_gl_hpc);
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) { for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
if (fabs(factor_een_gl_doc[i] - factor_een_gl_hpc[i]) > 1e-12) { if (fabs(factor_een_gl_doc[i] - factor_een_gl_hpc[i]) > 1e-12) {