mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Fixed HPC Jastrow
This commit is contained in:
parent
fb86c01008
commit
2402b064b2
@ -10436,20 +10436,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een(qmckl_context context)
|
||||
ctx->jastrow_champ.factor_een = factor_een;
|
||||
}
|
||||
|
||||
/*
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->jastrow_champ.cord_num,
|
||||
ctx->jastrow_champ.dim_c_vector,
|
||||
ctx->jastrow_champ.c_vector_full,
|
||||
ctx->jastrow_champ.lkpm_combined_index,
|
||||
ctx->jastrow_champ.een_rescaled_e,
|
||||
ctx->jastrow_champ.een_rescaled_n,
|
||||
ctx->jastrow_champ.factor_een);
|
||||
*/
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
@ -10522,44 +10508,16 @@ integer function qmckl_compute_jastrow_champ_factor_een_naive_f( &
|
||||
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
||||
if (info /= QMCKL_SUCCESS) return
|
||||
|
||||
|
||||
! do nw =1, walk_num
|
||||
! factor_een(nw) = 0.0d0
|
||||
! do n = 1, dim_c_vector
|
||||
! l = lkpm_combined_index(n, 1)
|
||||
! k = lkpm_combined_index(n, 2)
|
||||
! p = lkpm_combined_index(n, 3)
|
||||
! m = lkpm_combined_index(n, 4)
|
||||
|
||||
! do a = 1, nucl_num
|
||||
! accu2 = 0.0d0
|
||||
! cn = c_vector_full(a, n)
|
||||
! do j = 1, elec_num
|
||||
! accu = 0.0d0
|
||||
! do i = 1, elec_num
|
||||
|
||||
! accu = accu + een_rescaled_e(i,j,k,nw) * &
|
||||
! een_rescaled_n(i,a,m,nw)
|
||||
! end do
|
||||
! accu2 = accu2 + accu * een_rescaled_n(j,a,m + l,nw)
|
||||
! end do
|
||||
! factor_een(nw) = factor_een(nw) + accu2 * cn
|
||||
! end do
|
||||
! end do
|
||||
! end do
|
||||
|
||||
do nw =1, walk_num
|
||||
factor_een(nw) = 0.d0
|
||||
do n = 1, dim_c_vector
|
||||
l = lkpm_combined_index(n, 1)
|
||||
k = lkpm_combined_index(n, 2)
|
||||
p = lkpm_combined_index(n, 3)
|
||||
m = lkpm_combined_index(n, 4)
|
||||
|
||||
do a = 1, nucl_num
|
||||
accu2 = 0.0d0
|
||||
cn = c_vector_full(a, n)
|
||||
print *, a, l, k, p, cn
|
||||
do j = 1, elec_num
|
||||
accu = 0.0d0
|
||||
do i = 1, j-1
|
||||
@ -10683,7 +10641,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
|
||||
double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num)
|
||||
double precision , intent(out) :: factor_een(walk_num)
|
||||
|
||||
integer*8 :: i, a, j, l, k, p, m, n, nw
|
||||
integer*8 :: i, a, j, l, k, m, n, nw
|
||||
double precision :: accu, accu2, cn
|
||||
|
||||
info = QMCKL_SUCCESS
|
||||
@ -10703,7 +10661,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
|
||||
do n = 1, dim_c_vector
|
||||
l = lkpm_combined_index(n, 1)
|
||||
k = lkpm_combined_index(n, 2)
|
||||
p = lkpm_combined_index(n, 3)
|
||||
m = lkpm_combined_index(n, 4)
|
||||
|
||||
do a = 1, nucl_num
|
||||
@ -10872,6 +10829,32 @@ double factor_een[walk_num];
|
||||
rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num);
|
||||
|
||||
assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12);
|
||||
|
||||
{
|
||||
double factor_een_naive[walk_num];
|
||||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||
assert (ctx != NULL);
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->jastrow_champ.cord_num,
|
||||
ctx->jastrow_champ.dim_c_vector,
|
||||
ctx->jastrow_champ.c_vector_full,
|
||||
ctx->jastrow_champ.lkpm_combined_index,
|
||||
ctx->jastrow_champ.een_rescaled_e,
|
||||
ctx->jastrow_champ.een_rescaled_n,
|
||||
factor_een_naive);
|
||||
|
||||
for (int64_t i = 0; i < walk_num; i++) {
|
||||
if (fabs(factor_een[i] - factor_een_naive[i]) > 1e-12) {
|
||||
printf("factor_een[%ld] = %e\n", i, factor_een[i]);
|
||||
printf("factor_een_naive[%ld] = %e\n", i, factor_een_naive[i]);
|
||||
}
|
||||
assert(fabs(factor_een[i] - factor_een_naive[i]) < 1e-12);
|
||||
}
|
||||
}
|
||||
#+end_src
|
||||
|
||||
*** Electron-electron-nucleus Jastrow $f_{een}$ derivative
|
||||
@ -11030,22 +11013,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een_gl(qmckl_context context)
|
||||
ctx->jastrow_champ.factor_een_gl = factor_een_gl;
|
||||
}
|
||||
|
||||
/*
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive (context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->jastrow_champ.cord_num,
|
||||
ctx->jastrow_champ.dim_c_vector,
|
||||
ctx->jastrow_champ.c_vector_full,
|
||||
ctx->jastrow_champ.lkpm_combined_index,
|
||||
ctx->jastrow_champ.een_rescaled_e,
|
||||
ctx->jastrow_champ.een_rescaled_n,
|
||||
ctx->jastrow_champ.een_rescaled_e_gl,
|
||||
ctx->jastrow_champ.een_rescaled_n_gl,
|
||||
ctx->jastrow_champ.factor_een_gl);
|
||||
,*/
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
@ -11113,7 +11080,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
|
||||
double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num)
|
||||
double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num)
|
||||
|
||||
integer*8 :: i, a, j, l, k, p, m, n, nw
|
||||
integer*8 :: i, a, j, l, k, m, n, nw
|
||||
double precision :: accu, accu2, cn
|
||||
double precision :: daccu(1:4), daccu2(1:4)
|
||||
|
||||
@ -11132,7 +11099,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
|
||||
do n = 1, dim_c_vector
|
||||
l = lkpm_combined_index(n, 1)
|
||||
k = lkpm_combined_index(n, 2)
|
||||
p = lkpm_combined_index(n, 3)
|
||||
m = lkpm_combined_index(n, 4)
|
||||
|
||||
do a = 1, nucl_num
|
||||
@ -11303,11 +11269,13 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
|
||||
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
||||
if (info /= QMCKL_SUCCESS) return
|
||||
|
||||
if (cord_num == 0) then
|
||||
factor_een_gl = 0.0d0
|
||||
|
||||
if (cord_num == 0) return
|
||||
return
|
||||
end if
|
||||
|
||||
do nw =1, walk_num
|
||||
factor_een_gl(:,:,nw) = 0.0d0
|
||||
do n = 1, dim_c_vector
|
||||
l = lkpm_combined_index(n, 1)
|
||||
k = lkpm_combined_index(n, 2)
|
||||
@ -11320,10 +11288,10 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
|
||||
do ii = 1, 4
|
||||
do j = 1, elec_num
|
||||
factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( &
|
||||
tmp_c(j,a,m,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + &
|
||||
(dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + &
|
||||
(dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + &
|
||||
tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m,nw) &
|
||||
tmp_c (j, a,m ,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + &
|
||||
tmp_c (j, a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m ,nw) + &
|
||||
dtmp_c(j,ii,a,m ,k,nw) * een_rescaled_n (j, a,m+l,nw) + &
|
||||
dtmp_c(j,ii,a,m+l,k,nw) * een_rescaled_n (j, a,m ,nw) &
|
||||
) * cn
|
||||
end do
|
||||
end do
|
||||
@ -11331,12 +11299,12 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
|
||||
cn = cn + cn
|
||||
do j = 1, elec_num
|
||||
factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( &
|
||||
(dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_gl(j,1,a,m+l,nw) + &
|
||||
(dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_gl(j,2,a,m+l,nw) + &
|
||||
(dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_gl(j,3,a,m+l,nw) + &
|
||||
(dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_gl(j,1,a,m ,nw) + &
|
||||
(dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_gl(j,2,a,m ,nw) + &
|
||||
(dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_gl(j,3,a,m ,nw) &
|
||||
dtmp_c(j,1,a,m ,k,nw) * een_rescaled_n_gl(j,1,a,m+l,nw) + &
|
||||
dtmp_c(j,2,a,m ,k,nw) * een_rescaled_n_gl(j,2,a,m+l,nw) + &
|
||||
dtmp_c(j,3,a,m ,k,nw) * een_rescaled_n_gl(j,3,a,m+l,nw) + &
|
||||
dtmp_c(j,1,a,m+l,k,nw) * een_rescaled_n_gl(j,1,a,m ,nw) + &
|
||||
dtmp_c(j,2,a,m+l,k,nw) * een_rescaled_n_gl(j,2,a,m ,nw) + &
|
||||
dtmp_c(j,3,a,m+l,k,nw) * een_rescaled_n_gl(j,3,a,m ,nw) &
|
||||
) * cn
|
||||
end do
|
||||
end do
|
||||
@ -11528,8 +11496,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
const size_t elec_num2 = elec_num << 1;
|
||||
const size_t elec_num3 = elec_num * 3;
|
||||
const size_t elec_num2 = elec_num + elec_num;
|
||||
const size_t elec_num3 = elec_num2 + elec_num;
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel for
|
||||
@ -11544,29 +11512,30 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
|
||||
const size_t en = elec_num*nucl_num;
|
||||
const size_t len = l*en;
|
||||
const size_t len4 = len << 2;
|
||||
const size_t cn = cord_num*nw;
|
||||
const size_t len4 = len*4;
|
||||
const size_t c1 = cord_num+1;
|
||||
const size_t cn = cord_num*nw;
|
||||
const size_t addr0 = en*(m+c1*(k+cn));
|
||||
const size_t addr1 = en*(m+cn);
|
||||
const size_t addr1 = en*(m+c1*nw);
|
||||
|
||||
const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
|
||||
const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
|
||||
const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]);
|
||||
const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len;
|
||||
const double* restrict dtmp_c_mknw = &(dtmp_c[addr0 << 2]);
|
||||
const double* restrict dtmp_c_mknw = &(dtmp_c[addr0*4]);
|
||||
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
|
||||
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]);
|
||||
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1*4]); // ?
|
||||
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
|
||||
|
||||
for (size_t a = 0; a < (size_t) nucl_num; a++) {
|
||||
double cn = c_vector_full[a+n*nucl_num];
|
||||
if (cn == 0.0) continue;
|
||||
|
||||
const size_t ishift = elec_num*a;
|
||||
const size_t ishift4 = ishift << 2;
|
||||
const size_t ishift4 = ishift*4;
|
||||
|
||||
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
|
||||
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
|
||||
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
|
||||
const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift;
|
||||
const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift;
|
||||
const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4;
|
||||
@ -11597,53 +11566,59 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
|
||||
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * (
|
||||
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j] );
|
||||
|
||||
tmp3[j] =
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
||||
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
|
||||
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * (
|
||||
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
|
||||
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
|
||||
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * (
|
||||
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
|
||||
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||
dtmp_c_2amknw [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw [j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
||||
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * (
|
||||
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
|
||||
tmp3[j]*2.0);
|
||||
}
|
||||
|
||||
@ -11655,39 +11630,45 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_0nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
||||
|
||||
factor_een_gl_0nw[j] = cn * (
|
||||
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j]);
|
||||
|
||||
tmp3[j] =
|
||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
||||
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_1nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
||||
|
||||
factor_een_gl_1nw[j] = cn * (
|
||||
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
|
||||
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
||||
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_2nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
||||
|
||||
factor_een_gl_2nw[j] = cn * (
|
||||
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
|
||||
|
||||
tmp3[j] = tmp3[j] +
|
||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||
@ -11697,11 +11678,11 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||
factor_een_gl_3nw[j] = cn *
|
||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
||||
factor_een_gl_3nw[j] = cn * (
|
||||
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
|
||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
|
||||
tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
|
||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
|
||||
tmp3[j]*2.0);
|
||||
}
|
||||
|
||||
@ -11797,10 +11778,41 @@ TODO
|
||||
printf("factor_een_gl_hpc\n");
|
||||
|
||||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||
|
||||
assert (ctx != NULL);
|
||||
assert(walk_num == 2);
|
||||
assert(elec_num == 10);
|
||||
assert(nucl_num == 2);
|
||||
assert(cord_num == 5);
|
||||
assert(dim_c_vector == 23);
|
||||
|
||||
assert(ctx->electron.walker.num == walk_num);
|
||||
assert(ctx->electron.num == elec_num);
|
||||
assert(ctx->nucleus.num == nucl_num);
|
||||
assert(ctx->jastrow_champ.cord_num == cord_num);
|
||||
assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector);
|
||||
|
||||
double factor_een_gl_naive[walk_num*4*elec_num];
|
||||
memset(&(factor_een_gl_naive[0]), 0, sizeof(factor_een_gl_naive));
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->jastrow_champ.cord_num,
|
||||
ctx->jastrow_champ.dim_c_vector,
|
||||
ctx->jastrow_champ.c_vector_full,
|
||||
ctx->jastrow_champ.lkpm_combined_index,
|
||||
ctx->jastrow_champ.een_rescaled_e,
|
||||
ctx->jastrow_champ.een_rescaled_n,
|
||||
ctx->jastrow_champ.een_rescaled_e_gl,
|
||||
ctx->jastrow_champ.een_rescaled_n_gl,
|
||||
factor_een_gl_naive);
|
||||
assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
double factor_een_gl_doc[walk_num*4*elec_num];
|
||||
memset(&(factor_een_gl_doc[0]), 0, walk_num*4*elec_num*sizeof(double));
|
||||
memset(&(factor_een_gl_doc[0]), 0, sizeof(factor_een_gl_doc));
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
@ -11816,21 +11828,20 @@ TODO
|
||||
factor_een_gl_doc);
|
||||
assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
assert(walk_num == 2);
|
||||
assert(elec_num == 10);
|
||||
assert(nucl_num == 2);
|
||||
assert(cord_num == 5);
|
||||
assert(dim_c_vector == 23);
|
||||
|
||||
assert(ctx->electron.walker.num == walk_num);
|
||||
assert(ctx->electron.num == elec_num);
|
||||
assert(ctx->nucleus.num == nucl_num);
|
||||
assert(ctx->jastrow_champ.cord_num == cord_num);
|
||||
assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector);
|
||||
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
|
||||
if (fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) > 1e-12) {
|
||||
printf("i = %ld\n", i);
|
||||
printf("factor_een_gl_naive = %20.15e\n", factor_een_gl_naive[i]);
|
||||
printf("factor_een_gl_doc = %20.15e\n", factor_een_gl_doc[i]);
|
||||
fflush(stdout);
|
||||
}
|
||||
assert(fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) < 1e-8);
|
||||
}
|
||||
|
||||
double factor_een_gl_hpc[walk_num*4*elec_num];
|
||||
memset(&(factor_een_gl_hpc[0]), 0, walk_num*4*elec_num*sizeof(double));
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
|
||||
memset(&(factor_een_gl_hpc[0]), 0, sizeof(factor_een_gl_hpc));
|
||||
|
||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_hpc(context,
|
||||
ctx->electron.walker.num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
|
Loading…
Reference in New Issue
Block a user