mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Fixed HPC Jastrow
This commit is contained in:
parent
fb86c01008
commit
2402b064b2
@ -10436,20 +10436,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een(qmckl_context context)
|
|||||||
ctx->jastrow_champ.factor_een = factor_een;
|
ctx->jastrow_champ.factor_een = factor_een;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
|
|
||||||
ctx->electron.walker.num,
|
|
||||||
ctx->electron.num,
|
|
||||||
ctx->nucleus.num,
|
|
||||||
ctx->jastrow_champ.cord_num,
|
|
||||||
ctx->jastrow_champ.dim_c_vector,
|
|
||||||
ctx->jastrow_champ.c_vector_full,
|
|
||||||
ctx->jastrow_champ.lkpm_combined_index,
|
|
||||||
ctx->jastrow_champ.een_rescaled_e,
|
|
||||||
ctx->jastrow_champ.een_rescaled_n,
|
|
||||||
ctx->jastrow_champ.factor_een);
|
|
||||||
*/
|
|
||||||
|
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een(context,
|
rc = qmckl_compute_jastrow_champ_factor_een(context,
|
||||||
ctx->electron.walker.num,
|
ctx->electron.walker.num,
|
||||||
ctx->electron.num,
|
ctx->electron.num,
|
||||||
@ -10522,44 +10508,16 @@ integer function qmckl_compute_jastrow_champ_factor_een_naive_f( &
|
|||||||
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
||||||
if (info /= QMCKL_SUCCESS) return
|
if (info /= QMCKL_SUCCESS) return
|
||||||
|
|
||||||
|
|
||||||
! do nw =1, walk_num
|
|
||||||
! factor_een(nw) = 0.0d0
|
|
||||||
! do n = 1, dim_c_vector
|
|
||||||
! l = lkpm_combined_index(n, 1)
|
|
||||||
! k = lkpm_combined_index(n, 2)
|
|
||||||
! p = lkpm_combined_index(n, 3)
|
|
||||||
! m = lkpm_combined_index(n, 4)
|
|
||||||
|
|
||||||
! do a = 1, nucl_num
|
|
||||||
! accu2 = 0.0d0
|
|
||||||
! cn = c_vector_full(a, n)
|
|
||||||
! do j = 1, elec_num
|
|
||||||
! accu = 0.0d0
|
|
||||||
! do i = 1, elec_num
|
|
||||||
|
|
||||||
! accu = accu + een_rescaled_e(i,j,k,nw) * &
|
|
||||||
! een_rescaled_n(i,a,m,nw)
|
|
||||||
! end do
|
|
||||||
! accu2 = accu2 + accu * een_rescaled_n(j,a,m + l,nw)
|
|
||||||
! end do
|
|
||||||
! factor_een(nw) = factor_een(nw) + accu2 * cn
|
|
||||||
! end do
|
|
||||||
! end do
|
|
||||||
! end do
|
|
||||||
|
|
||||||
do nw =1, walk_num
|
do nw =1, walk_num
|
||||||
factor_een(nw) = 0.d0
|
factor_een(nw) = 0.d0
|
||||||
do n = 1, dim_c_vector
|
do n = 1, dim_c_vector
|
||||||
l = lkpm_combined_index(n, 1)
|
l = lkpm_combined_index(n, 1)
|
||||||
k = lkpm_combined_index(n, 2)
|
k = lkpm_combined_index(n, 2)
|
||||||
p = lkpm_combined_index(n, 3)
|
|
||||||
m = lkpm_combined_index(n, 4)
|
m = lkpm_combined_index(n, 4)
|
||||||
|
|
||||||
do a = 1, nucl_num
|
do a = 1, nucl_num
|
||||||
accu2 = 0.0d0
|
accu2 = 0.0d0
|
||||||
cn = c_vector_full(a, n)
|
cn = c_vector_full(a, n)
|
||||||
print *, a, l, k, p, cn
|
|
||||||
do j = 1, elec_num
|
do j = 1, elec_num
|
||||||
accu = 0.0d0
|
accu = 0.0d0
|
||||||
do i = 1, j-1
|
do i = 1, j-1
|
||||||
@ -10683,7 +10641,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
|
|||||||
double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num)
|
double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num)
|
||||||
double precision , intent(out) :: factor_een(walk_num)
|
double precision , intent(out) :: factor_een(walk_num)
|
||||||
|
|
||||||
integer*8 :: i, a, j, l, k, p, m, n, nw
|
integer*8 :: i, a, j, l, k, m, n, nw
|
||||||
double precision :: accu, accu2, cn
|
double precision :: accu, accu2, cn
|
||||||
|
|
||||||
info = QMCKL_SUCCESS
|
info = QMCKL_SUCCESS
|
||||||
@ -10703,7 +10661,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_doc_f( &
|
|||||||
do n = 1, dim_c_vector
|
do n = 1, dim_c_vector
|
||||||
l = lkpm_combined_index(n, 1)
|
l = lkpm_combined_index(n, 1)
|
||||||
k = lkpm_combined_index(n, 2)
|
k = lkpm_combined_index(n, 2)
|
||||||
p = lkpm_combined_index(n, 3)
|
|
||||||
m = lkpm_combined_index(n, 4)
|
m = lkpm_combined_index(n, 4)
|
||||||
|
|
||||||
do a = 1, nucl_num
|
do a = 1, nucl_num
|
||||||
@ -10872,6 +10829,32 @@ double factor_een[walk_num];
|
|||||||
rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num);
|
rc = qmckl_get_jastrow_champ_factor_een(context, &(factor_een[0]),walk_num);
|
||||||
|
|
||||||
assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12);
|
assert(fabs(factor_een[0] + 0.382580260174321) < 1e-12);
|
||||||
|
|
||||||
|
{
|
||||||
|
double factor_een_naive[walk_num];
|
||||||
|
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||||
|
assert (ctx != NULL);
|
||||||
|
|
||||||
|
rc = qmckl_compute_jastrow_champ_factor_een_naive(context,
|
||||||
|
ctx->electron.walker.num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->jastrow_champ.cord_num,
|
||||||
|
ctx->jastrow_champ.dim_c_vector,
|
||||||
|
ctx->jastrow_champ.c_vector_full,
|
||||||
|
ctx->jastrow_champ.lkpm_combined_index,
|
||||||
|
ctx->jastrow_champ.een_rescaled_e,
|
||||||
|
ctx->jastrow_champ.een_rescaled_n,
|
||||||
|
factor_een_naive);
|
||||||
|
|
||||||
|
for (int64_t i = 0; i < walk_num; i++) {
|
||||||
|
if (fabs(factor_een[i] - factor_een_naive[i]) > 1e-12) {
|
||||||
|
printf("factor_een[%ld] = %e\n", i, factor_een[i]);
|
||||||
|
printf("factor_een_naive[%ld] = %e\n", i, factor_een_naive[i]);
|
||||||
|
}
|
||||||
|
assert(fabs(factor_een[i] - factor_een_naive[i]) < 1e-12);
|
||||||
|
}
|
||||||
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
*** Electron-electron-nucleus Jastrow $f_{een}$ derivative
|
*** Electron-electron-nucleus Jastrow $f_{een}$ derivative
|
||||||
@ -11030,22 +11013,6 @@ qmckl_exit_code qmckl_provide_jastrow_champ_factor_een_gl(qmckl_context context)
|
|||||||
ctx->jastrow_champ.factor_een_gl = factor_een_gl;
|
ctx->jastrow_champ.factor_een_gl = factor_een_gl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive (context,
|
|
||||||
ctx->electron.walker.num,
|
|
||||||
ctx->electron.num,
|
|
||||||
ctx->nucleus.num,
|
|
||||||
ctx->jastrow_champ.cord_num,
|
|
||||||
ctx->jastrow_champ.dim_c_vector,
|
|
||||||
ctx->jastrow_champ.c_vector_full,
|
|
||||||
ctx->jastrow_champ.lkpm_combined_index,
|
|
||||||
ctx->jastrow_champ.een_rescaled_e,
|
|
||||||
ctx->jastrow_champ.een_rescaled_n,
|
|
||||||
ctx->jastrow_champ.een_rescaled_e_gl,
|
|
||||||
ctx->jastrow_champ.een_rescaled_n_gl,
|
|
||||||
ctx->jastrow_champ.factor_een_gl);
|
|
||||||
,*/
|
|
||||||
|
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
|
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
|
||||||
ctx->electron.walker.num,
|
ctx->electron.walker.num,
|
||||||
ctx->electron.num,
|
ctx->electron.num,
|
||||||
@ -11113,7 +11080,7 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
|
|||||||
double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num)
|
double precision , intent(in) :: een_rescaled_n_gl(elec_num, 4, nucl_num, 0:cord_num, walk_num)
|
||||||
double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num)
|
double precision , intent(out) :: factor_een_gl(elec_num, 4, walk_num)
|
||||||
|
|
||||||
integer*8 :: i, a, j, l, k, p, m, n, nw
|
integer*8 :: i, a, j, l, k, m, n, nw
|
||||||
double precision :: accu, accu2, cn
|
double precision :: accu, accu2, cn
|
||||||
double precision :: daccu(1:4), daccu2(1:4)
|
double precision :: daccu(1:4), daccu2(1:4)
|
||||||
|
|
||||||
@ -11132,7 +11099,6 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_naive_f( &
|
|||||||
do n = 1, dim_c_vector
|
do n = 1, dim_c_vector
|
||||||
l = lkpm_combined_index(n, 1)
|
l = lkpm_combined_index(n, 1)
|
||||||
k = lkpm_combined_index(n, 2)
|
k = lkpm_combined_index(n, 2)
|
||||||
p = lkpm_combined_index(n, 3)
|
|
||||||
m = lkpm_combined_index(n, 4)
|
m = lkpm_combined_index(n, 4)
|
||||||
|
|
||||||
do a = 1, nucl_num
|
do a = 1, nucl_num
|
||||||
@ -11303,11 +11269,13 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
|
|||||||
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
if (cord_num < 0) info = QMCKL_INVALID_ARG_5
|
||||||
if (info /= QMCKL_SUCCESS) return
|
if (info /= QMCKL_SUCCESS) return
|
||||||
|
|
||||||
factor_een_gl = 0.0d0
|
if (cord_num == 0) then
|
||||||
|
factor_een_gl = 0.0d0
|
||||||
if (cord_num == 0) return
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
do nw =1, walk_num
|
do nw =1, walk_num
|
||||||
|
factor_een_gl(:,:,nw) = 0.0d0
|
||||||
do n = 1, dim_c_vector
|
do n = 1, dim_c_vector
|
||||||
l = lkpm_combined_index(n, 1)
|
l = lkpm_combined_index(n, 1)
|
||||||
k = lkpm_combined_index(n, 2)
|
k = lkpm_combined_index(n, 2)
|
||||||
@ -11319,24 +11287,24 @@ integer function qmckl_compute_jastrow_champ_factor_een_gl_doc_f( &
|
|||||||
|
|
||||||
do ii = 1, 4
|
do ii = 1, 4
|
||||||
do j = 1, elec_num
|
do j = 1, elec_num
|
||||||
factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( &
|
factor_een_gl(j,ii,nw) = factor_een_gl(j,ii,nw) + ( &
|
||||||
tmp_c(j,a,m,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + &
|
tmp_c (j, a,m ,k,nw) * een_rescaled_n_gl(j,ii,a,m+l,nw) + &
|
||||||
(dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + &
|
tmp_c (j, a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m ,nw) + &
|
||||||
(dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + &
|
dtmp_c(j,ii,a,m ,k,nw) * een_rescaled_n (j, a,m+l,nw) + &
|
||||||
tmp_c(j,a,m+l,k,nw) * een_rescaled_n_gl(j,ii,a,m,nw) &
|
dtmp_c(j,ii,a,m+l,k,nw) * een_rescaled_n (j, a,m ,nw) &
|
||||||
) * cn
|
) * cn
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
|
|
||||||
cn = cn + cn
|
cn = cn + cn
|
||||||
do j = 1, elec_num
|
do j = 1, elec_num
|
||||||
factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( &
|
factor_een_gl(j,4,nw) = factor_een_gl(j,4,nw) + ( &
|
||||||
(dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_gl(j,1,a,m+l,nw) + &
|
dtmp_c(j,1,a,m ,k,nw) * een_rescaled_n_gl(j,1,a,m+l,nw) + &
|
||||||
(dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_gl(j,2,a,m+l,nw) + &
|
dtmp_c(j,2,a,m ,k,nw) * een_rescaled_n_gl(j,2,a,m+l,nw) + &
|
||||||
(dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_gl(j,3,a,m+l,nw) + &
|
dtmp_c(j,3,a,m ,k,nw) * een_rescaled_n_gl(j,3,a,m+l,nw) + &
|
||||||
(dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_gl(j,1,a,m ,nw) + &
|
dtmp_c(j,1,a,m+l,k,nw) * een_rescaled_n_gl(j,1,a,m ,nw) + &
|
||||||
(dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_gl(j,2,a,m ,nw) + &
|
dtmp_c(j,2,a,m+l,k,nw) * een_rescaled_n_gl(j,2,a,m ,nw) + &
|
||||||
(dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_gl(j,3,a,m ,nw) &
|
dtmp_c(j,3,a,m+l,k,nw) * een_rescaled_n_gl(j,3,a,m ,nw) &
|
||||||
) * cn
|
) * cn
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
@ -11528,8 +11496,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t elec_num2 = elec_num << 1;
|
const size_t elec_num2 = elec_num + elec_num;
|
||||||
const size_t elec_num3 = elec_num * 3;
|
const size_t elec_num3 = elec_num2 + elec_num;
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
@ -11544,34 +11512,35 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
|
|
||||||
const size_t en = elec_num*nucl_num;
|
const size_t en = elec_num*nucl_num;
|
||||||
const size_t len = l*en;
|
const size_t len = l*en;
|
||||||
const size_t len4 = len << 2;
|
const size_t len4 = len*4;
|
||||||
const size_t cn = cord_num*nw;
|
|
||||||
const size_t c1 = cord_num+1;
|
const size_t c1 = cord_num+1;
|
||||||
|
const size_t cn = cord_num*nw;
|
||||||
const size_t addr0 = en*(m+c1*(k+cn));
|
const size_t addr0 = en*(m+c1*(k+cn));
|
||||||
const size_t addr1 = en*(m+cn);
|
const size_t addr1 = en*(m+c1*nw);
|
||||||
|
|
||||||
const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
|
const double* restrict tmp_c_mkn = &(tmp_c[addr0]);
|
||||||
const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
|
const double* restrict tmp_c_mlkn = tmp_c_mkn + len;
|
||||||
const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]);
|
const double* restrict een_rescaled_n_mnw = &(een_rescaled_n[addr1]);
|
||||||
const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len;
|
const double* restrict een_rescaled_n_mlnw = een_rescaled_n_mnw + len;
|
||||||
const double* restrict dtmp_c_mknw = &(dtmp_c[addr0 << 2]);
|
const double* restrict dtmp_c_mknw = &(dtmp_c[addr0*4]);
|
||||||
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
|
const double* restrict dtmp_c_mlknw = dtmp_c_mknw + len4;
|
||||||
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1 << 2]);
|
const double* restrict een_rescaled_n_gl_mnw = &(een_rescaled_n_gl[addr1*4]); // ?
|
||||||
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
|
const double* restrict een_rescaled_n_gl_mlnw = een_rescaled_n_gl_mnw + len4;
|
||||||
|
|
||||||
for (size_t a = 0; a < (size_t) nucl_num; a++) {
|
for (size_t a = 0; a < (size_t) nucl_num; a++) {
|
||||||
double cn = c_vector_full[a+n*nucl_num];
|
double cn = c_vector_full[a+n*nucl_num];
|
||||||
if (cn == 0.0) continue;
|
if (cn == 0.0) continue;
|
||||||
|
|
||||||
const size_t ishift = elec_num*a;
|
const size_t ishift = elec_num*a;
|
||||||
const size_t ishift4 = ishift << 2;
|
const size_t ishift4 = ishift*4;
|
||||||
|
|
||||||
|
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
|
||||||
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
|
const double* restrict tmp_c_amlkn = tmp_c_mlkn + ishift;
|
||||||
const double* restrict tmp_c_amkn = tmp_c_mkn + ishift;
|
const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift;
|
||||||
const double* restrict een_rescaled_n_amnw = een_rescaled_n_mnw + ishift;
|
|
||||||
const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift;
|
const double* restrict een_rescaled_n_amlnw = een_rescaled_n_mlnw + ishift;
|
||||||
const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4;
|
const double* restrict dtmp_c_0amknw = dtmp_c_mknw + ishift4;
|
||||||
const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4;
|
const double* restrict dtmp_c_0amlknw = dtmp_c_mlknw + ishift4;
|
||||||
const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4;
|
const double* restrict een_rescaled_n_gl_0amnw = een_rescaled_n_gl_mnw + ishift4;
|
||||||
const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4;
|
const double* restrict een_rescaled_n_gl_0amlnw = een_rescaled_n_gl_mlnw + ishift4;
|
||||||
|
|
||||||
const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num;
|
const double* restrict dtmp_c_1amknw = dtmp_c_0amknw + elec_num;
|
||||||
@ -11597,53 +11566,59 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
factor_een_gl_0nw[j] = factor_een_gl_0nw[j] + cn * (
|
||||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j] );
|
||||||
|
|
||||||
tmp3[j] =
|
tmp3[j] =
|
||||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
factor_een_gl_1nw[j] = factor_een_gl_1nw[j] + cn * (
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
|
||||||
|
|
||||||
tmp3[j] = tmp3[j] +
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
factor_een_gl_2nw[j] = factor_een_gl_2nw[j] + cn * (
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
|
||||||
|
|
||||||
tmp3[j] = tmp3[j] +
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
dtmp_c_2amknw [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw [j];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn *
|
factor_een_gl_3nw[j] = factor_een_gl_3nw[j] + cn * (
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
|
||||||
tmp3[j]*2.0);
|
tmp3[j]*2.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -11655,39 +11630,45 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_0nw[j] = cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_0amlnw[j] +
|
factor_een_gl_0nw[j] = cn * (
|
||||||
dtmp_c_0amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_0amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_0amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_0amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_0amnw [j]);
|
||||||
|
|
||||||
tmp3[j] =
|
tmp3[j] =
|
||||||
dtmp_c_0amknw[j] * een_rescaled_n_gl_0amlnw[j] +
|
dtmp_c_0amknw [j] * een_rescaled_n_gl_0amlnw[j] +
|
||||||
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw[j];
|
dtmp_c_0amlknw[j] * een_rescaled_n_gl_0amnw [j];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_1nw[j] = cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_1amlnw[j] +
|
factor_een_gl_1nw[j] = cn * (
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_1amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_1amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_1amnw [j]);
|
||||||
|
|
||||||
tmp3[j] = tmp3[j] +
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_1amknw[j] * een_rescaled_n_gl_1amlnw[j] +
|
dtmp_c_1amknw [j] * een_rescaled_n_gl_1amlnw[j] +
|
||||||
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw[j];
|
dtmp_c_1amlknw[j] * een_rescaled_n_gl_1amnw [j];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_2nw[j] = cn *
|
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_2amlnw[j] +
|
factor_een_gl_2nw[j] = cn * (
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_2amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_amnw[j] +
|
dtmp_c_2amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw[j]);
|
tmp_c_amkn [j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
|
tmp_c_amlkn[j] * een_rescaled_n_gl_2amnw [j]);
|
||||||
|
|
||||||
tmp3[j] = tmp3[j] +
|
tmp3[j] = tmp3[j] +
|
||||||
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
dtmp_c_2amknw[j] * een_rescaled_n_gl_2amlnw[j] +
|
||||||
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
dtmp_c_2amlknw[j] * een_rescaled_n_gl_2amnw[j];
|
||||||
@ -11697,11 +11678,11 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
|
|||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
for (size_t j = 0; j < (size_t) elec_num; ++j) {
|
||||||
factor_een_gl_3nw[j] = cn *
|
factor_een_gl_3nw[j] = cn * (
|
||||||
(tmp_c_amkn[j] * een_rescaled_n_gl_3amlnw[j] +
|
dtmp_c_3amknw [j] * een_rescaled_n_amlnw[j] +
|
||||||
dtmp_c_3amknw[j] * een_rescaled_n_amlnw[j] +
|
dtmp_c_3amlknw[j] * een_rescaled_n_amnw [j] +
|
||||||
dtmp_c_3amlknw[j] * een_rescaled_n_amnw[j] +
|
tmp_c_amkn [j] * een_rescaled_n_gl_3amlnw[j] +
|
||||||
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw[j] +
|
tmp_c_amlkn[j] * een_rescaled_n_gl_3amnw [j] +
|
||||||
tmp3[j]*2.0);
|
tmp3[j]*2.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -11797,25 +11778,8 @@ TODO
|
|||||||
printf("factor_een_gl_hpc\n");
|
printf("factor_een_gl_hpc\n");
|
||||||
|
|
||||||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||||
|
|
||||||
assert (ctx != NULL);
|
assert (ctx != NULL);
|
||||||
|
|
||||||
double factor_een_gl_doc[walk_num*4*elec_num];
|
|
||||||
memset(&(factor_een_gl_doc[0]), 0, walk_num*4*elec_num*sizeof(double));
|
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context,
|
|
||||||
ctx->electron.walker.num,
|
|
||||||
ctx->electron.num,
|
|
||||||
ctx->nucleus.num,
|
|
||||||
ctx->jastrow_champ.cord_num,
|
|
||||||
ctx->jastrow_champ.dim_c_vector,
|
|
||||||
ctx->jastrow_champ.c_vector_full,
|
|
||||||
ctx->jastrow_champ.lkpm_combined_index,
|
|
||||||
ctx->jastrow_champ.tmp_c,
|
|
||||||
ctx->jastrow_champ.dtmp_c,
|
|
||||||
ctx->jastrow_champ.een_rescaled_n,
|
|
||||||
ctx->jastrow_champ.een_rescaled_n_gl,
|
|
||||||
factor_een_gl_doc);
|
|
||||||
assert(rc == QMCKL_SUCCESS);
|
|
||||||
|
|
||||||
assert(walk_num == 2);
|
assert(walk_num == 2);
|
||||||
assert(elec_num == 10);
|
assert(elec_num == 10);
|
||||||
assert(nucl_num == 2);
|
assert(nucl_num == 2);
|
||||||
@ -11828,21 +11792,68 @@ TODO
|
|||||||
assert(ctx->jastrow_champ.cord_num == cord_num);
|
assert(ctx->jastrow_champ.cord_num == cord_num);
|
||||||
assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector);
|
assert(ctx->jastrow_champ.dim_c_vector == dim_c_vector);
|
||||||
|
|
||||||
|
double factor_een_gl_naive[walk_num*4*elec_num];
|
||||||
|
memset(&(factor_een_gl_naive[0]), 0, sizeof(factor_een_gl_naive));
|
||||||
|
|
||||||
|
rc = qmckl_compute_jastrow_champ_factor_een_gl_naive(context,
|
||||||
|
ctx->electron.walker.num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->jastrow_champ.cord_num,
|
||||||
|
ctx->jastrow_champ.dim_c_vector,
|
||||||
|
ctx->jastrow_champ.c_vector_full,
|
||||||
|
ctx->jastrow_champ.lkpm_combined_index,
|
||||||
|
ctx->jastrow_champ.een_rescaled_e,
|
||||||
|
ctx->jastrow_champ.een_rescaled_n,
|
||||||
|
ctx->jastrow_champ.een_rescaled_e_gl,
|
||||||
|
ctx->jastrow_champ.een_rescaled_n_gl,
|
||||||
|
factor_een_gl_naive);
|
||||||
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
|
||||||
|
double factor_een_gl_doc[walk_num*4*elec_num];
|
||||||
|
memset(&(factor_een_gl_doc[0]), 0, sizeof(factor_een_gl_doc));
|
||||||
|
|
||||||
|
rc = qmckl_compute_jastrow_champ_factor_een_gl_doc(context,
|
||||||
|
ctx->electron.walker.num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->jastrow_champ.cord_num,
|
||||||
|
ctx->jastrow_champ.dim_c_vector,
|
||||||
|
ctx->jastrow_champ.c_vector_full,
|
||||||
|
ctx->jastrow_champ.lkpm_combined_index,
|
||||||
|
ctx->jastrow_champ.tmp_c,
|
||||||
|
ctx->jastrow_champ.dtmp_c,
|
||||||
|
ctx->jastrow_champ.een_rescaled_n,
|
||||||
|
ctx->jastrow_champ.een_rescaled_n_gl,
|
||||||
|
factor_een_gl_doc);
|
||||||
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
|
||||||
|
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
|
||||||
|
if (fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) > 1e-12) {
|
||||||
|
printf("i = %ld\n", i);
|
||||||
|
printf("factor_een_gl_naive = %20.15e\n", factor_een_gl_naive[i]);
|
||||||
|
printf("factor_een_gl_doc = %20.15e\n", factor_een_gl_doc[i]);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
|
assert(fabs(factor_een_gl_naive[i] - factor_een_gl_doc[i]) < 1e-8);
|
||||||
|
}
|
||||||
|
|
||||||
double factor_een_gl_hpc[walk_num*4*elec_num];
|
double factor_een_gl_hpc[walk_num*4*elec_num];
|
||||||
memset(&(factor_een_gl_hpc[0]), 0, walk_num*4*elec_num*sizeof(double));
|
memset(&(factor_een_gl_hpc[0]), 0, sizeof(factor_een_gl_hpc));
|
||||||
rc = qmckl_compute_jastrow_champ_factor_een_gl(context,
|
|
||||||
ctx->electron.walker.num,
|
rc = qmckl_compute_jastrow_champ_factor_een_gl_hpc(context,
|
||||||
ctx->electron.num,
|
ctx->electron.walker.num,
|
||||||
ctx->nucleus.num,
|
ctx->electron.num,
|
||||||
ctx->jastrow_champ.cord_num,
|
ctx->nucleus.num,
|
||||||
ctx->jastrow_champ.dim_c_vector,
|
ctx->jastrow_champ.cord_num,
|
||||||
ctx->jastrow_champ.c_vector_full,
|
ctx->jastrow_champ.dim_c_vector,
|
||||||
ctx->jastrow_champ.lkpm_combined_index,
|
ctx->jastrow_champ.c_vector_full,
|
||||||
ctx->jastrow_champ.tmp_c,
|
ctx->jastrow_champ.lkpm_combined_index,
|
||||||
ctx->jastrow_champ.dtmp_c,
|
ctx->jastrow_champ.tmp_c,
|
||||||
ctx->jastrow_champ.een_rescaled_n,
|
ctx->jastrow_champ.dtmp_c,
|
||||||
ctx->jastrow_champ.een_rescaled_n_gl,
|
ctx->jastrow_champ.een_rescaled_n,
|
||||||
factor_een_gl_hpc);
|
ctx->jastrow_champ.een_rescaled_n_gl,
|
||||||
|
factor_een_gl_hpc);
|
||||||
|
|
||||||
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
|
for (int64_t i = 0; i < walk_num*4*elec_num; ++i) {
|
||||||
if (fabs(factor_een_gl_doc[i] - factor_een_gl_hpc[i]) > 1e-12) {
|
if (fabs(factor_een_gl_doc[i] - factor_een_gl_hpc[i]) > 1e-12) {
|
||||||
|
Loading…
Reference in New Issue
Block a user