1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 01:56:18 +01:00

Fixed HPC function

This commit is contained in:
Anthony Scemama 2024-12-14 00:33:18 +01:00
parent 243e937356
commit 8086c2078e

View File

@ -7515,51 +7515,51 @@ integer function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f( &
do nw = 1, walk_num
! Prepare table of exponentiated distances raised to appropriate power
do j = 1, elec_num
do i = 1, j-1
rij_inv(i) = 1.0d0 / ee_distance(i, j, nw)
enddo
rij_inv(j) = 0.0d0
do i = j+1, elec_num
rij_inv(i) = 1.0d0 / ee_distance(i, j, nw)
enddo
do i = 1, elec_num
do ii = 1, 3
elec_dist_gl(i, ii, j) = (coord_ee(i, ii, nw) - coord_ee(j, ii, nw)) * rij_inv(i)
end do
elec_dist_gl(i, 4, j) = 2.0d0 * rij_inv(i)
end do
end do
! Not necessary: should be set to zero by qmckl_malloc
een_rescaled_e_gl(:,:,:,0,nw) = 0.d0
do l = 1, cord_num
kappa_l = - dble(l) * rescale_factor_ee
do j = 1, elec_num
! Prepare table of exponentiated distances raised to appropriate power
do j = 1, elec_num
do i = 1, j-1
rij_inv(i) = 1.0d0 / ee_distance(i, j, nw)
enddo
rij_inv(j) = 0.0d0
do i = j+1, elec_num
rij_inv(i) = 1.0d0 / ee_distance(i, j, nw)
enddo
do i = 1, elec_num
een_rescaled_e_gl(i, 1, j, l, nw) = kappa_l * elec_dist_gl(i, 1, j)
een_rescaled_e_gl(i, 2, j, l, nw) = kappa_l * elec_dist_gl(i, 2, j)
een_rescaled_e_gl(i, 3, j, l, nw) = kappa_l * elec_dist_gl(i, 3, j)
een_rescaled_e_gl(i, 4, j, l, nw) = kappa_l * elec_dist_gl(i, 4, j)
end do
do i = 1, elec_num
een_rescaled_e_gl(i, 4, j, l, nw) = een_rescaled_e_gl(i, 4, j, l, nw) &
+ een_rescaled_e_gl(i, 1, j, l, nw) * een_rescaled_e_gl(i, 1, j, l, nw) &
+ een_rescaled_e_gl(i, 2, j, l, nw) * een_rescaled_e_gl(i, 2, j, l, nw) &
+ een_rescaled_e_gl(i, 3, j, l, nw) * een_rescaled_e_gl(i, 3, j, l, nw)
do ii = 1, 3
elec_dist_gl(i, ii, j) = (coord_ee(i, ii, nw) - coord_ee(j, ii, nw)) * rij_inv(i)
end do
elec_dist_gl(i, 4, j) = 2.0d0 * rij_inv(i)
end do
end do
do i = 1, elec_num
een_rescaled_e_gl(i,1,j,l,nw) = een_rescaled_e_gl(i,1,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,2,j,l,nw) = een_rescaled_e_gl(i,2,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,3,j,l,nw) = een_rescaled_e_gl(i,3,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,4,j,l,nw) = een_rescaled_e_gl(i,4,j,l,nw) * een_rescaled_e(i,j,l,nw)
end do
end do
end do
! Not necessary: should be set to zero by qmckl_malloc
een_rescaled_e_gl(:,:,:,0,nw) = 0.d0
do l = 1, cord_num
kappa_l = - dble(l) * rescale_factor_ee
do j = 1, elec_num
do i = 1, elec_num
een_rescaled_e_gl(i, 1, j, l, nw) = kappa_l * elec_dist_gl(i, 1, j)
een_rescaled_e_gl(i, 2, j, l, nw) = kappa_l * elec_dist_gl(i, 2, j)
een_rescaled_e_gl(i, 3, j, l, nw) = kappa_l * elec_dist_gl(i, 3, j)
een_rescaled_e_gl(i, 4, j, l, nw) = kappa_l * elec_dist_gl(i, 4, j)
end do
do i = 1, elec_num
een_rescaled_e_gl(i, 4, j, l, nw) = een_rescaled_e_gl(i, 4, j, l, nw) &
+ een_rescaled_e_gl(i, 1, j, l, nw) * een_rescaled_e_gl(i, 1, j, l, nw) &
+ een_rescaled_e_gl(i, 2, j, l, nw) * een_rescaled_e_gl(i, 2, j, l, nw) &
+ een_rescaled_e_gl(i, 3, j, l, nw) * een_rescaled_e_gl(i, 3, j, l, nw)
end do
do i = 1, elec_num
een_rescaled_e_gl(i,1,j,l,nw) = een_rescaled_e_gl(i,1,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,2,j,l,nw) = een_rescaled_e_gl(i,2,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,3,j,l,nw) = een_rescaled_e_gl(i,3,j,l,nw) * een_rescaled_e(i,j,l,nw)
een_rescaled_e_gl(i,4,j,l,nw) = een_rescaled_e_gl(i,4,j,l,nw) * een_rescaled_e(i,j,l,nw)
end do
end do
end do
end do
end function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f
@ -7652,21 +7652,27 @@ end function qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_f
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes
qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
const qmckl_context context,
const int64_t walk_num,
const int64_t elec_num,
const int64_t cord_num,
const double rescale_factor_ee,
const double* coord_ee,
const double* ee_distance,
const double* een_rescaled_e,
double* const een_rescaled_e_gl )
qmckl_exit_code
qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
const qmckl_context context,
const int64_t walk_num,
const int64_t elec_num,
const int64_t cord_num,
const double rescale_factor_ee,
const double* coord_ee,
const double* ee_distance,
const double* een_rescaled_e,
double* const een_rescaled_e_gl )
{
if (context == QMCKL_NULL_CONTEXT) return QMCKL_INVALID_CONTEXT;
if (walk_num <= 0) return QMCKL_INVALID_ARG_2;
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
if (context == QMCKL_NULL_CONTEXT) return QMCKL_INVALID_CONTEXT;
if (walk_num <= 0) return QMCKL_INVALID_ARG_2;
if (elec_num <= 0) return QMCKL_INVALID_ARG_3;
if (cord_num < 0) return QMCKL_INVALID_ARG_4;
#ifdef HAVE_OPENMP
#pragma omp parallel
#endif
{
double* restrict elec_dist_gl0 = (double*) calloc(elec_num * elec_num, sizeof(double));
double* restrict elec_dist_gl1 = (double*) calloc(elec_num * elec_num, sizeof(double));
@ -7678,95 +7684,116 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl_hpc (
assert (elec_dist_gl3 != NULL);
#ifdef HAVE_OPENMP
#pragma omp parallel for
#pragma omp for
#endif
for (int64_t nw = 0; nw < walk_num; ++nw) {
double rij_inv[elec_num];
for (int64_t j = 0; j < elec_num; ++j) {
double rij_inv[elec_num];
for (int64_t j=0; j<elec_num; ++j) {
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = ee_distance[i + j * elec_num + nw * elec_num * elec_num] + 1.e-30;
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = 1.0/rij_inv[i];
}
rij_inv[j] = 0.;
const double xj = coord_ee[j + nw * elec_num * 3];
const double yj = coord_ee[j + elec_num + nw * elec_num * 3];
const double zj = coord_ee[j + 2 * elec_num + nw * elec_num * 3];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
const double xi = coord_ee[i + nw * elec_num * 3];
const double yi = coord_ee[i + elec_num + nw * elec_num * 3];
const double zi = coord_ee[i + 2 * elec_num + nw * elec_num * 3];
elec_dist_gl0[i + j * elec_num] = rij_inv[i] * (xi-xj);
elec_dist_gl1[i + j * elec_num] = rij_inv[i] * (yi-yj);
elec_dist_gl2[i + j * elec_num] = rij_inv[i] * (zi-zj);
elec_dist_gl3[i + j * elec_num] = rij_inv[i] + rij_inv[i];
}
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = ee_distance[i+j*elec_num+nw*elec_num*elec_num];
}
for (int64_t j = 0; j < elec_num; ++j) {
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (cord_num + 1) * nw)];
rij_inv[j] = 1.0;
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < 4*elec_num; ++i) {
eegl[i] = 0.0;
for (int64_t i = 0; i < elec_num ; ++i) {
rij_inv[i] = 1.0/rij_inv[i];
}
rij_inv[j] = 0.0;
const double xj = coord_ee[j + nw * elec_num * 3];
const double yj = coord_ee[j + elec_num + nw * elec_num * 3];
const double zj = coord_ee[j + 2 * elec_num + nw * elec_num * 3];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num ; ++i) {
const double xi = coord_ee[i + nw * elec_num * 3];
const double yi = coord_ee[i + elec_num + nw * elec_num * 3];
const double zi = coord_ee[i + 2 * elec_num + nw * elec_num * 3];
elec_dist_gl0[i + j * elec_num] = rij_inv[i] * (xi-xj);
elec_dist_gl1[i + j * elec_num] = rij_inv[i] * (yi-yj);
elec_dist_gl2[i + j * elec_num] = rij_inv[i] * (zi-zj);
elec_dist_gl3[i + j * elec_num] = rij_inv[i] + rij_inv[i];
}
}
for (int64_t j = 0; j < elec_num; ++j) {
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (cord_num + 1) * nw)];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < 4*elec_num; ++i) {
eegl[i] = 0.0;
}
}
for (int64_t l=1; l<=cord_num; ++l) {
double kappa_l = - (double)l * rescale_factor_ee;
for (int64_t j=0; j<elec_num; ++j) {
double* restrict eegl =
&een_rescaled_e_gl[elec_num*4*(j+elec_num*(l+(cord_num+1)*nw))];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0; i<elec_num; ++i) {
eegl[i ] = kappa_l * elec_dist_gl0[i + j * elec_num];
eegl[i + elec_num ] = kappa_l * elec_dist_gl1[i + j * elec_num];
eegl[i + elec_num * 2] = kappa_l * elec_dist_gl2[i + j * elec_num];
eegl[i + elec_num * 3] = kappa_l * elec_dist_gl3[i + j * elec_num];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0; i<elec_num; ++i) {
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
eegl[i] * eegl[i] +
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
eegl[i + elec_num*2] * eegl[i + elec_num*2];
}
const double* restrict ee =
&een_rescaled_e[elec_num*(j+elec_num*(l+(cord_num+1)*nw))];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0; i<elec_num; ++i) {
eegl[i ] *= ee[i];
eegl[i + elec_num * 1] *= ee[i];
eegl[i + elec_num * 2] *= ee[i];
eegl[i + elec_num * 3] *= ee[i];
}
}
for (int64_t l = 1; l <= cord_num; ++l) {
double kappa_l = - (double)l * rescale_factor_ee;
for (int64_t j = 0; j < elec_num; ++j) {
double* restrict eegl = &een_rescaled_e_gl[ elec_num * 4 * (j + elec_num * (l + (cord_num + 1) * nw))];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i ] = kappa_l * elec_dist_gl0[i + j * elec_num];
eegl[i + elec_num ] = kappa_l * elec_dist_gl1[i + j * elec_num];
eegl[i + elec_num * 2] = kappa_l * elec_dist_gl2[i + j * elec_num];
eegl[i + elec_num * 3] = kappa_l * elec_dist_gl3[i + j * elec_num];
}
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i + elec_num*3] = eegl[i + elec_num*3] +
eegl[i] * eegl[i] +
eegl[i + elec_num*1] * eegl[i + elec_num*1] +
eegl[i + elec_num*2] * eegl[i + elec_num*2];
}
const double* restrict ee = &een_rescaled_e[ elec_num * (j + elec_num * (l + (cord_num + 1) * nw))];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i = 0; i < elec_num; ++i) {
eegl[i ] *= ee[i];
eegl[i + elec_num * 1] *= ee[i];
eegl[i + elec_num * 2] *= ee[i];
eegl[i + elec_num * 3] *= ee[i];
}
}
}
}
}
free(elec_dist_gl0);
free(elec_dist_gl1);
free(elec_dist_gl2);
free(elec_dist_gl3);
}
return QMCKL_SUCCESS;
return QMCKL_SUCCESS;
}
#+end_src
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes
qmckl_exit_code qmckl_compute_jastrow_champ_factor_een_rescaled_e_gl (
@ -7848,10 +7875,10 @@ for l in range(0,cord_num+1):
for i in range(0,elec_num):
for ii in range(0,4):
een_rescaled_e_gl[i,ii,j,l] = kappa_l * elec_dist_gl[ii,i,j]
een_rescaled_e_gl[i,3,j,l] = een_rescaled_e_gl[i,3,j,l] + \
een_rescaled_e_gl[i,0,j,l] * een_rescaled_e_gl[i,0,j,l] + \
een_rescaled_e_gl[i,1,j,l] * een_rescaled_e_gl[i,1,j,l] + \
een_rescaled_e_gl[i,2,j,l] * een_rescaled_e_gl[i,2,j,l]
een_rescaled_e_gl[i,3,j,l] = een_rescaled_e_gl[i,3,j,l] + \
een_rescaled_e_gl[i,0,j,l] * een_rescaled_e_gl[i,0,j,l] + \
een_rescaled_e_gl[i,1,j,l] * een_rescaled_e_gl[i,1,j,l] + \
een_rescaled_e_gl[i,2,j,l] * een_rescaled_e_gl[i,2,j,l]
for ii in range(0,4):
een_rescaled_e_gl[i,ii,j,l] = een_rescaled_e_gl[i,ii,j,l] * een_rescaled_e[i,j,l]
@ -11222,6 +11249,7 @@ end function qmckl_compute_jastrow_champ_factor_een_gl_naive_f
:END:
#+NAME: qmckl_factor_een_gl_args
|-----------------------+---------------------------------------------------------------------+--------+------------------------------------------------|
| Variable | Type | In/Out | Description |
|-----------------------+---------------------------------------------------------------------+--------+------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
@ -11237,6 +11265,7 @@ end function qmckl_compute_jastrow_champ_factor_een_gl_naive_f
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
| ~een_rescaled_n_gl~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor |
| ~factor_een_gl~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow |
|-----------------------+---------------------------------------------------------------------+--------+------------------------------------------------|
#+begin_src f90 :comments org :tangle (eval f) :noweb yes
@ -11491,7 +11520,8 @@ qmckl_compute_jastrow_champ_factor_een_gl_hpc(const qmckl_context context,
#pragma omp parallel for
#endif
for (size_t nw = 0; nw < (size_t) walk_num; ++nw) {
memset(&factor_een_gl[elec_num*4*nw], 0, elec_num*4*sizeof(double));
double* const restrict factor_een_gl_0nw = &(factor_een_gl[elec_num*4*nw]);
memset(factor_een_gl_0nw, 0, elec_num*4*sizeof(double));
}
return QMCKL_SUCCESS;
}