1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-07-17 16:33:59 +02:00

Fixed bug in ao_value

This commit is contained in:
Anthony Scemama 2022-08-14 16:09:58 +02:00
parent 725e488199
commit 5512c1b41c
2 changed files with 116 additions and 130 deletions

View File

@ -5648,33 +5648,14 @@ end function qmckl_compute_ao_value_doc_f
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
#ifdef HAVE_HPC
qmckl_exit_code
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_value )
{
int32_t lstart[32];
#+NAME:ao_value_constants
#+begin_src c :exports none
int32_t lstart[32] __attribute__((aligned(64)));
for (int32_t l=0 ; l<32 ; ++l) {
lstart[l] = l*(l+1)*(l+2)/6;
}
int64_t ao_index[shell_num+1];
int64_t ao_index[shell_num+1] __attribute__((aligned(64)));
int64_t size_max = 0;
int64_t prim_max = 0;
int64_t shell_max = 0;
@ -5700,17 +5681,42 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
/* Don't compute polynomials when the radial part is zero. */
double cutoff = 27.631021115928547; // -log(1.e-12)
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
#ifdef HAVE_HPC
qmckl_exit_code
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_value )
{
<<ao_value_constants>>
#ifdef HAVE_OPENMP
#pragma omp parallel
#endif
{
qmckl_exit_code rc;
double ar2[prim_max];
int32_t powers[3*size_max];
double poly_vgl[5*size_max];
double ar2[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max] __attribute__((aligned(64)));
double poly_vgl[5*size_max] __attribute__((aligned(64)));
double exp_mat[prim_max];
double ce_mat[shell_max];
double exp_mat[prim_max] __attribute__((aligned(64)));
double ce_mat[shell_max] __attribute__((aligned(64)));
double coef_mat[nucl_num][shell_max][prim_max];
for (int i=0 ; i<nucl_num ; ++i) {
@ -5725,15 +5731,17 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
#pragma omp for
#endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] = { coord[ipoint],
coord[ipoint + point_num],
coord[ipoint + 2*point_num] };
const double e_coord[3] __attribute__((aligned(64))) =
{ coord[ipoint],
coord[ipoint + point_num],
coord[ipoint + 2*point_num] };
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
const double n_coord[3] = { nucl_coord[inucl],
nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] };
const double n_coord[3] __attribute__((aligned(64))) =
{ nucl_coord[inucl],
nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] };
/* Test if the point is in the range of the nucleus */
const double x = e_coord[0] - n_coord[0];
const double y = e_coord[1] - n_coord[1];
@ -5751,14 +5759,14 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
break;
case 1:
poly_vgl[0] = 0.;
poly_vgl[0] = 1.;
poly_vgl[1] = x;
poly_vgl[2] = y;
poly_vgl[3] = z;
break;
case 2:
poly_vgl[0] = 0.;
poly_vgl[0] = 1.;
poly_vgl[1] = x;
poly_vgl[2] = y;
poly_vgl[3] = z;
@ -5797,10 +5805,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
}
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
ce_mat[i] = 0.;
}
for (int k=0 ; k<nidx; ++k) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
ce_mat[i] = 0.;
for (int k=0 ; k<nidx; ++k) {
ce_mat[i] += coef_mat[inucl][i][k] * exp_mat[k];
}
}
@ -5811,7 +5817,6 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const double s1 = ce_mat[ishell-ishell_start];
if (s1 == 0.0) continue;
const int64_t k = ao_index[ishell];
double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k;
@ -5819,6 +5824,13 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
if (s1 == 0.0) {
for (int64_t il=0 ; il<n ; ++il) {
ao_value_1[il] = 0.;
}
continue;
}
double* restrict poly_vgl_1 = NULL;
if (nidx > 0) {
const double* restrict f = ao_factor + k;
@ -5827,10 +5839,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
poly_vgl_1 = &(poly_vgl[idx]);
switch (n) {
case(1):
case 1:
ao_value_1[0] = s1 * f[0];
break;
case (3):
case 3:
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -5848,7 +5860,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
break;
default:
#ifdef HAVE_OPENMP
#pragma omp simd simdlen(8)
#pragma omp simd
#endif
for (int il=0 ; il<n ; ++il) {
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -6470,36 +6482,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_vgl )
{
int32_t lstart[32];
for (int32_t l=0 ; l<32 ; ++l) {
lstart[l] = l*(l+1)*(l+2)/6;
}
int64_t ao_index[shell_num+1];
int64_t size_max = 0;
int64_t prim_max = 0;
int64_t shell_max = 0;
{
int64_t k=0;
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
prim_max = prim_num_per_nucleus[inucl] > prim_max ?
prim_num_per_nucleus[inucl] : prim_max;
shell_max = nucleus_shell_num[inucl] > shell_max ?
nucleus_shell_num[inucl] : shell_max;
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const int l = shell_ang_mom[ishell];
ao_index[ishell] = k;
k += lstart[l+1] - lstart[l];
size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max;
}
}
ao_index[shell_num] = ao_num+1;
}
/* Don't compute when the radial part is zero. */
double cutoff = 27.631021115928547; // -log(1.e-12)
<<ao_value_constants>>
#ifdef HAVE_OPENMP
#pragma omp parallel
@ -6508,18 +6491,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
qmckl_exit_code rc;
double ar2[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max] __attribute__((aligned(64)));
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
{{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
{0.0, 0.0, 0.0, 1.0}};
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max] __attribute__((aligned(64)));
double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
@ -6533,20 +6504,33 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
}
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
{{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
{0.0, 0.0, 0.0, 1.0}};
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max] __attribute__((aligned(64)));
#ifdef HAVE_OPENMP
#pragma omp for
#endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] __attribute__((aligned(64))) =
{ coord[ipoint],
coord[ipoint + point_num],
coord[ipoint + 2*point_num] };
{ coord[ipoint],
coord[ipoint + point_num],
coord[ipoint + 2*point_num] };
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
const double n_coord[3] __attribute__((aligned(64))) =
{ nucl_coord[inucl],
nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] };
{ nucl_coord[inucl],
nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] };
/* Test if the point is in the range of the nucleus */
const double x = e_coord[0] - n_coord[0];
@ -6616,6 +6600,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim][0] = exp(-ar2[iprim]);
}
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
f = -f-f;
@ -6628,21 +6613,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
/* --- */
switch (8) {
case(5):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
for (int k=0 ; k<nidx; ++k) {
const double cm = coef_mat[inucl][i][k];
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += cm * exp_mat[k][j];
}
}
}
break;
case(8):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
@ -6664,6 +6634,21 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
break;
case(5):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
for (int k=0 ; k<nidx; ++k) {
const double cm = coef_mat[inucl][i][k];
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += cm * exp_mat[k][j];
}
}
}
break;
case(512):
for(int i=0; i<nucleus_shell_num[inucl]; ++i){
__m512d cemat_avx512;
@ -6778,14 +6763,14 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_5 = &(poly_vgl[4][idx]);
}
switch (n) {
case(1):
case 1:
ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5;
break;
case (3):
case 3:
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6800,7 +6785,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_4[il] * s4 )) * f[il];
}
break;
case(6):
case 6:
#ifdef HAVE_OPENMP
#pragma omp simd
#endif

View File

@ -628,15 +628,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
rc = qmckl_provide_ao_basis_ao_value(context);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context,
QMCKL_NOT_PROVIDED,
"qmckl_ao_value",
NULL);
}
if (ctx->mo_basis.mo_vgl_date == ctx->point.date) {
// mo_vgl has been computed at this step: Just copy the data.
@ -653,6 +646,14 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
} else {
rc = qmckl_provide_ao_basis_ao_value(context);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context,
QMCKL_NOT_PROVIDED,
"qmckl_ao_value",
NULL);
}
rc = qmckl_compute_mo_basis_mo_value(context,
ctx->ao_basis.ao_num,
ctx->mo_basis.mo_num,
@ -664,6 +665,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
}
#+end_src
#+CALL: write_provider_post( group="mo_basis", data="mo_value" )
#+RESULTS:
@ -727,8 +730,8 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
do j=1,point_num
mo_value(:,j) = 0.d0
do k=1,ao_num
if (ao_value(k,j) /= 0.d0) then
c1 = ao_value(k,j)
c1 = ao_value(k,j)
if (c1 /= 0.d0) then
do i=1,mo_num
mo_value(i,j) = mo_value(i,j) + coefficient_t(i,k) * c1
end do
@ -736,7 +739,7 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
end do
end do
else ! dgemm
else ! dgemm for checking
LDA = size(coefficient_t,1)
LDB = size(ao_value,1)
@ -874,7 +877,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
}
}
int64_t n;
int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) {
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
@ -895,8 +898,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
}
}
const int64_t n0 = n < 0 ? 0 : n;
for (int64_t m=n0 ; m < nidx ; m+=1) {
for (int64_t m=n ; m < nidx ; m+=1) {
const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m];
@ -1355,7 +1357,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
}
}
int64_t n;
int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) {
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
@ -1400,8 +1402,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
}
}
const int64_t n0 = n < 0 ? 0 : n;
for (int64_t m=n0 ; m < nidx ; m+=1) {
for (int64_t m=n ; m < nidx ; m+=1) {
const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m];
const double a2 = av2[m];