1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 20:36:01 +01:00

Fixed bug in ao_value

This commit is contained in:
Anthony Scemama 2022-08-14 16:09:58 +02:00
parent 725e488199
commit 5512c1b41c
2 changed files with 116 additions and 130 deletions

View File

@ -5648,33 +5648,14 @@ end function qmckl_compute_ao_value_doc_f
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none #+NAME:ao_value_constants
#ifdef HAVE_HPC #+begin_src c :exports none
qmckl_exit_code int32_t lstart[32] __attribute__((aligned(64)));
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_value )
{
int32_t lstart[32];
for (int32_t l=0 ; l<32 ; ++l) { for (int32_t l=0 ; l<32 ; ++l) {
lstart[l] = l*(l+1)*(l+2)/6; lstart[l] = l*(l+1)*(l+2)/6;
} }
int64_t ao_index[shell_num+1]; int64_t ao_index[shell_num+1] __attribute__((aligned(64)));
int64_t size_max = 0; int64_t size_max = 0;
int64_t prim_max = 0; int64_t prim_max = 0;
int64_t shell_max = 0; int64_t shell_max = 0;
@ -5700,17 +5681,42 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
/* Don't compute polynomials when the radial part is zero. */ /* Don't compute polynomials when the radial part is zero. */
double cutoff = 27.631021115928547; // -log(1.e-12) double cutoff = 27.631021115928547; // -log(1.e-12)
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
#ifdef HAVE_HPC
qmckl_exit_code
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int64_t ao_num,
const int64_t shell_num,
const int32_t* restrict prim_num_per_nucleus,
const int64_t point_num,
const int64_t nucl_num,
const double* restrict coord,
const double* restrict nucl_coord,
const int64_t* restrict nucleus_index,
const int64_t* restrict nucleus_shell_num,
const double* nucleus_range,
const int32_t* restrict nucleus_max_ang_mom,
const int32_t* restrict shell_ang_mom,
const double* restrict ao_factor,
const qmckl_matrix expo_per_nucleus,
const qmckl_tensor coef_per_nucleus,
double* restrict const ao_value )
{
<<ao_value_constants>>
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp parallel #pragma omp parallel
#endif #endif
{ {
qmckl_exit_code rc; qmckl_exit_code rc;
double ar2[prim_max]; double ar2[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max]; int32_t powers[3*size_max] __attribute__((aligned(64)));
double poly_vgl[5*size_max]; double poly_vgl[5*size_max] __attribute__((aligned(64)));
double exp_mat[prim_max]; double exp_mat[prim_max] __attribute__((aligned(64)));
double ce_mat[shell_max]; double ce_mat[shell_max] __attribute__((aligned(64)));
double coef_mat[nucl_num][shell_max][prim_max]; double coef_mat[nucl_num][shell_max][prim_max];
for (int i=0 ; i<nucl_num ; ++i) { for (int i=0 ; i<nucl_num ; ++i) {
@ -5725,15 +5731,17 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
#pragma omp for #pragma omp for
#endif #endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] = { coord[ipoint], const double e_coord[3] __attribute__((aligned(64))) =
coord[ipoint + point_num], { coord[ipoint],
coord[ipoint + 2*point_num] }; coord[ipoint + point_num],
coord[ipoint + 2*point_num] };
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) { for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
const double n_coord[3] = { nucl_coord[inucl], const double n_coord[3] __attribute__((aligned(64))) =
nucl_coord[inucl + nucl_num], { nucl_coord[inucl],
nucl_coord[inucl + 2*nucl_num] }; nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] };
/* Test if the point is in the range of the nucleus */ /* Test if the point is in the range of the nucleus */
const double x = e_coord[0] - n_coord[0]; const double x = e_coord[0] - n_coord[0];
const double y = e_coord[1] - n_coord[1]; const double y = e_coord[1] - n_coord[1];
@ -5751,14 +5759,14 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
break; break;
case 1: case 1:
poly_vgl[0] = 0.; poly_vgl[0] = 1.;
poly_vgl[1] = x; poly_vgl[1] = x;
poly_vgl[2] = y; poly_vgl[2] = y;
poly_vgl[3] = z; poly_vgl[3] = z;
break; break;
case 2: case 2:
poly_vgl[0] = 0.; poly_vgl[0] = 1.;
poly_vgl[1] = x; poly_vgl[1] = x;
poly_vgl[2] = y; poly_vgl[2] = y;
poly_vgl[3] = z; poly_vgl[3] = z;
@ -5797,10 +5805,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
} }
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) { for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
ce_mat[i] = 0.; ce_mat[i] = 0.;
} for (int k=0 ; k<nidx; ++k) {
for (int k=0 ; k<nidx; ++k) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
ce_mat[i] += coef_mat[inucl][i][k] * exp_mat[k]; ce_mat[i] += coef_mat[inucl][i][k] * exp_mat[k];
} }
} }
@ -5811,7 +5817,6 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const double s1 = ce_mat[ishell-ishell_start]; const double s1 = ce_mat[ishell-ishell_start];
if (s1 == 0.0) continue;
const int64_t k = ao_index[ishell]; const int64_t k = ao_index[ishell];
double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k; double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k;
@ -5819,6 +5824,13 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int32_t l = shell_ang_mom[ishell]; const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l]; const int32_t n = lstart[l+1]-lstart[l];
if (s1 == 0.0) {
for (int64_t il=0 ; il<n ; ++il) {
ao_value_1[il] = 0.;
}
continue;
}
double* restrict poly_vgl_1 = NULL; double* restrict poly_vgl_1 = NULL;
if (nidx > 0) { if (nidx > 0) {
const double* restrict f = ao_factor + k; const double* restrict f = ao_factor + k;
@ -5827,10 +5839,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
poly_vgl_1 = &(poly_vgl[idx]); poly_vgl_1 = &(poly_vgl[idx]);
switch (n) { switch (n) {
case(1): case 1:
ao_value_1[0] = s1 * f[0]; ao_value_1[0] = s1 * f[0];
break; break;
case (3): case 3:
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
@ -5848,7 +5860,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
break; break;
default: default:
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd simdlen(8) #pragma omp simd
#endif #endif
for (int il=0 ; il<n ; ++il) { for (int il=0 ; il<n ; ++il) {
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
@ -6470,36 +6482,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const qmckl_tensor coef_per_nucleus, const qmckl_tensor coef_per_nucleus,
double* restrict const ao_vgl ) double* restrict const ao_vgl )
{ {
int32_t lstart[32]; <<ao_value_constants>>
for (int32_t l=0 ; l<32 ; ++l) {
lstart[l] = l*(l+1)*(l+2)/6;
}
int64_t ao_index[shell_num+1];
int64_t size_max = 0;
int64_t prim_max = 0;
int64_t shell_max = 0;
{
int64_t k=0;
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
prim_max = prim_num_per_nucleus[inucl] > prim_max ?
prim_num_per_nucleus[inucl] : prim_max;
shell_max = nucleus_shell_num[inucl] > shell_max ?
nucleus_shell_num[inucl] : shell_max;
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const int l = shell_ang_mom[ishell];
ao_index[ishell] = k;
k += lstart[l+1] - lstart[l];
size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max;
}
}
ao_index[shell_num] = ao_num+1;
}
/* Don't compute when the radial part is zero. */
double cutoff = 27.631021115928547; // -log(1.e-12)
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp parallel #pragma omp parallel
@ -6508,18 +6491,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
qmckl_exit_code rc; qmckl_exit_code rc;
double ar2[prim_max] __attribute__((aligned(64))); double ar2[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max] __attribute__((aligned(64))); int32_t powers[3*size_max] __attribute__((aligned(64)));
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
{{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
{0.0, 0.0, 0.0, 1.0}};
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max] __attribute__((aligned(64)));
double exp_mat[prim_max][8] __attribute__((aligned(64))) ; double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
double ce_mat[shell_max][8] __attribute__((aligned(64))) ; double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
@ -6533,20 +6504,33 @@ qmckl_compute_ao_vgl_hpc_gaussian (
} }
} }
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
{{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
{0.0, 0.0, 0.0, 1.0}};
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max] __attribute__((aligned(64)));
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp for #pragma omp for
#endif #endif
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
const double e_coord[3] __attribute__((aligned(64))) = const double e_coord[3] __attribute__((aligned(64))) =
{ coord[ipoint], { coord[ipoint],
coord[ipoint + point_num], coord[ipoint + point_num],
coord[ipoint + 2*point_num] }; coord[ipoint + 2*point_num] };
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) { for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
const double n_coord[3] __attribute__((aligned(64))) = const double n_coord[3] __attribute__((aligned(64))) =
{ nucl_coord[inucl], { nucl_coord[inucl],
nucl_coord[inucl + nucl_num], nucl_coord[inucl + nucl_num],
nucl_coord[inucl + 2*nucl_num] }; nucl_coord[inucl + 2*nucl_num] };
/* Test if the point is in the range of the nucleus */ /* Test if the point is in the range of the nucleus */
const double x = e_coord[0] - n_coord[0]; const double x = e_coord[0] - n_coord[0];
@ -6616,6 +6600,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim][0] = exp(-ar2[iprim]); exp_mat[iprim][0] = exp(-ar2[iprim]);
} }
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0]; double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
f = -f-f; f = -f-f;
@ -6628,21 +6613,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
/* --- */ /* --- */
switch (8) { switch (8) {
case(5):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
for (int k=0 ; k<nidx; ++k) {
const double cm = coef_mat[inucl][i][k];
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += cm * exp_mat[k][j];
}
}
}
break;
case(8): case(8):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) { for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
@ -6664,6 +6634,21 @@ qmckl_compute_ao_vgl_hpc_gaussian (
} }
break; break;
case(5):
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] = 0.;
}
for (int k=0 ; k<nidx; ++k) {
const double cm = coef_mat[inucl][i][k];
for (int j=0 ; j<5 ; ++j) {
ce_mat[i][j] += cm * exp_mat[k][j];
}
}
}
break;
case(512): case(512):
for(int i=0; i<nucleus_shell_num[inucl]; ++i){ for(int i=0; i<nucleus_shell_num[inucl]; ++i){
__m512d cemat_avx512; __m512d cemat_avx512;
@ -6778,14 +6763,14 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_5 = &(poly_vgl[4][idx]); poly_vgl_5 = &(poly_vgl[4][idx]);
} }
switch (n) { switch (n) {
case(1): case 1:
ao_vgl_1[0] = s1 * f[0]; ao_vgl_1[0] = s1 * f[0];
ao_vgl_2[0] = s2 * f[0]; ao_vgl_2[0] = s2 * f[0];
ao_vgl_3[0] = s3 * f[0]; ao_vgl_3[0] = s3 * f[0];
ao_vgl_4[0] = s4 * f[0]; ao_vgl_4[0] = s4 * f[0];
ao_vgl_5[0] = s5; ao_vgl_5[0] = s5;
break; break;
case (3): case 3:
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
@ -6800,7 +6785,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_4[il] * s4 )) * f[il]; poly_vgl_4[il] * s4 )) * f[il];
} }
break; break;
case(6): case 6:
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif

View File

@ -628,15 +628,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
#+end_src #+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
rc = qmckl_provide_ao_basis_ao_value(context);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context,
QMCKL_NOT_PROVIDED,
"qmckl_ao_value",
NULL);
}
if (ctx->mo_basis.mo_vgl_date == ctx->point.date) { if (ctx->mo_basis.mo_vgl_date == ctx->point.date) {
// mo_vgl has been computed at this step: Just copy the data. // mo_vgl has been computed at this step: Just copy the data.
@ -653,6 +646,14 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
} else { } else {
rc = qmckl_provide_ao_basis_ao_value(context);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context,
QMCKL_NOT_PROVIDED,
"qmckl_ao_value",
NULL);
}
rc = qmckl_compute_mo_basis_mo_value(context, rc = qmckl_compute_mo_basis_mo_value(context,
ctx->ao_basis.ao_num, ctx->ao_basis.ao_num,
ctx->mo_basis.mo_num, ctx->mo_basis.mo_num,
@ -664,6 +665,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
} }
#+end_src #+end_src
#+CALL: write_provider_post( group="mo_basis", data="mo_value" ) #+CALL: write_provider_post( group="mo_basis", data="mo_value" )
#+RESULTS: #+RESULTS:
@ -727,8 +730,8 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
do j=1,point_num do j=1,point_num
mo_value(:,j) = 0.d0 mo_value(:,j) = 0.d0
do k=1,ao_num do k=1,ao_num
if (ao_value(k,j) /= 0.d0) then c1 = ao_value(k,j)
c1 = ao_value(k,j) if (c1 /= 0.d0) then
do i=1,mo_num do i=1,mo_num
mo_value(i,j) = mo_value(i,j) + coefficient_t(i,k) * c1 mo_value(i,j) = mo_value(i,j) + coefficient_t(i,k) * c1
end do end do
@ -736,7 +739,7 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
end do end do
end do end do
else ! dgemm else ! dgemm for checking
LDA = size(coefficient_t,1) LDA = size(coefficient_t,1)
LDB = size(ao_value,1) LDB = size(ao_value,1)
@ -874,7 +877,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
} }
} }
int64_t n; int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) { for (n=0 ; n < nidx-4 ; n+=4) {
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num; const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
@ -895,8 +898,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
} }
} }
const int64_t n0 = n < 0 ? 0 : n; for (int64_t m=n ; m < nidx ; m+=1) {
for (int64_t m=n0 ; m < nidx ; m+=1) {
const double* restrict ck = coefficient_t + idx[m]*mo_num; const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m]; const double a1 = av1[m];
@ -1355,7 +1357,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
} }
} }
int64_t n; int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) { for (n=0 ; n < nidx-4 ; n+=4) {
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num; const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
@ -1400,8 +1402,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
} }
} }
const int64_t n0 = n < 0 ? 0 : n; for (int64_t m=n ; m < nidx ; m+=1) {
for (int64_t m=n0 ; m < nidx ; m+=1) {
const double* restrict ck = coefficient_t + idx[m]*mo_num; const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m]; const double a1 = av1[m];
const double a2 = av2[m]; const double a2 = av2[m];