mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Fixed bug in ao_value
This commit is contained in:
parent
725e488199
commit
5512c1b41c
211
org/qmckl_ao.org
211
org/qmckl_ao.org
@ -5648,33 +5648,14 @@ end function qmckl_compute_ao_value_doc_f
|
||||
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
|
||||
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
|
||||
#ifdef HAVE_HPC
|
||||
qmckl_exit_code
|
||||
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
const int64_t ao_num,
|
||||
const int64_t shell_num,
|
||||
const int32_t* restrict prim_num_per_nucleus,
|
||||
const int64_t point_num,
|
||||
const int64_t nucl_num,
|
||||
const double* restrict coord,
|
||||
const double* restrict nucl_coord,
|
||||
const int64_t* restrict nucleus_index,
|
||||
const int64_t* restrict nucleus_shell_num,
|
||||
const double* nucleus_range,
|
||||
const int32_t* restrict nucleus_max_ang_mom,
|
||||
const int32_t* restrict shell_ang_mom,
|
||||
const double* restrict ao_factor,
|
||||
const qmckl_matrix expo_per_nucleus,
|
||||
const qmckl_tensor coef_per_nucleus,
|
||||
double* restrict const ao_value )
|
||||
{
|
||||
int32_t lstart[32];
|
||||
#+NAME:ao_value_constants
|
||||
#+begin_src c :exports none
|
||||
int32_t lstart[32] __attribute__((aligned(64)));
|
||||
for (int32_t l=0 ; l<32 ; ++l) {
|
||||
lstart[l] = l*(l+1)*(l+2)/6;
|
||||
}
|
||||
|
||||
int64_t ao_index[shell_num+1];
|
||||
int64_t ao_index[shell_num+1] __attribute__((aligned(64)));
|
||||
int64_t size_max = 0;
|
||||
int64_t prim_max = 0;
|
||||
int64_t shell_max = 0;
|
||||
@ -5700,17 +5681,42 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
/* Don't compute polynomials when the radial part is zero. */
|
||||
double cutoff = 27.631021115928547; // -log(1.e-12)
|
||||
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
|
||||
#ifdef HAVE_HPC
|
||||
qmckl_exit_code
|
||||
qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
const int64_t ao_num,
|
||||
const int64_t shell_num,
|
||||
const int32_t* restrict prim_num_per_nucleus,
|
||||
const int64_t point_num,
|
||||
const int64_t nucl_num,
|
||||
const double* restrict coord,
|
||||
const double* restrict nucl_coord,
|
||||
const int64_t* restrict nucleus_index,
|
||||
const int64_t* restrict nucleus_shell_num,
|
||||
const double* nucleus_range,
|
||||
const int32_t* restrict nucleus_max_ang_mom,
|
||||
const int32_t* restrict shell_ang_mom,
|
||||
const double* restrict ao_factor,
|
||||
const qmckl_matrix expo_per_nucleus,
|
||||
const qmckl_tensor coef_per_nucleus,
|
||||
double* restrict const ao_value )
|
||||
{
|
||||
<<ao_value_constants>>
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
qmckl_exit_code rc;
|
||||
double ar2[prim_max];
|
||||
int32_t powers[3*size_max];
|
||||
double poly_vgl[5*size_max];
|
||||
double ar2[prim_max] __attribute__((aligned(64)));
|
||||
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
||||
double poly_vgl[5*size_max] __attribute__((aligned(64)));
|
||||
|
||||
double exp_mat[prim_max];
|
||||
double ce_mat[shell_max];
|
||||
double exp_mat[prim_max] __attribute__((aligned(64)));
|
||||
double ce_mat[shell_max] __attribute__((aligned(64)));
|
||||
|
||||
double coef_mat[nucl_num][shell_max][prim_max];
|
||||
for (int i=0 ; i<nucl_num ; ++i) {
|
||||
@ -5725,15 +5731,17 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
|
||||
const double e_coord[3] = { coord[ipoint],
|
||||
coord[ipoint + point_num],
|
||||
coord[ipoint + 2*point_num] };
|
||||
|
||||
const double e_coord[3] __attribute__((aligned(64))) =
|
||||
{ coord[ipoint],
|
||||
coord[ipoint + point_num],
|
||||
coord[ipoint + 2*point_num] };
|
||||
|
||||
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
|
||||
const double n_coord[3] = { nucl_coord[inucl],
|
||||
nucl_coord[inucl + nucl_num],
|
||||
nucl_coord[inucl + 2*nucl_num] };
|
||||
|
||||
const double n_coord[3] __attribute__((aligned(64))) =
|
||||
{ nucl_coord[inucl],
|
||||
nucl_coord[inucl + nucl_num],
|
||||
nucl_coord[inucl + 2*nucl_num] };
|
||||
|
||||
/* Test if the point is in the range of the nucleus */
|
||||
const double x = e_coord[0] - n_coord[0];
|
||||
const double y = e_coord[1] - n_coord[1];
|
||||
@ -5751,14 +5759,14 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
break;
|
||||
|
||||
case 1:
|
||||
poly_vgl[0] = 0.;
|
||||
poly_vgl[0] = 1.;
|
||||
poly_vgl[1] = x;
|
||||
poly_vgl[2] = y;
|
||||
poly_vgl[3] = z;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
poly_vgl[0] = 0.;
|
||||
poly_vgl[0] = 1.;
|
||||
poly_vgl[1] = x;
|
||||
poly_vgl[2] = y;
|
||||
poly_vgl[3] = z;
|
||||
@ -5797,10 +5805,8 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
}
|
||||
|
||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||
ce_mat[i] = 0.;
|
||||
}
|
||||
for (int k=0 ; k<nidx; ++k) {
|
||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||
ce_mat[i] = 0.;
|
||||
for (int k=0 ; k<nidx; ++k) {
|
||||
ce_mat[i] += coef_mat[inucl][i][k] * exp_mat[k];
|
||||
}
|
||||
}
|
||||
@ -5811,7 +5817,6 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
||||
|
||||
const double s1 = ce_mat[ishell-ishell_start];
|
||||
if (s1 == 0.0) continue;
|
||||
|
||||
const int64_t k = ao_index[ishell];
|
||||
double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k;
|
||||
@ -5819,6 +5824,13 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
const int32_t l = shell_ang_mom[ishell];
|
||||
const int32_t n = lstart[l+1]-lstart[l];
|
||||
|
||||
if (s1 == 0.0) {
|
||||
for (int64_t il=0 ; il<n ; ++il) {
|
||||
ao_value_1[il] = 0.;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
double* restrict poly_vgl_1 = NULL;
|
||||
if (nidx > 0) {
|
||||
const double* restrict f = ao_factor + k;
|
||||
@ -5827,10 +5839,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
poly_vgl_1 = &(poly_vgl[idx]);
|
||||
|
||||
switch (n) {
|
||||
case(1):
|
||||
case 1:
|
||||
ao_value_1[0] = s1 * f[0];
|
||||
break;
|
||||
case (3):
|
||||
case 3:
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
@ -5848,7 +5860,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
||||
break;
|
||||
default:
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd simdlen(8)
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int il=0 ; il<n ; ++il) {
|
||||
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
|
||||
@ -6470,36 +6482,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
const qmckl_tensor coef_per_nucleus,
|
||||
double* restrict const ao_vgl )
|
||||
{
|
||||
int32_t lstart[32];
|
||||
for (int32_t l=0 ; l<32 ; ++l) {
|
||||
lstart[l] = l*(l+1)*(l+2)/6;
|
||||
}
|
||||
|
||||
int64_t ao_index[shell_num+1];
|
||||
int64_t size_max = 0;
|
||||
int64_t prim_max = 0;
|
||||
int64_t shell_max = 0;
|
||||
{
|
||||
int64_t k=0;
|
||||
for (int inucl=0 ; inucl < nucl_num ; ++inucl) {
|
||||
prim_max = prim_num_per_nucleus[inucl] > prim_max ?
|
||||
prim_num_per_nucleus[inucl] : prim_max;
|
||||
shell_max = nucleus_shell_num[inucl] > shell_max ?
|
||||
nucleus_shell_num[inucl] : shell_max;
|
||||
const int64_t ishell_start = nucleus_index[inucl];
|
||||
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
||||
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
||||
const int l = shell_ang_mom[ishell];
|
||||
ao_index[ishell] = k;
|
||||
k += lstart[l+1] - lstart[l];
|
||||
size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max;
|
||||
}
|
||||
}
|
||||
ao_index[shell_num] = ao_num+1;
|
||||
}
|
||||
|
||||
/* Don't compute when the radial part is zero. */
|
||||
double cutoff = 27.631021115928547; // -log(1.e-12)
|
||||
<<ao_value_constants>>
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp parallel
|
||||
@ -6508,18 +6491,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
qmckl_exit_code rc;
|
||||
double ar2[prim_max] __attribute__((aligned(64)));
|
||||
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
||||
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
|
||||
{{1.0, 0.0, 0.0, 0.0},
|
||||
{0.0, 1.0, 0.0, 0.0},
|
||||
{0.0, 0.0, 1.0, 0.0},
|
||||
{0.0, 0.0, 0.0, 1.0}};
|
||||
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
|
||||
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
|
||||
double poly_vgl[5][size_max] __attribute__((aligned(64)));
|
||||
|
||||
double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
|
||||
double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
|
||||
@ -6533,20 +6504,33 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
}
|
||||
}
|
||||
|
||||
double poly_vgl_l1[4][4] __attribute__((aligned(64))) =
|
||||
{{1.0, 0.0, 0.0, 0.0},
|
||||
{0.0, 1.0, 0.0, 0.0},
|
||||
{0.0, 0.0, 1.0, 0.0},
|
||||
{0.0, 0.0, 0.0, 1.0}};
|
||||
double poly_vgl_l2[5][10]__attribute__((aligned(64))) =
|
||||
{{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
|
||||
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
|
||||
double poly_vgl[5][size_max] __attribute__((aligned(64)));
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) {
|
||||
const double e_coord[3] __attribute__((aligned(64))) =
|
||||
{ coord[ipoint],
|
||||
coord[ipoint + point_num],
|
||||
coord[ipoint + 2*point_num] };
|
||||
|
||||
{ coord[ipoint],
|
||||
coord[ipoint + point_num],
|
||||
coord[ipoint + 2*point_num] };
|
||||
|
||||
for (int64_t inucl=0 ; inucl < nucl_num ; ++inucl) {
|
||||
const double n_coord[3] __attribute__((aligned(64))) =
|
||||
{ nucl_coord[inucl],
|
||||
nucl_coord[inucl + nucl_num],
|
||||
nucl_coord[inucl + 2*nucl_num] };
|
||||
{ nucl_coord[inucl],
|
||||
nucl_coord[inucl + nucl_num],
|
||||
nucl_coord[inucl + 2*nucl_num] };
|
||||
|
||||
/* Test if the point is in the range of the nucleus */
|
||||
const double x = e_coord[0] - n_coord[0];
|
||||
@ -6616,6 +6600,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||
exp_mat[iprim][0] = exp(-ar2[iprim]);
|
||||
}
|
||||
|
||||
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
|
||||
f = -f-f;
|
||||
@ -6628,21 +6613,6 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
|
||||
/* --- */
|
||||
switch (8) {
|
||||
case(5):
|
||||
|
||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||
for (int j=0 ; j<5 ; ++j) {
|
||||
ce_mat[i][j] = 0.;
|
||||
}
|
||||
for (int k=0 ; k<nidx; ++k) {
|
||||
const double cm = coef_mat[inucl][i][k];
|
||||
for (int j=0 ; j<5 ; ++j) {
|
||||
ce_mat[i][j] += cm * exp_mat[k][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case(8):
|
||||
|
||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||
@ -6664,6 +6634,21 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
}
|
||||
break;
|
||||
|
||||
case(5):
|
||||
|
||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||
for (int j=0 ; j<5 ; ++j) {
|
||||
ce_mat[i][j] = 0.;
|
||||
}
|
||||
for (int k=0 ; k<nidx; ++k) {
|
||||
const double cm = coef_mat[inucl][i][k];
|
||||
for (int j=0 ; j<5 ; ++j) {
|
||||
ce_mat[i][j] += cm * exp_mat[k][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case(512):
|
||||
for(int i=0; i<nucleus_shell_num[inucl]; ++i){
|
||||
__m512d cemat_avx512;
|
||||
@ -6778,14 +6763,14 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
poly_vgl_5 = &(poly_vgl[4][idx]);
|
||||
}
|
||||
switch (n) {
|
||||
case(1):
|
||||
case 1:
|
||||
ao_vgl_1[0] = s1 * f[0];
|
||||
ao_vgl_2[0] = s2 * f[0];
|
||||
ao_vgl_3[0] = s3 * f[0];
|
||||
ao_vgl_4[0] = s4 * f[0];
|
||||
ao_vgl_5[0] = s5;
|
||||
break;
|
||||
case (3):
|
||||
case 3:
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
@ -6800,7 +6785,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
||||
poly_vgl_4[il] * s4 )) * f[il];
|
||||
}
|
||||
break;
|
||||
case(6):
|
||||
case 6:
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
|
@ -628,15 +628,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
|
||||
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes :exports none
|
||||
rc = qmckl_provide_ao_basis_ao_value(context);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context,
|
||||
QMCKL_NOT_PROVIDED,
|
||||
"qmckl_ao_value",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (ctx->mo_basis.mo_vgl_date == ctx->point.date) {
|
||||
|
||||
// mo_vgl has been computed at this step: Just copy the data.
|
||||
@ -653,6 +646,14 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
|
||||
|
||||
} else {
|
||||
|
||||
rc = qmckl_provide_ao_basis_ao_value(context);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context,
|
||||
QMCKL_NOT_PROVIDED,
|
||||
"qmckl_ao_value",
|
||||
NULL);
|
||||
}
|
||||
|
||||
rc = qmckl_compute_mo_basis_mo_value(context,
|
||||
ctx->ao_basis.ao_num,
|
||||
ctx->mo_basis.mo_num,
|
||||
@ -664,6 +665,8 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
#+CALL: write_provider_post( group="mo_basis", data="mo_value" )
|
||||
|
||||
#+RESULTS:
|
||||
@ -727,8 +730,8 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
|
||||
do j=1,point_num
|
||||
mo_value(:,j) = 0.d0
|
||||
do k=1,ao_num
|
||||
if (ao_value(k,j) /= 0.d0) then
|
||||
c1 = ao_value(k,j)
|
||||
c1 = ao_value(k,j)
|
||||
if (c1 /= 0.d0) then
|
||||
do i=1,mo_num
|
||||
mo_value(i,j) = mo_value(i,j) + coefficient_t(i,k) * c1
|
||||
end do
|
||||
@ -736,7 +739,7 @@ integer function qmckl_compute_mo_basis_mo_value_doc_f(context, &
|
||||
end do
|
||||
end do
|
||||
|
||||
else ! dgemm
|
||||
else ! dgemm for checking
|
||||
|
||||
LDA = size(coefficient_t,1)
|
||||
LDB = size(ao_value,1)
|
||||
@ -874,7 +877,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
||||
}
|
||||
}
|
||||
|
||||
int64_t n;
|
||||
int64_t n=0;
|
||||
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
|
||||
@ -895,8 +898,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
||||
}
|
||||
}
|
||||
|
||||
const int64_t n0 = n < 0 ? 0 : n;
|
||||
for (int64_t m=n0 ; m < nidx ; m+=1) {
|
||||
for (int64_t m=n ; m < nidx ; m+=1) {
|
||||
const double* restrict ck = coefficient_t + idx[m]*mo_num;
|
||||
const double a1 = av1[m];
|
||||
|
||||
@ -1355,7 +1357,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
}
|
||||
}
|
||||
|
||||
int64_t n;
|
||||
int64_t n=0;
|
||||
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
|
||||
@ -1400,8 +1402,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
}
|
||||
}
|
||||
|
||||
const int64_t n0 = n < 0 ? 0 : n;
|
||||
for (int64_t m=n0 ; m < nidx ; m+=1) {
|
||||
for (int64_t m=n ; m < nidx ; m+=1) {
|
||||
const double* restrict ck = coefficient_t + idx[m]*mo_num;
|
||||
const double a1 = av1[m];
|
||||
const double a2 = av2[m];
|
||||
|
Loading…
Reference in New Issue
Block a user