1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-23 04:44:03 +01:00

Accelerated AOs in HPC

This commit is contained in:
Anthony Scemama 2022-02-17 12:36:16 +01:00
parent 7fe73e0104
commit 41c0effa10

View File

@ -4566,10 +4566,12 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context,
if (ldv == 4) { if (ldv == 4) {
const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0, const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2],
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,
1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
1.0, 0.0, 0.0, 0.0, 0.0}; 0.0, 0.0, 0.0, 1.0,
0.0, 0.0, 0.0, 0.0};
for (int i=0 ; i<20 ; ++i) for (int i=0 ; i<20 ; ++i)
VGL[i] = tmp[i]; VGL[i] = tmp[i];
@ -5248,6 +5250,16 @@ qmckl_compute_ao_vgl_hpc (
double expo_[prim_num]; double expo_[prim_num];
double ar2[prim_num]; double ar2[prim_num];
int32_t powers[prim_num]; int32_t powers[prim_num];
double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0},
{0.0, 1.0, 0.0, 0.0},
{0.0, 0.0, 1.0, 0.0},
{0.0, 0.0, 0.0, 1.0},
{0.0, 0.0, 0.0, 0.0}};
double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 1., 0., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 1., 0., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 1., 0., 0., 0., 0., 0., 0.},
{0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}};
double poly_vgl[5][size_max]; double poly_vgl[5][size_max];
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
@ -5275,12 +5287,47 @@ qmckl_compute_ao_vgl_hpc (
} }
int64_t n_poly; int64_t n_poly;
switch (nucleus_max_ang_mom[inucl]) {
case 0:
break;
case 1:
poly_vgl_l1[0][1] = x;
poly_vgl_l1[0][2] = y;
poly_vgl_l1[0][3] = z;
break;
case 2:
poly_vgl_l2[0][1] = x;
poly_vgl_l2[0][2] = y;
poly_vgl_l2[0][3] = z;
poly_vgl_l2[0][4] = x*x;
poly_vgl_l2[0][5] = x*y;
poly_vgl_l2[0][6] = x*z;
poly_vgl_l2[0][7] = y*y;
poly_vgl_l2[0][8] = y*z;
poly_vgl_l2[0][9] = z*z;
poly_vgl_l2[1][4] = x+x;
poly_vgl_l2[1][5] = y;
poly_vgl_l2[1][6] = z;
poly_vgl_l2[2][5] = x;
poly_vgl_l2[2][7] = y+y;
poly_vgl_l2[2][8] = z;
poly_vgl_l2[3][6] = x;
poly_vgl_l2[3][8] = y;
poly_vgl_l2[3][9] = z+z;
break;
default:
const qmckl_exit_code rc = const qmckl_exit_code rc =
qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord,
nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, nucleus_max_ang_mom[inucl],
&n_poly, powers, (int64_t) 3,
&(poly_vgl[0][0]), size_max); &(poly_vgl[0][0]), size_max);
assert (rc == QMCKL_SUCCESS); assert (rc == QMCKL_SUCCESS);
break;
}
const int64_t ishell_start = nucleus_index[inucl]; const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
@ -5305,10 +5352,10 @@ qmckl_compute_ao_vgl_hpc (
double s6_ = 0.; double s6_ = 0.;
for (int idx=0 ; idx<nidx ; ++idx) { for (int idx=0 ; idx<nidx ; ++idx) {
const double v = c_[idx] * exp(-ar2[idx]); const double v = c_[idx] * exp(-ar2[idx]);
const double x = v*expo_[idx]; const double t = v*expo_[idx];
s1_ += v; s1_ += v;
s6_ -= x; s6_ -= t;
s5_ += x*(4.0*ar2[idx]-6.0); s5_ += t*(4.0*ar2[idx]-6.0);
} }
s6_ += s6_; s6_ += s6_;
const double s1 = s1_; const double s1 = s1_;
@ -5327,26 +5374,77 @@ qmckl_compute_ao_vgl_hpc (
double* const ao_vgl_4 = &(ao_vgl_1[3*ao_num]); double* const ao_vgl_4 = &(ao_vgl_1[3*ao_num]);
double* const ao_vgl_5 = &(ao_vgl_1[4*ao_num]); double* const ao_vgl_5 = &(ao_vgl_1[4*ao_num]);
double* poly_vgl_1;
double* poly_vgl_2;
double* poly_vgl_3;
double* poly_vgl_4;
double* poly_vgl_5;
if (nidx > 0) { if (nidx > 0) {
const int64_t idx = lstart[l];
const double* poly_vgl_1 = &(poly_vgl[0][idx]);
const double* poly_vgl_2 = &(poly_vgl[1][idx]);
const double* poly_vgl_3 = &(poly_vgl[2][idx]);
const double* poly_vgl_4 = &(poly_vgl[3][idx]);
const double* poly_vgl_5 = &(poly_vgl[4][idx]);
const double* f = &(ao_factor[k]); const double* f = &(ao_factor[k]);
const int64_t idx = lstart[l];
switch (nucleus_max_ang_mom[inucl]) {
case 0:
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = s1 * f[il];
ao_vgl_2[il] = s2 * f[il];
ao_vgl_3[il] = s3 * f[il];
ao_vgl_4[il] = s4 * f[il];
ao_vgl_5[il] = s5;
}
break;
case 1:
poly_vgl_1 = &(poly_vgl_l1[0][idx]);
poly_vgl_2 = &(poly_vgl_l1[1][idx]);
poly_vgl_3 = &(poly_vgl_l1[2][idx]);
poly_vgl_4 = &(poly_vgl_l1[3][idx]);
for (int64_t il=0 ; il<n ; ++il) { for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il]; ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il]; ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + ao_vgl_5[il] = (poly_vgl_1[il] * s5 +
poly_vgl_1[il] * s5 + 2.0*( 2.0*(poly_vgl_2[il] * s2 +
poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 + poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il]; poly_vgl_4[il] * s4 )) * f[il];
} }
break;
case 2:
poly_vgl_1 = &(poly_vgl_l2[0][idx]);
poly_vgl_2 = &(poly_vgl_l2[1][idx]);
poly_vgl_3 = &(poly_vgl_l2[2][idx]);
poly_vgl_4 = &(poly_vgl_l2[3][idx]);
poly_vgl_5 = &(poly_vgl_l2[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
default:
poly_vgl_1 = &(poly_vgl[0][idx]);
poly_vgl_2 = &(poly_vgl[1][idx]);
poly_vgl_3 = &(poly_vgl[2][idx]);
poly_vgl_4 = &(poly_vgl[3][idx]);
poly_vgl_5 = &(poly_vgl[4][idx]);
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
ao_vgl_3[il] = (poly_vgl_3[il] * s1 + poly_vgl_1[il] * s3) * f[il];
ao_vgl_4[il] = (poly_vgl_4[il] * s1 + poly_vgl_1[il] * s4) * f[il];
ao_vgl_5[il] = (poly_vgl_5[il] * s1 + poly_vgl_1[il] * s5 +
2.0*(poly_vgl_2[il] * s2 +
poly_vgl_3[il] * s3 +
poly_vgl_4[il] * s4 )) * f[il];
}
break;
}
} else { } else {
for (int64_t il=0 ; il<n ; ++il) { for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = 0.0; ao_vgl_1[il] = 0.0;