From 41c0effa103cfdd57b103ccdaffa754486daedc5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 17 Feb 2022 12:36:16 +0100 Subject: [PATCH] Accelerated AOs in HPC --- org/qmckl_ao.org | 146 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 24 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index b546ef0..4007e5b 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4566,10 +4566,12 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, if (ldv == 4) { - const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 0.0}; + const double tmp[20] = {1.0, X[0]-R[0], X[1]-R[1], X[2]-R[2], + 0.0, 1.0, 0.0, 0.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + 0.0, 0.0, 0.0, 0.0}; + for (int i=0 ; i<20 ; ++i) VGL[i] = tmp[i]; @@ -5248,6 +5250,16 @@ qmckl_compute_ao_vgl_hpc ( double expo_[prim_num]; double ar2[prim_num]; int32_t powers[prim_num]; + double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, + {0.0, 1.0, 0.0, 0.0}, + {0.0, 0.0, 1.0, 0.0}, + {0.0, 0.0, 0.0, 1.0}, + {0.0, 0.0, 0.0, 0.0}}; + double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}}; double poly_vgl[5][size_max]; #ifdef HAVE_OPENMP @@ -5275,12 +5287,47 @@ qmckl_compute_ao_vgl_hpc ( } int64_t n_poly; - const qmckl_exit_code rc = - qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, - nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, - &(poly_vgl[0][0]), size_max); + switch (nucleus_max_ang_mom[inucl]) { + case 0: + break; - assert (rc == QMCKL_SUCCESS); + case 1: + poly_vgl_l1[0][1] = x; + poly_vgl_l1[0][2] = y; + poly_vgl_l1[0][3] = z; + break; + + case 2: + poly_vgl_l2[0][1] = x; + poly_vgl_l2[0][2] = y; + poly_vgl_l2[0][3] = z; + poly_vgl_l2[0][4] = x*x; + poly_vgl_l2[0][5] = x*y; + poly_vgl_l2[0][6] = x*z; + poly_vgl_l2[0][7] = y*y; + poly_vgl_l2[0][8] = y*z; + poly_vgl_l2[0][9] = z*z; + poly_vgl_l2[1][4] = x+x; + poly_vgl_l2[1][5] = y; + poly_vgl_l2[1][6] = z; + poly_vgl_l2[2][5] = x; + poly_vgl_l2[2][7] = y+y; + poly_vgl_l2[2][8] = z; + poly_vgl_l2[3][6] = x; + poly_vgl_l2[3][8] = y; + poly_vgl_l2[3][9] = z+z; + break; + + default: + const qmckl_exit_code rc = + qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); + + assert (rc == QMCKL_SUCCESS); + break; + } const int64_t ishell_start = nucleus_index[inucl]; const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; @@ -5305,10 +5352,10 @@ qmckl_compute_ao_vgl_hpc ( double s6_ = 0.; for (int idx=0 ; idx 0) { - const int64_t idx = lstart[l]; - const double* poly_vgl_1 = &(poly_vgl[0][idx]); - const double* poly_vgl_2 = &(poly_vgl[1][idx]); - const double* poly_vgl_3 = &(poly_vgl[2][idx]); - const double* poly_vgl_4 = &(poly_vgl[3][idx]); - const double* poly_vgl_5 = &(poly_vgl[4][idx]); const double* f = &(ao_factor[k]); + const int64_t idx = lstart[l]; - for (int64_t il=0 ; il