1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-11-03 12:43:57 +01:00

Added control of precision in AOs

This commit is contained in:
Anthony Scemama 2023-11-28 17:00:39 +01:00
parent 063aada9e4
commit 6bf9388a4e
4 changed files with 239 additions and 97 deletions

View File

@ -66,7 +66,7 @@ AC_ARG_WITH([icx],
AS_IF([test "x$with_icx" = "xyes"], [
CC=icx
CFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])
CFLAGS="-march=native -Ofast -ftz -finline -g -qmkl=sequential" ])
AS_IF([test "x$with_icx.$with_ifort" = "xyes.yes"], [
ax_blas_ok="yes"

View File

@ -2684,7 +2684,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
}
/* Find distance beyond which all AOs are zero.
The distance is obtained by sqrt(log(cutoff)*range) */
The distance is obtained by sqrt(-log(epsilon)*range) */
{
if (ctx->ao_basis.type == 'G') {
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
@ -5350,7 +5350,7 @@ for (int32_t ldl=3 ; ldl<=5 ; ++ldl) {
*** Unoptimized version
#+NAME: qmckl_ao_value_args_doc
| Variable | Type | In/Out | Description |
|-----------------------+-----------------------------------+--------+----------------------------------------------|
|-----------------------+-----------------------------------+--------+----------------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
| ~shell_num~ | ~int64_t~ | in | Number of shells |
@ -5376,7 +5376,7 @@ function qmckl_compute_ao_value_doc(context, &
bind(C) result(info)
use, intrinsic :: iso_c_binding
use qmckl_constants
use qmckl, only: qmckl_ao_polynomial_vgl
use qmckl, only: qmckl_ao_polynomial_vgl, qmckl_get_numprec_epsilon
implicit none
integer (qmckl_context), intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: ao_num
@ -5428,7 +5428,7 @@ function qmckl_compute_ao_value_doc(context, &
info = QMCKL_SUCCESS
! Don't compute polynomials when the radial part is zero.
cutoff = 27.631021115928547d0 !-dlog(1.d-12)
cutoff = -dlog(qmckl_get_numprec_epsilon(context))
do ipoint = 1, point_num
e_coord(1) = coord(ipoint,1)
@ -5479,7 +5479,7 @@ end function qmckl_compute_ao_value_doc
*** HPC version
#+NAME: qmckl_ao_value_args_hpc_gaussian
| Variable | Type | In/Out | Description |
|-----------------------+-----------------------------+--------+----------------------------------------------|
|-----------------------+-----------------------------+--------+----------------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
| ~shell_num~ | ~int64_t~ | in | Number of shells |
@ -5530,8 +5530,13 @@ end function qmckl_compute_ao_value_doc
ao_index[shell_num] = ao_num+1;
}
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
assert (ctx != NULL);
/* Don't compute polynomials when the radial part is zero. */
const double cutoff = 27.631021115928547; // -log(1.e-12)
const int precision = ctx->numprec.precision;
const double cutoff = log( (double) ( ((uint64_t) 1) << (precision-2) ) );
const bool double_precision = precision > 23;
#+end_src
@ -5566,10 +5571,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
{
qmckl_exit_code rc;
double ar2[prim_max] __attribute__((aligned(64)));
float ar2_sp[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max] __attribute__((aligned(64)));
double poly_vgl[5*size_max] __attribute__((aligned(64)));
double exp_mat[prim_max] __attribute__((aligned(64)));
float exp_mat_sp[prim_max] __attribute__((aligned(64)));
double ce_mat[shell_max] __attribute__((aligned(64)));
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
@ -5613,7 +5620,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const double y = e_coord[1] - n_coord[1];
const double z = e_coord[2] - n_coord[2];
const double r2 = x*x + y*y + z*z;
const double x2 = x*x;
const double y2 = y*y;
const double z2 = z*z;
const double r2 = x2 + y2 + z2;
if (r2 > cutoff * nucleus_range[inucl]) {
continue;
@ -5636,12 +5646,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
poly_vgl[1] = x;
poly_vgl[2] = y;
poly_vgl[3] = z;
poly_vgl[4] = x*x;
poly_vgl[4] = x2;
poly_vgl[5] = x*y;
poly_vgl[6] = x*z;
poly_vgl[7] = y*y;
poly_vgl[7] = y2;
poly_vgl[8] = y*z;
poly_vgl[9] = z*z;
poly_vgl[9] = z2;
break;
default:
@ -5656,18 +5666,45 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
/* Compute all exponents */
int64_t nidx = 0;
if (double_precision) {
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
ar2[iprim] = v;
ar2[iprim] = -v;
++nidx;
} else {
break;
}
}
IVDEP
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim] = exp(-ar2[iprim]);
exp_mat[iprim] = exp(ar2[iprim]);
}
} else {
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
ar2_sp[iprim] = (float) -v;
++nidx;
} else {
break;
}
}
IVDEP
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat_sp[iprim] = expf(ar2_sp[iprim]);
}
IVDEP
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim] = exp_mat_sp[iprim];
}
}
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
@ -5684,20 +5721,27 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
const int64_t ipoint_ao_num = ipoint*ao_num;
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const double s1 = ce_mat[ishell-ishell_start];
const int64_t k = ao_index[ishell];
double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k;
double* restrict const ao_value_1 = ao_value + ipoint_ao_num + k;
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
if (s1 == 0.0) {
/*
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t il=0 ; il<n ; ++il) {
ao_value_1[il] = 0.;
}
*/
continue;
}
@ -5713,6 +5757,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
ao_value_1[0] = s1 * f[0];
break;
case 3:
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -5721,6 +5766,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
}
break;
case(6):
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -5729,6 +5775,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
}
break;
default:
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -5738,9 +5785,14 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
break;
}
} else {
/*
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t il=0 ; il<n ; ++il) {
ao_value_1[il] = 0.0;
}
*/
}
}
}
@ -6121,7 +6173,7 @@ function qmckl_compute_ao_vgl_doc(context, &
bind(C) result(info)
use, intrinsic :: iso_c_binding
use qmckl_constants
use qmckl, only : qmckl_ao_polynomial_vgl
use qmckl, only : qmckl_ao_polynomial_vgl, qmckl_get_numprec_epsilon
implicit none
integer (qmckl_context), intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: ao_num
@ -6172,7 +6224,7 @@ function qmckl_compute_ao_vgl_doc(context, &
info = QMCKL_SUCCESS
! Don't compute polynomials when the radial part is zero.
cutoff = 27.631021115928547d0 ! -dlog(1.d-12)
cutoff = -dlog(qmckl_get_numprec_epsilon(context))
do ipoint = 1, point_num
e_coord(1) = coord(ipoint,1)
@ -6306,9 +6358,11 @@ qmckl_compute_ao_vgl_hpc_gaussian (
{
qmckl_exit_code rc;
double ar2[prim_max] __attribute__((aligned(64)));
float ar2_sp[prim_max] __attribute__((aligned(64)));
int32_t powers[3*size_max] __attribute__((aligned(64)));
double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
float exp_mat_sp[prim_max] __attribute__((aligned(64)));
double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
@ -6365,7 +6419,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const double y = e_coord[1] - n_coord[1];
const double z = e_coord[2] - n_coord[2];
const double r2 = x*x + y*y + z*z;
const double x2 = x*x;
const double y2 = y*y;
const double z2 = z*z;
const double r2 = x2 + y2 + z2;
if (r2 > cutoff * nucleus_range[inucl]) {
continue;
@ -6386,12 +6443,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
poly_vgl_l2[0][1] = x;
poly_vgl_l2[0][2] = y;
poly_vgl_l2[0][3] = z;
poly_vgl_l2[0][4] = x*x;
poly_vgl_l2[0][4] = x2;
poly_vgl_l2[0][5] = x*y;
poly_vgl_l2[0][6] = x*z;
poly_vgl_l2[0][7] = y*y;
poly_vgl_l2[0][7] = y2;
poly_vgl_l2[0][8] = y*z;
poly_vgl_l2[0][9] = z*z;
poly_vgl_l2[0][9] = z2;
poly_vgl_l2[1][4] = x+x;
poly_vgl_l2[1][5] = y;
poly_vgl_l2[1][6] = z;
@ -6415,6 +6472,9 @@ qmckl_compute_ao_vgl_hpc_gaussian (
/* Compute all exponents */
int64_t nidx = 0;
if (double_precision) {
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
@ -6429,6 +6489,28 @@ qmckl_compute_ao_vgl_hpc_gaussian (
exp_mat[iprim][0] = exp(-ar2[iprim]);
}
} else {
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
if (v <= cutoff) {
ar2_sp[iprim] = (float) v;
++nidx;
} else {
break;
}
}
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat_sp[iprim] = expf(-ar2_sp[iprim]);
}
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
exp_mat[iprim][0] = exp_mat_sp[iprim];
}
}
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
double f = qmckl_mat(expo_per_nucleus, iprim, inucl) * exp_mat[iprim][0];
f = -f-f;
@ -6443,6 +6525,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
// if (do_sparse) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6454,6 +6537,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
for (int l=1 ; l<idx[0]; ++l) {
const int k = idx[l];
if (k >= nidx) break;
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6467,12 +6551,13 @@ qmckl_compute_ao_vgl_hpc_gaussian (
const int64_t ishell_start = nucleus_index[inucl];
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
const int64_t ipoint_5_ao_num = ipoint*5*ao_num;
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
const double s1 = ce_mat[ishell-ishell_start][0];
const int64_t k = ao_index[ishell];
double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
double* restrict const ao_vgl_1 = ao_vgl + ipoint_5_ao_num + k;
const int32_t l = shell_ang_mom[ishell];
const int32_t n = lstart[l+1]-lstart[l];
@ -6483,6 +6568,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
if (s1 == 0.0) {
/*
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = 0.0;
ao_vgl_2[il] = 0.0;
@ -6490,6 +6576,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
ao_vgl_4[il] = 0.0;
ao_vgl_5[il] = 0.0;
}
*/
continue;
}
@ -6539,6 +6626,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
ao_vgl_5[0] = s5;
break;
case 3:
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6554,6 +6642,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
break;
case 6:
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6569,6 +6658,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
}
break;
default:
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -6585,6 +6675,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
break;
}
} else {
/*
for (int64_t il=0 ; il<n ; ++il) {
ao_vgl_1[il] = 0.0;
ao_vgl_2[il] = 0.0;
@ -6592,6 +6683,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
ao_vgl_4[il] = 0.0;
ao_vgl_5[il] = 0.0;
}
*/
}
}
}

View File

@ -1782,6 +1782,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num;
const double* restrict avgl5 = avgl1 + (ao_num << 2);
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = 0.;
vgl2[i] = 0.;
@ -1842,15 +1843,16 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const double a35 = av5[n+2];
const double a45 = av5[n+3];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] += ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
vgl3[i] += ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
vgl4[i] += ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
vgl5[i] += ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
}
}
@ -1862,6 +1864,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const double a4 = av4[m];
const double a5 = av5[m];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -2166,6 +2169,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
const double a31 = av1[n+2];
const double a41 = av1[n+3];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -2178,6 +2182,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -2189,9 +2194,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
for (int64_t inucl=0 ; inucl<nucl_num ; ++inucl) {
if (ria[inucl] < r_cusp[inucl]) {
const double r = ria[inucl];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
qmckl_ten3(cusp_param,i,1,inucl) + r*(
@ -2591,6 +2594,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
const double a35 = av5[n+2];
const double a45 = av5[n+3];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -2611,6 +2615,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
const double a4 = av4[m];
const double a5 = av5[m];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
@ -2633,9 +2638,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
qmckl_mat(point_coord,ipoint,2) - qmckl_mat(nucl_coord,inucl,2) };
const double r_inv = 1./r;
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
qmckl_ten3(cusp_param,i,1,inucl) + r*(

View File

@ -86,8 +86,8 @@ trapfpe ()
default parameters determining the target numerical precision and
range are defined. Following the IEEE Standard for Floating-Point
Arithmetic (IEEE 754),
/precision/ refers to the number of significand bits and /range/
refers to the number of exponent bits.
/precision/ refers to the number of significand bits (including the
sign bit) and /range/ refers to the number of exponent bits.
#+NAME: table-precision
| ~QMCKL_DEFAULT_PRECISION~ | 53 |
@ -340,7 +340,7 @@ double qmckl_get_numprec_epsilon(const qmckl_context context);
#+begin_src c :tangle (eval c)
double qmckl_get_numprec_epsilon(const qmckl_context context) {
const int precision = qmckl_get_numprec_precision(context);
return 1. / (double) (1L << (precision-2));
return 1. / (double) ( ((uint64_t) 1) << (precision-2));
}
#+end_src
@ -355,6 +355,53 @@ double qmckl_get_numprec_epsilon(const qmckl_context context) {
end interface
#+end_src
* Approximate functions
** Exponential
Fast exponential function, adapted from Johan Rade's implementation
(https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d). It
is based on Schraudolph's paper:
N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function",
/Neural Computation/ *11*, 853862 (1999).
(available at https://nic.schraudolph.org/pubs/Schraudolph99.pdf)
#+begin_src c :tangle (eval c)
float fastExpf(float x)
{
const float a = 12102203.0;
const float b = 1064986816.0;
x = a * x + b;
const float c = 8388608.0;
const float d = 2139095040.0;
if (x < c || x > d)
x = (x < c) ? 0.0f : d;
uint32_t n = (uint32_t) x;
memcpy(&x, &n, 4);
return x;
}
double fastExp(double x)
{
const double a = 6497320848556798.0;
const double b = 4606985713057410560.0;
x = a * x + b;
const double c = 4503599627370496.0;
const double d = 9218868437227405312.0;
if (x < c || x > d)
x = (x < c) ? 0.0 : d;
uint64_t n = (uint64_t) x;
memcpy(&x, &n, 8);
return x;
}
#+end_src
* End of files :noexport:
#+begin_src c :comments link :tangle (eval h_private_type)