From 6bf9388a4edf3419f2ebccdfafaacecf3895d51a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Nov 2023 17:00:39 +0100 Subject: [PATCH] Added control of precision in AOs --- configure.ac | 2 +- org/qmckl_ao.org | 256 ++++++++++++++++++++++++++++-------------- org/qmckl_mo.org | 25 +++-- org/qmckl_numprec.org | 53 ++++++++- 4 files changed, 239 insertions(+), 97 deletions(-) diff --git a/configure.ac b/configure.ac index 60e2661..49e9fc3 100644 --- a/configure.ac +++ b/configure.ac @@ -66,7 +66,7 @@ AC_ARG_WITH([icx], AS_IF([test "x$with_icx" = "xyes"], [ CC=icx - CFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ]) + CFLAGS="-march=native -Ofast -ftz -finline -g -qmkl=sequential" ]) AS_IF([test "x$with_icx.$with_ifort" = "xyes.yes"], [ ax_blas_ok="yes" diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 10fefa7..5796127 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2684,7 +2684,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } /* Find distance beyond which all AOs are zero. - The distance is obtained by sqrt(log(cutoff)*range) */ + The distance is obtained by sqrt(-log(epsilon)*range) */ { if (ctx->ao_basis.type == 'G') { qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; @@ -5349,23 +5349,23 @@ for (int32_t ldl=3 ; ldl<=5 ; ++ldl) { :END: *** Unoptimized version #+NAME: qmckl_ao_value_args_doc - | Variable | Type | In/Out | Description | - |-----------------------+-----------------------------------+--------+----------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~ao_num~ | ~int64_t~ | in | Number of AOs | - | ~shell_num~ | ~int64_t~ | in | Number of shells | - | ~point_num~ | ~int64_t~ | in | Number of points | - | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | - | ~coord~ | ~double[3][point_num]~ | in | Coordinates | - | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | - | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | - | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | - | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | - | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | - | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | - | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | - | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | - | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | + | Variable | Type | In/Out | Description | + |-----------------------+-----------------------------------+--------+----------------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | #+begin_src f90 :comments org :tangle (eval f) :noweb yes function qmckl_compute_ao_value_doc(context, & @@ -5376,7 +5376,7 @@ function qmckl_compute_ao_value_doc(context, & bind(C) result(info) use, intrinsic :: iso_c_binding use qmckl_constants - use qmckl, only: qmckl_ao_polynomial_vgl + use qmckl, only: qmckl_ao_polynomial_vgl, qmckl_get_numprec_epsilon implicit none integer (qmckl_context), intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: ao_num @@ -5428,7 +5428,7 @@ function qmckl_compute_ao_value_doc(context, & info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. - cutoff = 27.631021115928547d0 !-dlog(1.d-12) + cutoff = -dlog(qmckl_get_numprec_epsilon(context)) do ipoint = 1, point_num e_coord(1) = coord(ipoint,1) @@ -5478,27 +5478,27 @@ end function qmckl_compute_ao_value_doc *** HPC version #+NAME: qmckl_ao_value_args_hpc_gaussian - | Variable | Type | In/Out | Description | - |-----------------------+-----------------------------+--------+----------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~ao_num~ | ~int64_t~ | in | Number of AOs | - | ~shell_num~ | ~int64_t~ | in | Number of shells | - | ~prim_num~ | ~int64_t~ | in | Number of primitives | - | ~point_num~ | ~int64_t~ | in | Number of points | - | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | - | ~coord~ | ~double[3][point_num]~ | in | Coordinates | - | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | - | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | - | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | - | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | - | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | - | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | - | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | - | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | - | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | - | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | - | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | - | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | + | Variable | Type | In/Out | Description | + |-----------------------+-----------------------------+--------+----------------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~prim_num~ | ~int64_t~ | in | Number of primitives | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | + | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | #+NAME:ao_value_constants #+begin_src c :exports none @@ -5530,8 +5530,13 @@ end function qmckl_compute_ao_value_doc ao_index[shell_num] = ao_num+1; } + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + /* Don't compute polynomials when the radial part is zero. */ - const double cutoff = 27.631021115928547; // -log(1.e-12) + const int precision = ctx->numprec.precision; + const double cutoff = log( (double) ( ((uint64_t) 1) << (precision-2) ) ); + const bool double_precision = precision > 23; #+end_src @@ -5566,10 +5571,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, { qmckl_exit_code rc; double ar2[prim_max] __attribute__((aligned(64))); + float ar2_sp[prim_max] __attribute__((aligned(64))); int32_t powers[3*size_max] __attribute__((aligned(64))); double poly_vgl[5*size_max] __attribute__((aligned(64))); double exp_mat[prim_max] __attribute__((aligned(64))); + float exp_mat_sp[prim_max] __attribute__((aligned(64))); double ce_mat[shell_max] __attribute__((aligned(64))); int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) = @@ -5613,7 +5620,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, const double y = e_coord[1] - n_coord[1]; const double z = e_coord[2] - n_coord[2]; - const double r2 = x*x + y*y + z*z; + const double x2 = x*x; + const double y2 = y*y; + const double z2 = z*z; + const double r2 = x2 + y2 + z2; if (r2 > cutoff * nucleus_range[inucl]) { continue; @@ -5636,19 +5646,19 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, poly_vgl[1] = x; poly_vgl[2] = y; poly_vgl[3] = z; - poly_vgl[4] = x*x; + poly_vgl[4] = x2; poly_vgl[5] = x*y; poly_vgl[6] = x*z; - poly_vgl[7] = y*y; + poly_vgl[7] = y2; poly_vgl[8] = y*z; - poly_vgl[9] = z*z; + poly_vgl[9] = z2; break; default: rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord, - nucleus_max_ang_mom[inucl], - &n_poly, powers, (int64_t) 3, - poly_vgl, size_max); + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + poly_vgl, size_max); assert (rc == QMCKL_SUCCESS); break; } @@ -5656,18 +5666,45 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, /* Compute all exponents */ int64_t nidx = 0; - for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { - const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; - if (v <= cutoff) { - ar2[iprim] = v; - ++nidx; - } else { - break; - } - } + if (double_precision) { + + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2[iprim] = -v; + ++nidx; + } else { + break; + } + } + +IVDEP + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim] = exp(ar2[iprim]); + } + + } else { + + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2_sp[iprim] = (float) -v; + ++nidx; + } else { + break; + } + } + +IVDEP + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat_sp[iprim] = expf(ar2_sp[iprim]); + } + +IVDEP + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim] = exp_mat_sp[iprim]; + } - for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { - exp_mat[iprim] = exp(-ar2[iprim]); } for (int i=0 ; i cutoff * nucleus_range[inucl]) { continue; @@ -6386,12 +6443,12 @@ qmckl_compute_ao_vgl_hpc_gaussian ( poly_vgl_l2[0][1] = x; poly_vgl_l2[0][2] = y; poly_vgl_l2[0][3] = z; - poly_vgl_l2[0][4] = x*x; + poly_vgl_l2[0][4] = x2; poly_vgl_l2[0][5] = x*y; poly_vgl_l2[0][6] = x*z; - poly_vgl_l2[0][7] = y*y; + poly_vgl_l2[0][7] = y2; poly_vgl_l2[0][8] = y*z; - poly_vgl_l2[0][9] = z*z; + poly_vgl_l2[0][9] = z2; poly_vgl_l2[1][4] = x+x; poly_vgl_l2[1][5] = y; poly_vgl_l2[1][6] = z; @@ -6405,9 +6462,9 @@ qmckl_compute_ao_vgl_hpc_gaussian ( default: rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord, - nucleus_max_ang_mom[inucl], - &n_poly, powers, (int64_t) 3, - &(poly_vgl[0][0]), size_max); + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + &(poly_vgl[0][0]), size_max); assert (rc == QMCKL_SUCCESS); break; } @@ -6415,18 +6472,43 @@ qmckl_compute_ao_vgl_hpc_gaussian ( /* Compute all exponents */ int64_t nidx = 0; - for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { - const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; - if (v <= cutoff) { - ar2[iprim] = v; - ++nidx; - } else { - break; - } - } - for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { - exp_mat[iprim][0] = exp(-ar2[iprim]); + if (double_precision) { + + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2[iprim] = v; + ++nidx; + } else { + break; + } + } + + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim][0] = exp(-ar2[iprim]); + } + + } else { + + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2_sp[iprim] = (float) v; + ++nidx; + } else { + break; + } + } + + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat_sp[iprim] = expf(-ar2_sp[iprim]); + } + + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim][0] = exp_mat_sp[iprim]; + } + } for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { @@ -6443,6 +6525,7 @@ qmckl_compute_ao_vgl_hpc_gaussian ( // if (do_sparse) { for (int i=0 ; i= nidx) break; +IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif @@ -6467,12 +6551,13 @@ qmckl_compute_ao_vgl_hpc_gaussian ( const int64_t ishell_start = nucleus_index[inucl]; const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + const int64_t ipoint_5_ao_num = ipoint*5*ao_num; for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { const double s1 = ce_mat[ishell-ishell_start][0]; const int64_t k = ao_index[ishell]; - double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k; + double* restrict const ao_vgl_1 = ao_vgl + ipoint_5_ao_num + k; const int32_t l = shell_ang_mom[ishell]; const int32_t n = lstart[l+1]-lstart[l]; @@ -6483,6 +6568,7 @@ qmckl_compute_ao_vgl_hpc_gaussian ( double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2); if (s1 == 0.0) { +/* for (int64_t il=0 ; il d) + x = (x < c) ? 0.0f : d; + + uint32_t n = (uint32_t) x; + memcpy(&x, &n, 4); + return x; +} + + +double fastExp(double x) +{ + const double a = 6497320848556798.0; + const double b = 4606985713057410560.0; + x = a * x + b; + + const double c = 4503599627370496.0; + const double d = 9218868437227405312.0; + if (x < c || x > d) + x = (x < c) ? 0.0 : d; + + uint64_t n = (uint64_t) x; + memcpy(&x, &n, 8); + return x; +} + #+end_src + * End of files :noexport: #+begin_src c :comments link :tangle (eval h_private_type)