mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-03 20:54:09 +01:00
Added control of precision in AOs
This commit is contained in:
parent
063aada9e4
commit
6bf9388a4e
@ -66,7 +66,7 @@ AC_ARG_WITH([icx],
|
|||||||
|
|
||||||
AS_IF([test "x$with_icx" = "xyes"], [
|
AS_IF([test "x$with_icx" = "xyes"], [
|
||||||
CC=icx
|
CC=icx
|
||||||
CFLAGS="-march=native -ip -Ofast -ftz -finline -g -mkl=sequential" ])
|
CFLAGS="-march=native -Ofast -ftz -finline -g -qmkl=sequential" ])
|
||||||
|
|
||||||
AS_IF([test "x$with_icx.$with_ifort" = "xyes.yes"], [
|
AS_IF([test "x$with_icx.$with_ifort" = "xyes.yes"], [
|
||||||
ax_blas_ok="yes"
|
ax_blas_ok="yes"
|
||||||
|
256
org/qmckl_ao.org
256
org/qmckl_ao.org
@ -2684,7 +2684,7 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Find distance beyond which all AOs are zero.
|
/* Find distance beyond which all AOs are zero.
|
||||||
The distance is obtained by sqrt(log(cutoff)*range) */
|
The distance is obtained by sqrt(-log(epsilon)*range) */
|
||||||
{
|
{
|
||||||
if (ctx->ao_basis.type == 'G') {
|
if (ctx->ao_basis.type == 'G') {
|
||||||
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
|
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
|
||||||
@ -5349,23 +5349,23 @@ for (int32_t ldl=3 ; ldl<=5 ; ++ldl) {
|
|||||||
:END:
|
:END:
|
||||||
*** Unoptimized version
|
*** Unoptimized version
|
||||||
#+NAME: qmckl_ao_value_args_doc
|
#+NAME: qmckl_ao_value_args_doc
|
||||||
| Variable | Type | In/Out | Description |
|
| Variable | Type | In/Out | Description |
|
||||||
|-----------------------+-----------------------------------+--------+----------------------------------------------|
|
|-----------------------+-----------------------------------+--------+----------------------------------------------------------|
|
||||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
|
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
|
||||||
| ~shell_num~ | ~int64_t~ | in | Number of shells |
|
| ~shell_num~ | ~int64_t~ | in | Number of shells |
|
||||||
| ~point_num~ | ~int64_t~ | in | Number of points |
|
| ~point_num~ | ~int64_t~ | in | Number of points |
|
||||||
| ~nucl_num~ | ~int64_t~ | in | Number of nuclei |
|
| ~nucl_num~ | ~int64_t~ | in | Number of nuclei |
|
||||||
| ~coord~ | ~double[3][point_num]~ | in | Coordinates |
|
| ~coord~ | ~double[3][point_num]~ | in | Coordinates |
|
||||||
| ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates |
|
| ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates |
|
||||||
| ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus |
|
| ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus |
|
||||||
| ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus |
|
| ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus |
|
||||||
| ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero |
|
| ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero |
|
||||||
| ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus |
|
| ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus |
|
||||||
| ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell |
|
| ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell |
|
||||||
| ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs |
|
| ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs |
|
||||||
| ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells |
|
| ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells |
|
||||||
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
|
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
|
||||||
|
|
||||||
#+begin_src f90 :comments org :tangle (eval f) :noweb yes
|
#+begin_src f90 :comments org :tangle (eval f) :noweb yes
|
||||||
function qmckl_compute_ao_value_doc(context, &
|
function qmckl_compute_ao_value_doc(context, &
|
||||||
@ -5376,7 +5376,7 @@ function qmckl_compute_ao_value_doc(context, &
|
|||||||
bind(C) result(info)
|
bind(C) result(info)
|
||||||
use, intrinsic :: iso_c_binding
|
use, intrinsic :: iso_c_binding
|
||||||
use qmckl_constants
|
use qmckl_constants
|
||||||
use qmckl, only: qmckl_ao_polynomial_vgl
|
use qmckl, only: qmckl_ao_polynomial_vgl, qmckl_get_numprec_epsilon
|
||||||
implicit none
|
implicit none
|
||||||
integer (qmckl_context), intent(in) , value :: context
|
integer (qmckl_context), intent(in) , value :: context
|
||||||
integer (c_int64_t) , intent(in) , value :: ao_num
|
integer (c_int64_t) , intent(in) , value :: ao_num
|
||||||
@ -5428,7 +5428,7 @@ function qmckl_compute_ao_value_doc(context, &
|
|||||||
info = QMCKL_SUCCESS
|
info = QMCKL_SUCCESS
|
||||||
|
|
||||||
! Don't compute polynomials when the radial part is zero.
|
! Don't compute polynomials when the radial part is zero.
|
||||||
cutoff = 27.631021115928547d0 !-dlog(1.d-12)
|
cutoff = -dlog(qmckl_get_numprec_epsilon(context))
|
||||||
|
|
||||||
do ipoint = 1, point_num
|
do ipoint = 1, point_num
|
||||||
e_coord(1) = coord(ipoint,1)
|
e_coord(1) = coord(ipoint,1)
|
||||||
@ -5478,27 +5478,27 @@ end function qmckl_compute_ao_value_doc
|
|||||||
|
|
||||||
*** HPC version
|
*** HPC version
|
||||||
#+NAME: qmckl_ao_value_args_hpc_gaussian
|
#+NAME: qmckl_ao_value_args_hpc_gaussian
|
||||||
| Variable | Type | In/Out | Description |
|
| Variable | Type | In/Out | Description |
|
||||||
|-----------------------+-----------------------------+--------+----------------------------------------------|
|
|-----------------------+-----------------------------+--------+----------------------------------------------------------|
|
||||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
|
| ~ao_num~ | ~int64_t~ | in | Number of AOs |
|
||||||
| ~shell_num~ | ~int64_t~ | in | Number of shells |
|
| ~shell_num~ | ~int64_t~ | in | Number of shells |
|
||||||
| ~prim_num~ | ~int64_t~ | in | Number of primitives |
|
| ~prim_num~ | ~int64_t~ | in | Number of primitives |
|
||||||
| ~point_num~ | ~int64_t~ | in | Number of points |
|
| ~point_num~ | ~int64_t~ | in | Number of points |
|
||||||
| ~nucl_num~ | ~int64_t~ | in | Number of nuclei |
|
| ~nucl_num~ | ~int64_t~ | in | Number of nuclei |
|
||||||
| ~coord~ | ~double[3][point_num]~ | in | Coordinates |
|
| ~coord~ | ~double[3][point_num]~ | in | Coordinates |
|
||||||
| ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates |
|
| ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates |
|
||||||
| ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus |
|
| ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus |
|
||||||
| ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus |
|
| ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus |
|
||||||
| ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero |
|
| ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero |
|
||||||
| ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus |
|
| ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus |
|
||||||
| ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell |
|
| ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell |
|
||||||
| ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell |
|
| ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell |
|
||||||
| ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell |
|
| ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell |
|
||||||
| ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs |
|
| ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs |
|
||||||
| ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
|
| ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
|
||||||
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
|
| ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells |
|
||||||
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
|
| ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs |
|
||||||
|
|
||||||
#+NAME:ao_value_constants
|
#+NAME:ao_value_constants
|
||||||
#+begin_src c :exports none
|
#+begin_src c :exports none
|
||||||
@ -5530,8 +5530,13 @@ end function qmckl_compute_ao_value_doc
|
|||||||
ao_index[shell_num] = ao_num+1;
|
ao_index[shell_num] = ao_num+1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||||
|
assert (ctx != NULL);
|
||||||
|
|
||||||
/* Don't compute polynomials when the radial part is zero. */
|
/* Don't compute polynomials when the radial part is zero. */
|
||||||
const double cutoff = 27.631021115928547; // -log(1.e-12)
|
const int precision = ctx->numprec.precision;
|
||||||
|
const double cutoff = log( (double) ( ((uint64_t) 1) << (precision-2) ) );
|
||||||
|
const bool double_precision = precision > 23;
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -5566,10 +5571,12 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
{
|
{
|
||||||
qmckl_exit_code rc;
|
qmckl_exit_code rc;
|
||||||
double ar2[prim_max] __attribute__((aligned(64)));
|
double ar2[prim_max] __attribute__((aligned(64)));
|
||||||
|
float ar2_sp[prim_max] __attribute__((aligned(64)));
|
||||||
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
||||||
double poly_vgl[5*size_max] __attribute__((aligned(64)));
|
double poly_vgl[5*size_max] __attribute__((aligned(64)));
|
||||||
|
|
||||||
double exp_mat[prim_max] __attribute__((aligned(64)));
|
double exp_mat[prim_max] __attribute__((aligned(64)));
|
||||||
|
float exp_mat_sp[prim_max] __attribute__((aligned(64)));
|
||||||
double ce_mat[shell_max] __attribute__((aligned(64)));
|
double ce_mat[shell_max] __attribute__((aligned(64)));
|
||||||
|
|
||||||
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
|
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
|
||||||
@ -5613,7 +5620,10 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
const double y = e_coord[1] - n_coord[1];
|
const double y = e_coord[1] - n_coord[1];
|
||||||
const double z = e_coord[2] - n_coord[2];
|
const double z = e_coord[2] - n_coord[2];
|
||||||
|
|
||||||
const double r2 = x*x + y*y + z*z;
|
const double x2 = x*x;
|
||||||
|
const double y2 = y*y;
|
||||||
|
const double z2 = z*z;
|
||||||
|
const double r2 = x2 + y2 + z2;
|
||||||
|
|
||||||
if (r2 > cutoff * nucleus_range[inucl]) {
|
if (r2 > cutoff * nucleus_range[inucl]) {
|
||||||
continue;
|
continue;
|
||||||
@ -5636,19 +5646,19 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
poly_vgl[1] = x;
|
poly_vgl[1] = x;
|
||||||
poly_vgl[2] = y;
|
poly_vgl[2] = y;
|
||||||
poly_vgl[3] = z;
|
poly_vgl[3] = z;
|
||||||
poly_vgl[4] = x*x;
|
poly_vgl[4] = x2;
|
||||||
poly_vgl[5] = x*y;
|
poly_vgl[5] = x*y;
|
||||||
poly_vgl[6] = x*z;
|
poly_vgl[6] = x*z;
|
||||||
poly_vgl[7] = y*y;
|
poly_vgl[7] = y2;
|
||||||
poly_vgl[8] = y*z;
|
poly_vgl[8] = y*z;
|
||||||
poly_vgl[9] = z*z;
|
poly_vgl[9] = z2;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord,
|
rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord,
|
||||||
nucleus_max_ang_mom[inucl],
|
nucleus_max_ang_mom[inucl],
|
||||||
&n_poly, powers, (int64_t) 3,
|
&n_poly, powers, (int64_t) 3,
|
||||||
poly_vgl, size_max);
|
poly_vgl, size_max);
|
||||||
assert (rc == QMCKL_SUCCESS);
|
assert (rc == QMCKL_SUCCESS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -5656,18 +5666,45 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
/* Compute all exponents */
|
/* Compute all exponents */
|
||||||
|
|
||||||
int64_t nidx = 0;
|
int64_t nidx = 0;
|
||||||
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
if (double_precision) {
|
||||||
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
|
||||||
if (v <= cutoff) {
|
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
||||||
ar2[iprim] = v;
|
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
||||||
++nidx;
|
if (v <= cutoff) {
|
||||||
} else {
|
ar2[iprim] = -v;
|
||||||
break;
|
++nidx;
|
||||||
}
|
} else {
|
||||||
}
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IVDEP
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat[iprim] = exp(ar2[iprim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
||||||
|
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
||||||
|
if (v <= cutoff) {
|
||||||
|
ar2_sp[iprim] = (float) -v;
|
||||||
|
++nidx;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IVDEP
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat_sp[iprim] = expf(ar2_sp[iprim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
IVDEP
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat[iprim] = exp_mat_sp[iprim];
|
||||||
|
}
|
||||||
|
|
||||||
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
|
||||||
exp_mat[iprim] = exp(-ar2[iprim]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
@ -5684,20 +5721,27 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
const int64_t ishell_start = nucleus_index[inucl];
|
const int64_t ishell_start = nucleus_index[inucl];
|
||||||
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
||||||
|
|
||||||
|
const int64_t ipoint_ao_num = ipoint*ao_num;
|
||||||
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
||||||
|
|
||||||
const double s1 = ce_mat[ishell-ishell_start];
|
const double s1 = ce_mat[ishell-ishell_start];
|
||||||
|
|
||||||
const int64_t k = ao_index[ishell];
|
const int64_t k = ao_index[ishell];
|
||||||
double* restrict const ao_value_1 = ao_value + ipoint*ao_num + k;
|
double* restrict const ao_value_1 = ao_value + ipoint_ao_num + k;
|
||||||
|
|
||||||
const int32_t l = shell_ang_mom[ishell];
|
const int32_t l = shell_ang_mom[ishell];
|
||||||
const int32_t n = lstart[l+1]-lstart[l];
|
const int32_t n = lstart[l+1]-lstart[l];
|
||||||
|
|
||||||
if (s1 == 0.0) {
|
if (s1 == 0.0) {
|
||||||
|
/*
|
||||||
|
IVDEP
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
for (int64_t il=0 ; il<n ; ++il) {
|
||||||
ao_value_1[il] = 0.;
|
ao_value_1[il] = 0.;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5713,6 +5757,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
ao_value_1[0] = s1 * f[0];
|
ao_value_1[0] = s1 * f[0];
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -5721,6 +5766,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case(6):
|
case(6):
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -5729,6 +5775,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -5738,9 +5785,14 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
|
#ifdef HAVE_OPENMP
|
||||||
|
#pragma omp simd
|
||||||
|
#endif
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
for (int64_t il=0 ; il<n ; ++il) {
|
||||||
ao_value_1[il] = 0.0;
|
ao_value_1[il] = 0.0;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -6121,7 +6173,7 @@ function qmckl_compute_ao_vgl_doc(context, &
|
|||||||
bind(C) result(info)
|
bind(C) result(info)
|
||||||
use, intrinsic :: iso_c_binding
|
use, intrinsic :: iso_c_binding
|
||||||
use qmckl_constants
|
use qmckl_constants
|
||||||
use qmckl, only : qmckl_ao_polynomial_vgl
|
use qmckl, only : qmckl_ao_polynomial_vgl, qmckl_get_numprec_epsilon
|
||||||
implicit none
|
implicit none
|
||||||
integer (qmckl_context), intent(in) , value :: context
|
integer (qmckl_context), intent(in) , value :: context
|
||||||
integer (c_int64_t) , intent(in) , value :: ao_num
|
integer (c_int64_t) , intent(in) , value :: ao_num
|
||||||
@ -6172,7 +6224,7 @@ function qmckl_compute_ao_vgl_doc(context, &
|
|||||||
info = QMCKL_SUCCESS
|
info = QMCKL_SUCCESS
|
||||||
|
|
||||||
! Don't compute polynomials when the radial part is zero.
|
! Don't compute polynomials when the radial part is zero.
|
||||||
cutoff = 27.631021115928547d0 ! -dlog(1.d-12)
|
cutoff = -dlog(qmckl_get_numprec_epsilon(context))
|
||||||
|
|
||||||
do ipoint = 1, point_num
|
do ipoint = 1, point_num
|
||||||
e_coord(1) = coord(ipoint,1)
|
e_coord(1) = coord(ipoint,1)
|
||||||
@ -6306,9 +6358,11 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
{
|
{
|
||||||
qmckl_exit_code rc;
|
qmckl_exit_code rc;
|
||||||
double ar2[prim_max] __attribute__((aligned(64)));
|
double ar2[prim_max] __attribute__((aligned(64)));
|
||||||
|
float ar2_sp[prim_max] __attribute__((aligned(64)));
|
||||||
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
int32_t powers[3*size_max] __attribute__((aligned(64)));
|
||||||
|
|
||||||
double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
|
double exp_mat[prim_max][8] __attribute__((aligned(64))) ;
|
||||||
|
float exp_mat_sp[prim_max] __attribute__((aligned(64)));
|
||||||
double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
|
double ce_mat[shell_max][8] __attribute__((aligned(64))) ;
|
||||||
|
|
||||||
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
|
int32_t (*coef_mat_sparse_idx)[shell_max][prim_max+1] __attribute__((aligned(64))) =
|
||||||
@ -6365,7 +6419,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
const double y = e_coord[1] - n_coord[1];
|
const double y = e_coord[1] - n_coord[1];
|
||||||
const double z = e_coord[2] - n_coord[2];
|
const double z = e_coord[2] - n_coord[2];
|
||||||
|
|
||||||
const double r2 = x*x + y*y + z*z;
|
const double x2 = x*x;
|
||||||
|
const double y2 = y*y;
|
||||||
|
const double z2 = z*z;
|
||||||
|
const double r2 = x2 + y2 + z2;
|
||||||
|
|
||||||
if (r2 > cutoff * nucleus_range[inucl]) {
|
if (r2 > cutoff * nucleus_range[inucl]) {
|
||||||
continue;
|
continue;
|
||||||
@ -6386,12 +6443,12 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
poly_vgl_l2[0][1] = x;
|
poly_vgl_l2[0][1] = x;
|
||||||
poly_vgl_l2[0][2] = y;
|
poly_vgl_l2[0][2] = y;
|
||||||
poly_vgl_l2[0][3] = z;
|
poly_vgl_l2[0][3] = z;
|
||||||
poly_vgl_l2[0][4] = x*x;
|
poly_vgl_l2[0][4] = x2;
|
||||||
poly_vgl_l2[0][5] = x*y;
|
poly_vgl_l2[0][5] = x*y;
|
||||||
poly_vgl_l2[0][6] = x*z;
|
poly_vgl_l2[0][6] = x*z;
|
||||||
poly_vgl_l2[0][7] = y*y;
|
poly_vgl_l2[0][7] = y2;
|
||||||
poly_vgl_l2[0][8] = y*z;
|
poly_vgl_l2[0][8] = y*z;
|
||||||
poly_vgl_l2[0][9] = z*z;
|
poly_vgl_l2[0][9] = z2;
|
||||||
poly_vgl_l2[1][4] = x+x;
|
poly_vgl_l2[1][4] = x+x;
|
||||||
poly_vgl_l2[1][5] = y;
|
poly_vgl_l2[1][5] = y;
|
||||||
poly_vgl_l2[1][6] = z;
|
poly_vgl_l2[1][6] = z;
|
||||||
@ -6405,9 +6462,9 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
|
|
||||||
default:
|
default:
|
||||||
rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord,
|
rc = qmckl_ao_polynomial_transp_vgl_hpc_inline(context, e_coord, n_coord,
|
||||||
nucleus_max_ang_mom[inucl],
|
nucleus_max_ang_mom[inucl],
|
||||||
&n_poly, powers, (int64_t) 3,
|
&n_poly, powers, (int64_t) 3,
|
||||||
&(poly_vgl[0][0]), size_max);
|
&(poly_vgl[0][0]), size_max);
|
||||||
assert (rc == QMCKL_SUCCESS);
|
assert (rc == QMCKL_SUCCESS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -6415,18 +6472,43 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
/* Compute all exponents */
|
/* Compute all exponents */
|
||||||
|
|
||||||
int64_t nidx = 0;
|
int64_t nidx = 0;
|
||||||
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
|
||||||
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
|
||||||
if (v <= cutoff) {
|
|
||||||
ar2[iprim] = v;
|
|
||||||
++nidx;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
if (double_precision) {
|
||||||
exp_mat[iprim][0] = exp(-ar2[iprim]);
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
||||||
|
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
||||||
|
if (v <= cutoff) {
|
||||||
|
ar2[iprim] = v;
|
||||||
|
++nidx;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat[iprim][0] = exp(-ar2[iprim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) {
|
||||||
|
const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2;
|
||||||
|
if (v <= cutoff) {
|
||||||
|
ar2_sp[iprim] = (float) v;
|
||||||
|
++nidx;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat_sp[iprim] = expf(-ar2_sp[iprim]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
|
exp_mat[iprim][0] = exp_mat_sp[iprim];
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) {
|
||||||
@ -6443,6 +6525,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
|
|
||||||
// if (do_sparse) {
|
// if (do_sparse) {
|
||||||
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -6454,6 +6537,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
for (int l=1 ; l<idx[0]; ++l) {
|
for (int l=1 ; l<idx[0]; ++l) {
|
||||||
const int k = idx[l];
|
const int k = idx[l];
|
||||||
if (k >= nidx) break;
|
if (k >= nidx) break;
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -6467,12 +6551,13 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
const int64_t ishell_start = nucleus_index[inucl];
|
const int64_t ishell_start = nucleus_index[inucl];
|
||||||
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl];
|
||||||
|
|
||||||
|
const int64_t ipoint_5_ao_num = ipoint*5*ao_num;
|
||||||
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) {
|
||||||
|
|
||||||
const double s1 = ce_mat[ishell-ishell_start][0];
|
const double s1 = ce_mat[ishell-ishell_start][0];
|
||||||
|
|
||||||
const int64_t k = ao_index[ishell];
|
const int64_t k = ao_index[ishell];
|
||||||
double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k;
|
double* restrict const ao_vgl_1 = ao_vgl + ipoint_5_ao_num + k;
|
||||||
|
|
||||||
const int32_t l = shell_ang_mom[ishell];
|
const int32_t l = shell_ang_mom[ishell];
|
||||||
const int32_t n = lstart[l+1]-lstart[l];
|
const int32_t n = lstart[l+1]-lstart[l];
|
||||||
@ -6483,6 +6568,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
|
double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2);
|
||||||
|
|
||||||
if (s1 == 0.0) {
|
if (s1 == 0.0) {
|
||||||
|
/*
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
for (int64_t il=0 ; il<n ; ++il) {
|
||||||
ao_vgl_1[il] = 0.0;
|
ao_vgl_1[il] = 0.0;
|
||||||
ao_vgl_2[il] = 0.0;
|
ao_vgl_2[il] = 0.0;
|
||||||
@ -6490,6 +6576,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
ao_vgl_4[il] = 0.0;
|
ao_vgl_4[il] = 0.0;
|
||||||
ao_vgl_5[il] = 0.0;
|
ao_vgl_5[il] = 0.0;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6539,6 +6626,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
ao_vgl_5[0] = s5;
|
ao_vgl_5[0] = s5;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -6554,6 +6642,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 6:
|
case 6:
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -6569,6 +6658,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -6585,6 +6675,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
/*
|
||||||
for (int64_t il=0 ; il<n ; ++il) {
|
for (int64_t il=0 ; il<n ; ++il) {
|
||||||
ao_vgl_1[il] = 0.0;
|
ao_vgl_1[il] = 0.0;
|
||||||
ao_vgl_2[il] = 0.0;
|
ao_vgl_2[il] = 0.0;
|
||||||
@ -6592,6 +6683,7 @@ qmckl_compute_ao_vgl_hpc_gaussian (
|
|||||||
ao_vgl_4[il] = 0.0;
|
ao_vgl_4[il] = 0.0;
|
||||||
ao_vgl_5[il] = 0.0;
|
ao_vgl_5[il] = 0.0;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1782,6 +1782,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
|||||||
const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num;
|
const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num;
|
||||||
const double* restrict avgl5 = avgl1 + (ao_num << 2);
|
const double* restrict avgl5 = avgl1 + (ao_num << 2);
|
||||||
|
|
||||||
|
IVDEP
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] = 0.;
|
vgl1[i] = 0.;
|
||||||
vgl2[i] = 0.;
|
vgl2[i] = 0.;
|
||||||
@ -1842,15 +1843,16 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
|||||||
const double a35 = av5[n+2];
|
const double a35 = av5[n+2];
|
||||||
const double a45 = av5[n+3];
|
const double a45 = av5[n+3];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||||
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
vgl2[i] += ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
||||||
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
vgl3[i] += ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
||||||
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
vgl4[i] += ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
||||||
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
vgl5[i] += ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1862,6 +1864,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
|||||||
const double a4 = av4[m];
|
const double a4 = av4[m];
|
||||||
const double a5 = av5[m];
|
const double a5 = av5[m];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -2166,6 +2169,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
|
|||||||
const double a31 = av1[n+2];
|
const double a31 = av1[n+2];
|
||||||
const double a41 = av1[n+3];
|
const double a41 = av1[n+3];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -2178,6 +2182,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
|
|||||||
const double* restrict ck = coefficient_t + idx[m]*mo_num;
|
const double* restrict ck = coefficient_t + idx[m]*mo_num;
|
||||||
const double a1 = av1[m];
|
const double a1 = av1[m];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -2189,9 +2194,7 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
|
|||||||
for (int64_t inucl=0 ; inucl<nucl_num ; ++inucl) {
|
for (int64_t inucl=0 ; inucl<nucl_num ; ++inucl) {
|
||||||
if (ria[inucl] < r_cusp[inucl]) {
|
if (ria[inucl] < r_cusp[inucl]) {
|
||||||
const double r = ria[inucl];
|
const double r = ria[inucl];
|
||||||
#ifdef HAVE_OPENMP
|
IVDEP
|
||||||
#pragma omp simd
|
|
||||||
#endif
|
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
||||||
@ -2591,6 +2594,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
|
|||||||
const double a35 = av5[n+2];
|
const double a35 = av5[n+2];
|
||||||
const double a45 = av5[n+3];
|
const double a45 = av5[n+3];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -2611,6 +2615,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
|
|||||||
const double a4 = av4[m];
|
const double a4 = av4[m];
|
||||||
const double a5 = av5[m];
|
const double a5 = av5[m];
|
||||||
|
|
||||||
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
@ -2633,9 +2638,7 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
|
|||||||
qmckl_mat(point_coord,ipoint,2) - qmckl_mat(nucl_coord,inucl,2) };
|
qmckl_mat(point_coord,ipoint,2) - qmckl_mat(nucl_coord,inucl,2) };
|
||||||
const double r_inv = 1./r;
|
const double r_inv = 1./r;
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
IVDEP
|
||||||
#pragma omp simd
|
|
||||||
#endif
|
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
vgl1[i] += qmckl_ten3(cusp_param,i,0,inucl) + r*(
|
||||||
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
qmckl_ten3(cusp_param,i,1,inucl) + r*(
|
||||||
|
@ -86,8 +86,8 @@ trapfpe ()
|
|||||||
default parameters determining the target numerical precision and
|
default parameters determining the target numerical precision and
|
||||||
range are defined. Following the IEEE Standard for Floating-Point
|
range are defined. Following the IEEE Standard for Floating-Point
|
||||||
Arithmetic (IEEE 754),
|
Arithmetic (IEEE 754),
|
||||||
/precision/ refers to the number of significand bits and /range/
|
/precision/ refers to the number of significand bits (including the
|
||||||
refers to the number of exponent bits.
|
sign bit) and /range/ refers to the number of exponent bits.
|
||||||
|
|
||||||
#+NAME: table-precision
|
#+NAME: table-precision
|
||||||
| ~QMCKL_DEFAULT_PRECISION~ | 53 |
|
| ~QMCKL_DEFAULT_PRECISION~ | 53 |
|
||||||
@ -340,7 +340,7 @@ double qmckl_get_numprec_epsilon(const qmckl_context context);
|
|||||||
#+begin_src c :tangle (eval c)
|
#+begin_src c :tangle (eval c)
|
||||||
double qmckl_get_numprec_epsilon(const qmckl_context context) {
|
double qmckl_get_numprec_epsilon(const qmckl_context context) {
|
||||||
const int precision = qmckl_get_numprec_precision(context);
|
const int precision = qmckl_get_numprec_precision(context);
|
||||||
return 1. / (double) (1L << (precision-2));
|
return 1. / (double) ( ((uint64_t) 1) << (precision-2));
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -355,6 +355,53 @@ double qmckl_get_numprec_epsilon(const qmckl_context context) {
|
|||||||
end interface
|
end interface
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
* Approximate functions
|
||||||
|
|
||||||
|
** Exponential
|
||||||
|
|
||||||
|
Fast exponential function, adapted from Johan Rade's implementation
|
||||||
|
(https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d). It
|
||||||
|
is based on Schraudolph's paper:
|
||||||
|
|
||||||
|
N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function",
|
||||||
|
/Neural Computation/ *11*, 853–862 (1999).
|
||||||
|
(available at https://nic.schraudolph.org/pubs/Schraudolph99.pdf)
|
||||||
|
|
||||||
|
#+begin_src c :tangle (eval c)
|
||||||
|
float fastExpf(float x)
|
||||||
|
{
|
||||||
|
const float a = 12102203.0;
|
||||||
|
const float b = 1064986816.0;
|
||||||
|
x = a * x + b;
|
||||||
|
|
||||||
|
const float c = 8388608.0;
|
||||||
|
const float d = 2139095040.0;
|
||||||
|
if (x < c || x > d)
|
||||||
|
x = (x < c) ? 0.0f : d;
|
||||||
|
|
||||||
|
uint32_t n = (uint32_t) x;
|
||||||
|
memcpy(&x, &n, 4);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double fastExp(double x)
|
||||||
|
{
|
||||||
|
const double a = 6497320848556798.0;
|
||||||
|
const double b = 4606985713057410560.0;
|
||||||
|
x = a * x + b;
|
||||||
|
|
||||||
|
const double c = 4503599627370496.0;
|
||||||
|
const double d = 9218868437227405312.0;
|
||||||
|
if (x < c || x > d)
|
||||||
|
x = (x < c) ? 0.0 : d;
|
||||||
|
|
||||||
|
uint64_t n = (uint64_t) x;
|
||||||
|
memcpy(&x, &n, 8);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
* End of files :noexport:
|
* End of files :noexport:
|
||||||
|
|
||||||
#+begin_src c :comments link :tangle (eval h_private_type)
|
#+begin_src c :comments link :tangle (eval h_private_type)
|
||||||
|
Loading…
Reference in New Issue
Block a user