mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-07 03:43:27 +01:00
505 lines
14 KiB
Org Mode
505 lines
14 KiB
Org Mode
3+TITLE: Numerical precision
|
||
#+SETUPFILE: ../tools/theme.setup
|
||
#+INCLUDE: ../tools/lib.org
|
||
|
||
* Headers :noexport:
|
||
|
||
#+begin_src c :tangle (eval c_test) :noweb yes
|
||
#include "qmckl.h"
|
||
#include "assert.h"
|
||
#ifdef HAVE_CONFIG_H
|
||
#include "config.h"
|
||
#endif
|
||
int main() {
|
||
#+end_src
|
||
|
||
#+begin_src c :tangle (eval h_private_type)
|
||
#ifndef QMCKL_NUMPREC_HPT
|
||
#define QMCKL_NUMPREC_HPT
|
||
|
||
#ifdef HAVE_STDINT_H
|
||
#include <stdint.h>
|
||
#elif HAVE_INTTYPES_H
|
||
#include <inttypes.h>
|
||
#endif
|
||
#+end_src
|
||
|
||
#+begin_src c :tangle (eval c)
|
||
#ifdef HAVE_CONFIG_H
|
||
#include "config.h"
|
||
#endif
|
||
|
||
#ifdef HAVE_STDINT_H
|
||
#include <stdint.h>
|
||
#elif HAVE_INTTYPES_H
|
||
#include <inttypes.h>
|
||
#endif
|
||
|
||
#include <assert.h>
|
||
#include <math.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
#ifdef HAVE_FPE
|
||
#define _GNU_SOURCE
|
||
#include <fenv.h>
|
||
#include <signal.h>
|
||
#include <stdio.h>
|
||
#include <execinfo.h>
|
||
|
||
|
||
#define MAX_BACKTRACE_SIZE 100
|
||
|
||
void floatingPointExceptionHandler(int signal) {
|
||
void* backtraceArray[MAX_BACKTRACE_SIZE];
|
||
int backtraceSize = backtrace(backtraceArray, MAX_BACKTRACE_SIZE);
|
||
char** backtraceSymbols = backtrace_symbols(backtraceArray, backtraceSize);
|
||
|
||
// Print the backtrace
|
||
for (int i = 0; i < backtraceSize; ++i) {
|
||
printf("[%d] %s\n", i, backtraceSymbols[i]);
|
||
}
|
||
|
||
// Clean up the memory used by backtrace_symbols
|
||
free(backtraceSymbols);
|
||
|
||
exit(EXIT_FAILURE);
|
||
}
|
||
|
||
static void __attribute__ ((constructor))
|
||
trapfpe ()
|
||
{
|
||
/* Enable some exceptions. At startup all exceptions are masked. */
|
||
|
||
feenableexcept (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
|
||
signal(SIGFPE, floatingPointExceptionHandler);
|
||
}
|
||
#endif
|
||
|
||
#include "qmckl.h"
|
||
#include "qmckl_context_private_type.h"
|
||
#+end_src
|
||
|
||
* Control of the numerical precision
|
||
|
||
Controlling numerical precision enables optimizations. Here, the
|
||
default parameters determining the target numerical precision and
|
||
range are defined. Following the IEEE Standard for Floating-Point
|
||
Arithmetic (IEEE 754), /precision/ refers to the number of significand bits (including the
|
||
sign bit) and /range/ refers to the number of exponent bits.
|
||
|
||
#+NAME: table-precision
|
||
| ~QMCKL_DEFAULT_PRECISION~ | 53 |
|
||
| ~QMCKL_DEFAULT_RANGE~ | 11 |
|
||
|
||
# We need to force Emacs not to indent the Python code:
|
||
# -*- org-src-preserve-indentation: t
|
||
|
||
#+begin_src python :var table=table-precision :results drawer :exports results
|
||
""" This script generates the C and Fortran constants from the org-mode table.
|
||
"""
|
||
|
||
result = [ "#+begin_src c :comments org :tangle (eval h_type)" ]
|
||
for (text, code) in table:
|
||
text=text.replace("~","")
|
||
result += [ f"#define {text:30s} {code:d}" ]
|
||
result += [ "#+end_src" ]
|
||
|
||
result += [ "" ]
|
||
|
||
result += [ "#+begin_src f90 :comments org :tangle (eval fh_func) :exports none" ]
|
||
for (text, code) in table:
|
||
text=text.replace("~","")
|
||
result += [ f" integer, parameter :: {text:30s} = {code:d}" ]
|
||
result += [ "#+end_src" ]
|
||
|
||
return '\n'.join(result)
|
||
|
||
#+end_src
|
||
|
||
#+RESULTS:
|
||
:results:
|
||
#+begin_src c :comments org :tangle (eval h_type)
|
||
#define QMCKL_DEFAULT_PRECISION 53
|
||
#define QMCKL_DEFAULT_RANGE 11
|
||
#+end_src
|
||
|
||
#+begin_src f90 :comments org :tangle (eval fh_func) :exports none
|
||
integer, parameter :: QMCKL_DEFAULT_PRECISION = 53
|
||
integer, parameter :: QMCKL_DEFAULT_RANGE = 11
|
||
#+end_src
|
||
:end:
|
||
|
||
#+begin_src c :comments org :tangle (eval h_private_type)
|
||
typedef struct qmckl_numprec_struct {
|
||
uint32_t precision;
|
||
uint32_t range;
|
||
} qmckl_numprec_struct;
|
||
#+end_src
|
||
|
||
The following functions set and get the required precision and
|
||
range. ~precision~ is an integer between 2 and 53, and ~range~ is an
|
||
integer between 2 and 11.
|
||
|
||
The setter functions functions return a new context as a 64-bit
|
||
integer. The getter functions return the value, as a 32-bit
|
||
integer. The update functions return ~QMCKL_SUCCESS~ or ~QMCKL_FAILURE~.
|
||
|
||
* Precision ~qmckl_context_set_numprec_precision~ modifies the parameter for the
|
||
numerical precision in the context.
|
||
|
||
# Header
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
qmckl_exit_code qmckl_set_numprec_precision(const qmckl_context context, const int precision);
|
||
#+end_src
|
||
|
||
# Source
|
||
#+begin_src c :tangle (eval c)
|
||
qmckl_exit_code qmckl_set_numprec_precision(const qmckl_context context, const int precision) {
|
||
|
||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
|
||
return QMCKL_INVALID_CONTEXT;
|
||
|
||
if (precision < 2) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_ARG_2,
|
||
"qmckl_update_numprec_precision",
|
||
"precision < 2");
|
||
}
|
||
|
||
if (precision > 53) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_ARG_2,
|
||
"qmckl_update_numprec_precision",
|
||
"precision > 53");
|
||
}
|
||
|
||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||
|
||
/* This should be always true because the context is valid */
|
||
assert (ctx != NULL);
|
||
|
||
qmckl_lock(context);
|
||
{
|
||
ctx->numprec.precision = (uint32_t) precision;
|
||
}
|
||
qmckl_unlock(context);
|
||
|
||
return QMCKL_SUCCESS;
|
||
}
|
||
#+end_src
|
||
|
||
# Fortran interface
|
||
|
||
#+begin_src f90 :tangle (eval fh_func)
|
||
interface
|
||
integer (qmckl_exit_code) function qmckl_set_numprec_precision(context, precision) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
integer (qmckl_context), intent(in), value :: context
|
||
integer (c_int32_t), intent(in), value :: precision
|
||
end function qmckl_set_numprec_precision
|
||
end interface
|
||
#+end_src ~qmckl_get_numprec_precision~ returns the value of the numerical precision in the context.
|
||
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
int32_t qmckl_get_numprec_precision(const qmckl_context context);
|
||
#+end_src
|
||
|
||
# Source
|
||
#+begin_src c :tangle (eval c)
|
||
int qmckl_get_numprec_precision(const qmckl_context context) {
|
||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_CONTEXT,
|
||
"qmckl_get_numprec_precision",
|
||
"");
|
||
}
|
||
|
||
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||
return ctx->numprec.precision;
|
||
}
|
||
#+end_src
|
||
|
||
# Fortran interface
|
||
#+begin_src f90 :tangle (eval fh_func)
|
||
interface
|
||
integer (qmckl_exit_code) function qmckl_get_numprec_precision(context) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
integer (qmckl_context), intent(in), value :: context
|
||
end function qmckl_get_numprec_precision
|
||
end interface
|
||
#+end_src
|
||
|
||
* Range ~qmckl_set_numprec_range~ modifies the parameter for the numerical
|
||
range in a given context.
|
||
|
||
# Header
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
qmckl_exit_code qmckl_set_numprec_range(const qmckl_context context, const int range);
|
||
#+end_src
|
||
|
||
# Source
|
||
#+begin_src c :tangle (eval c)
|
||
qmckl_exit_code qmckl_set_numprec_range(const qmckl_context context, const int range) {
|
||
|
||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
|
||
return QMCKL_INVALID_CONTEXT;
|
||
|
||
if (range < 2) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_ARG_2,
|
||
"qmckl_set_numprec_range",
|
||
"range < 2");
|
||
}
|
||
|
||
if (range > 11) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_ARG_2,
|
||
"qmckl_set_numprec_range",
|
||
"range > 11");
|
||
}
|
||
|
||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||
|
||
/* This should be always true because the context is valid */
|
||
assert (ctx != NULL);
|
||
|
||
qmckl_lock(context);
|
||
{
|
||
ctx->numprec.range = (uint32_t) range;
|
||
}
|
||
qmckl_unlock(context);
|
||
|
||
return QMCKL_SUCCESS;
|
||
}
|
||
#+end_src
|
||
|
||
# Fortran interface
|
||
#+begin_src f90 :tangle (eval fh_func)
|
||
interface
|
||
integer (qmckl_exit_code) function qmckl_set_numprec_range(context, range) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
integer (qmckl_context), intent(in), value :: context
|
||
integer (c_int32_t), intent(in), value :: range
|
||
end function qmckl_set_numprec_range
|
||
end interface
|
||
#+end_src ~qmckl_get_numprec_range~ returns the value of the numerical range in the context.
|
||
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
int32_t qmckl_get_numprec_range(const qmckl_context context);
|
||
#+end_src
|
||
|
||
# Source
|
||
#+begin_src c :tangle (eval c)
|
||
int qmckl_get_numprec_range(const qmckl_context context) {
|
||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||
return qmckl_failwith(context,
|
||
QMCKL_INVALID_CONTEXT,
|
||
"qmckl_get_numprec_range",
|
||
"");
|
||
}
|
||
|
||
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||
return ctx->numprec.range;
|
||
}
|
||
#+end_src
|
||
|
||
# Fortran interface
|
||
#+begin_src f90 :tangle (eval fh_func) :exports none
|
||
interface
|
||
integer (qmckl_exit_code) function qmckl_get_numprec_range(context) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
integer (qmckl_context), intent(in), value :: context
|
||
end function qmckl_get_numprec_range
|
||
end interface
|
||
#+end_src
|
||
|
||
* Helper functions
|
||
|
||
** Epsilon ~qmckl_get_numprec_epsilon~ returns $\epsilon = 2^{1-n}$ where ~n~ is the precision.
|
||
We need to remove the sign bit from the precision.
|
||
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
double qmckl_get_numprec_epsilon(const qmckl_context context);
|
||
#+end_src
|
||
|
||
# Source
|
||
#+begin_src c :tangle (eval c)
|
||
double qmckl_get_numprec_epsilon(const qmckl_context context) {
|
||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
|
||
return QMCKL_INVALID_CONTEXT;
|
||
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||
const int precision = ctx->numprec.precision;
|
||
return 1. / (double) ( ((uint64_t) 1) << (precision-2));
|
||
}
|
||
#+end_src
|
||
|
||
# Fortran interface
|
||
#+begin_src f90 :tangle (eval fh_func) :exports none
|
||
interface
|
||
real (c_double) function qmckl_get_numprec_epsilon(context) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
integer (qmckl_context), intent(in), value :: context
|
||
end function qmckl_get_numprec_epsilon
|
||
end interface
|
||
#+end_src
|
||
|
||
** Testing the number of unchanged bits
|
||
|
||
To test that a given approximation keeps a given number of bits
|
||
unchanged, we need a function that returns the number of unchanged
|
||
bits in the range, and in the precision.
|
||
|
||
For this, we first count by how many units in the last place (ulps) two
|
||
numbers differ.
|
||
|
||
#+begin_src c :tangle (eval c)
|
||
int64_t countUlpDifference_64(double a, double b) {
|
||
|
||
union int_or_float {
|
||
int64_t i;
|
||
double f;
|
||
} x, y;
|
||
|
||
x.f = a;
|
||
y.f = b;
|
||
|
||
// Handle sign bit discontinuity: if the signs are different and either value is not zero
|
||
if ((x.i < 0) != (y.i < 0) && (x.f != 0.0) && (y.f != 0.0)) {
|
||
// Use the absolute values and add the distance to zero for both numbers
|
||
int64_t distanceToZeroForX = x.i < 0 ? INT64_MAX + x.i : INT64_MAX - x.i;
|
||
int64_t distanceToZeroForY = y.i < 0 ? INT64_MAX + y.i : INT64_MAX - y.i;
|
||
return distanceToZeroForX + distanceToZeroForY;
|
||
}
|
||
|
||
// Calculate the difference in their binary representations
|
||
int64_t result = x.i - y.i;
|
||
result = result > 0 ? result : -result;
|
||
return result;
|
||
}
|
||
#+end_src
|
||
|
||
#+begin_src c :comments org :tangle (eval h_func) :exports none
|
||
int32_t qmckl_test_precision_64(double a, double b);
|
||
int32_t qmckl_test_precision_32(float a, float b);
|
||
#+end_src
|
||
|
||
#+begin_src c :tangle (eval c)
|
||
int32_t qmckl_test_precision_64(double a, double b) {
|
||
|
||
int64_t diff = countUlpDifference_64(a,b);
|
||
|
||
if (diff == 0) return 53;
|
||
|
||
int32_t result = 53;
|
||
|
||
for (int i=0 ; i<53 && diff != 0 ; ++i) {
|
||
diff >>= 1;
|
||
result--;
|
||
}
|
||
|
||
return result;
|
||
}
|
||
#+end_src
|
||
|
||
|
||
#+begin_src c :tangle (eval c)
|
||
int32_t qmckl_test_precision_32(float a, float b) {
|
||
return qmckl_test_precision_64( (double) a, (double) b );
|
||
}
|
||
#+end_src
|
||
|
||
#+begin_src f90 :tangle (eval fh_func) :exports none
|
||
interface
|
||
integer (c_int) function qmckl_test_precision_32(a,b) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
real (c_float), intent(in), value :: a, b
|
||
end function qmckl_test_precision_32
|
||
end interface
|
||
|
||
interface
|
||
integer (c_int) function qmckl_test_precision_64(a,b) bind(C)
|
||
use, intrinsic :: iso_c_binding
|
||
import
|
||
real (c_double), intent(in), value :: a, b
|
||
end function qmckl_test_precision_64
|
||
end interface
|
||
#+end_src
|
||
|
||
* Approximate functions
|
||
|
||
** Exponential
|
||
|
||
Fast exponential function, adapted from Johan Rade's implementation
|
||
(https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d). It
|
||
is based on Schraudolph's paper:
|
||
|
||
N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function", /Neural Computation/ *11*, 853–862 (1999).
|
||
(available at https://nic.schraudolph.org/pubs/Schraudolph99.pdf)
|
||
|
||
#+begin_src c :tangle (eval c)
|
||
float fastExpf(float x)
|
||
{
|
||
const float a = 12102203.0;
|
||
const float b = 1064986816.0;
|
||
x = a * x + b;
|
||
|
||
const float c = 8388608.0;
|
||
const float d = 2139095040.0;
|
||
if (x < c || x > d)
|
||
x = (x < c) ? 0.0f : d;
|
||
|
||
uint32_t n = (uint32_t) x;
|
||
memcpy(&x, &n, 4);
|
||
return x;
|
||
}
|
||
|
||
|
||
double fastExp(double x)
|
||
{
|
||
const double a = 6497320848556798.0;
|
||
const double b = 4606985713057410560.0;
|
||
x = a * x + b;
|
||
|
||
const double c = 4503599627370496.0;
|
||
const double d = 9218868437227405312.0;
|
||
if (x < c || x > d)
|
||
x = (x < c) ? 0.0 : d;
|
||
|
||
uint64_t n = (uint64_t) x;
|
||
memcpy(&x, &n, 8);
|
||
return x;
|
||
}
|
||
#+end_src
|
||
|
||
* End of files :noexport:
|
||
|
||
#+begin_src c :comments link :tangle (eval h_private_type)
|
||
#endif
|
||
#+end_src
|
||
|
||
*** Test
|
||
#+begin_src c :comments link :tangle (eval c_test)
|
||
return 0;
|
||
}
|
||
#+end_src
|
||
|