1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-20 17:42:13 +01:00
qmckl/org/qmckl_numprec.org

505 lines
14 KiB
Org Mode
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

3+TITLE: Numerical precision
#+SETUPFILE: ../tools/theme.setup
#+INCLUDE: ../tools/lib.org
* Headers :noexport:
#+begin_src c :tangle (eval c_test) :noweb yes
#include "qmckl.h"
#include "assert.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
int main() {
#+end_src
#+begin_src c :tangle (eval h_private_type)
#ifndef QMCKL_NUMPREC_HPT
#define QMCKL_NUMPREC_HPT
#ifdef HAVE_STDINT_H
#include <stdint.h>
#elif HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#+end_src
#+begin_src c :tangle (eval c)
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
#elif HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_FPE
#define _GNU_SOURCE
#include <fenv.h>
#include <signal.h>
#include <stdio.h>
#include <execinfo.h>
#define MAX_BACKTRACE_SIZE 100
void floatingPointExceptionHandler(int signal) {
void* backtraceArray[MAX_BACKTRACE_SIZE];
int backtraceSize = backtrace(backtraceArray, MAX_BACKTRACE_SIZE);
char** backtraceSymbols = backtrace_symbols(backtraceArray, backtraceSize);
// Print the backtrace
for (int i = 0; i < backtraceSize; ++i) {
printf("[%d] %s\n", i, backtraceSymbols[i]);
}
// Clean up the memory used by backtrace_symbols
free(backtraceSymbols);
exit(EXIT_FAILURE);
}
static void __attribute__ ((constructor))
trapfpe ()
{
/* Enable some exceptions. At startup all exceptions are masked. */
feenableexcept (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
signal(SIGFPE, floatingPointExceptionHandler);
}
#endif
#include "qmckl.h"
#include "qmckl_context_private_type.h"
#+end_src
* Control of the numerical precision
Controlling numerical precision enables optimizations. Here, the
default parameters determining the target numerical precision and
range are defined. Following the IEEE Standard for Floating-Point
Arithmetic (IEEE 754),
/precision/ refers to the number of significand bits (including the
sign bit) and /range/ refers to the number of exponent bits.
#+NAME: table-precision
| ~QMCKL_DEFAULT_PRECISION~ | 53 |
| ~QMCKL_DEFAULT_RANGE~ | 11 |
# We need to force Emacs not to indent the Python code:
# -*- org-src-preserve-indentation: t
#+begin_src python :var table=table-precision :results drawer :exports results
""" This script generates the C and Fortran constants from the org-mode table.
"""
result = [ "#+begin_src c :comments org :tangle (eval h_type)" ]
for (text, code) in table:
text=text.replace("~","")
result += [ f"#define {text:30s} {code:d}" ]
result += [ "#+end_src" ]
result += [ "" ]
result += [ "#+begin_src f90 :comments org :tangle (eval fh_func) :exports none" ]
for (text, code) in table:
text=text.replace("~","")
result += [ f" integer, parameter :: {text:30s} = {code:d}" ]
result += [ "#+end_src" ]
return '\n'.join(result)
#+end_src
#+RESULTS:
:results:
#+begin_src c :comments org :tangle (eval h_type)
#define QMCKL_DEFAULT_PRECISION 53
#define QMCKL_DEFAULT_RANGE 11
#+end_src
#+begin_src f90 :comments org :tangle (eval fh_func) :exports none
integer, parameter :: QMCKL_DEFAULT_PRECISION = 53
integer, parameter :: QMCKL_DEFAULT_RANGE = 11
#+end_src
:end:
#+begin_src c :comments org :tangle (eval h_private_type)
typedef struct qmckl_numprec_struct {
uint32_t precision;
uint32_t range;
} qmckl_numprec_struct;
#+end_src
The following functions set and get the required precision and
range. ~precision~ is an integer between 2 and 53, and ~range~ is an
integer between 2 and 11.
The setter functions functions return a new context as a 64-bit
integer. The getter functions return the value, as a 32-bit
integer. The update functions return ~QMCKL_SUCCESS~ or
~QMCKL_FAILURE~.
* Precision
~qmckl_context_set_numprec_precision~ modifies the parameter for the
numerical precision in the context.
# Header
#+begin_src c :comments org :tangle (eval h_func) :exports none
qmckl_exit_code qmckl_set_numprec_precision(const qmckl_context context, const int precision);
#+end_src
# Source
#+begin_src c :tangle (eval c)
qmckl_exit_code qmckl_set_numprec_precision(const qmckl_context context, const int precision) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
return QMCKL_INVALID_CONTEXT;
if (precision < 2) {
return qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
"qmckl_update_numprec_precision",
"precision < 2");
}
if (precision > 53) {
return qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
"qmckl_update_numprec_precision",
"precision > 53");
}
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
/* This should be always true because the context is valid */
assert (ctx != NULL);
qmckl_lock(context);
{
ctx->numprec.precision = (uint32_t) precision;
}
qmckl_unlock(context);
return QMCKL_SUCCESS;
}
#+end_src
# Fortran interface
#+begin_src f90 :tangle (eval fh_func)
interface
integer (qmckl_exit_code) function qmckl_set_numprec_precision(context, precision) bind(C)
use, intrinsic :: iso_c_binding
import
integer (qmckl_context), intent(in), value :: context
integer (c_int32_t), intent(in), value :: precision
end function qmckl_set_numprec_precision
end interface
#+end_src
~qmckl_get_numprec_precision~ returns the value of the numerical precision in the context.
#+begin_src c :comments org :tangle (eval h_func) :exports none
int32_t qmckl_get_numprec_precision(const qmckl_context context);
#+end_src
# Source
#+begin_src c :tangle (eval c)
int qmckl_get_numprec_precision(const qmckl_context context) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_INVALID_CONTEXT,
"qmckl_get_numprec_precision",
"");
}
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
return ctx->numprec.precision;
}
#+end_src
# Fortran interface
#+begin_src f90 :tangle (eval fh_func)
interface
integer (qmckl_exit_code) function qmckl_get_numprec_precision(context) bind(C)
use, intrinsic :: iso_c_binding
import
integer (qmckl_context), intent(in), value :: context
end function qmckl_get_numprec_precision
end interface
#+end_src
* Range
~qmckl_set_numprec_range~ modifies the parameter for the numerical
range in a given context.
# Header
#+begin_src c :comments org :tangle (eval h_func) :exports none
qmckl_exit_code qmckl_set_numprec_range(const qmckl_context context, const int range);
#+end_src
# Source
#+begin_src c :tangle (eval c)
qmckl_exit_code qmckl_set_numprec_range(const qmckl_context context, const int range) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
return QMCKL_INVALID_CONTEXT;
if (range < 2) {
return qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
"qmckl_set_numprec_range",
"range < 2");
}
if (range > 11) {
return qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
"qmckl_set_numprec_range",
"range > 11");
}
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
/* This should be always true because the context is valid */
assert (ctx != NULL);
qmckl_lock(context);
{
ctx->numprec.range = (uint32_t) range;
}
qmckl_unlock(context);
return QMCKL_SUCCESS;
}
#+end_src
# Fortran interface
#+begin_src f90 :tangle (eval fh_func)
interface
integer (qmckl_exit_code) function qmckl_set_numprec_range(context, range) bind(C)
use, intrinsic :: iso_c_binding
import
integer (qmckl_context), intent(in), value :: context
integer (c_int32_t), intent(in), value :: range
end function qmckl_set_numprec_range
end interface
#+end_src
~qmckl_get_numprec_range~ returns the value of the numerical range in the context.
#+begin_src c :comments org :tangle (eval h_func) :exports none
int32_t qmckl_get_numprec_range(const qmckl_context context);
#+end_src
# Source
#+begin_src c :tangle (eval c)
int qmckl_get_numprec_range(const qmckl_context context) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_INVALID_CONTEXT,
"qmckl_get_numprec_range",
"");
}
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
return ctx->numprec.range;
}
#+end_src
# Fortran interface
#+begin_src f90 :tangle (eval fh_func) :exports none
interface
integer (qmckl_exit_code) function qmckl_get_numprec_range(context) bind(C)
use, intrinsic :: iso_c_binding
import
integer (qmckl_context), intent(in), value :: context
end function qmckl_get_numprec_range
end interface
#+end_src
* Helper functions
** Epsilon
~qmckl_get_numprec_epsilon~ returns $\epsilon = 2^{1-n}$ where ~n~ is the precision.
We need to remove the sign bit from the precision.
#+begin_src c :comments org :tangle (eval h_func) :exports none
double qmckl_get_numprec_epsilon(const qmckl_context context);
#+end_src
# Source
#+begin_src c :tangle (eval c)
double qmckl_get_numprec_epsilon(const qmckl_context context) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
return QMCKL_INVALID_CONTEXT;
const qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
const int precision = ctx->numprec.precision;
return 1. / (double) ( ((uint64_t) 1) << (precision-2));
}
#+end_src
# Fortran interface
#+begin_src f90 :tangle (eval fh_func) :exports none
interface
real (c_double) function qmckl_get_numprec_epsilon(context) bind(C)
use, intrinsic :: iso_c_binding
import
integer (qmckl_context), intent(in), value :: context
end function qmckl_get_numprec_epsilon
end interface
#+end_src
** Testing the number of unchanged bits
To test that a given approximation keeps a given number of bits
unchanged, we need a function that returns the number of unchanged
bits in the range, and in the precision.
For this, we first count by how many units in the last place (ulps) two
numbers differ.
#+begin_src c :tangle (eval c)
int64_t countUlpDifference_64(double a, double b) {
union int_or_float {
int64_t i;
double f;
} x, y;
x.f = a;
y.f = b;
// Handle sign bit discontinuity: if the signs are different and either value is not zero
if ((x.i < 0) != (y.i < 0) && (x.f != 0.0) && (y.f != 0.0)) {
// Use the absolute values and add the distance to zero for both numbers
int64_t distanceToZeroForX = x.i < 0 ? INT64_MAX + x.i : INT64_MAX - x.i;
int64_t distanceToZeroForY = y.i < 0 ? INT64_MAX + y.i : INT64_MAX - y.i;
return distanceToZeroForX + distanceToZeroForY;
}
// Calculate the difference in their binary representations
int64_t result = x.i - y.i;
result = result > 0 ? result : -result;
return result;
}
#+end_src
#+begin_src c :comments org :tangle (eval h_func) :exports none
int32_t qmckl_test_precision_64(double a, double b);
int32_t qmckl_test_precision_32(float a, float b);
#+end_src
#+begin_src c :tangle (eval c)
int32_t qmckl_test_precision_64(double a, double b) {
int64_t diff = countUlpDifference_64(a,b);
if (diff == 0) return 53;
int32_t result = 53;
for (int i=0 ; i<53 && diff != 0 ; ++i) {
diff >>= 1;
result--;
}
return result;
}
#+end_src
#+begin_src c :tangle (eval c)
int32_t qmckl_test_precision_32(float a, float b) {
return qmckl_test_precision_64( (double) a, (double) b );
}
#+end_src
#+begin_src f90 :tangle (eval fh_func) :exports none
interface
integer (c_int) function qmckl_test_precision_32(a,b) bind(C)
use, intrinsic :: iso_c_binding
import
real (c_float), intent(in), value :: a, b
end function qmckl_test_precision_32
end interface
interface
integer (c_int) function qmckl_test_precision_64(a,b) bind(C)
use, intrinsic :: iso_c_binding
import
real (c_double), intent(in), value :: a, b
end function qmckl_test_precision_64
end interface
#+end_src
* Approximate functions
** Exponential
Fast exponential function, adapted from Johan Rade's implementation
(https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d). It
is based on Schraudolph's paper:
N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function",
/Neural Computation/ *11*, 853862 (1999).
(available at https://nic.schraudolph.org/pubs/Schraudolph99.pdf)
#+begin_src c :tangle (eval c)
float fastExpf(float x)
{
const float a = 12102203.0;
const float b = 1064986816.0;
x = a * x + b;
const float c = 8388608.0;
const float d = 2139095040.0;
if (x < c || x > d)
x = (x < c) ? 0.0f : d;
uint32_t n = (uint32_t) x;
memcpy(&x, &n, 4);
return x;
}
double fastExp(double x)
{
const double a = 6497320848556798.0;
const double b = 4606985713057410560.0;
x = a * x + b;
const double c = 4503599627370496.0;
const double d = 9218868437227405312.0;
if (x < c || x > d)
x = (x < c) ? 0.0 : d;
uint64_t n = (uint64_t) x;
memcpy(&x, &n, 8);
return x;
}
#+end_src
* End of files :noexport:
#+begin_src c :comments link :tangle (eval h_private_type)
#endif
#+end_src
*** Test
#+begin_src c :comments link :tangle (eval c_test)
return 0;
}
#+end_src