1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-04-04 16:11:28 +02:00
qmckl/qmckl_numprec.html

661 lines
30 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<!-- 2025-04-02 Wed 13:00 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>&lrm;</title>
<meta name="author" content="TREX CoE" />
<meta name="generator" content="Org Mode" />
<style>
#content { max-width: 60em; margin: auto; }
.title { text-align: center;
margin-bottom: .2em; }
.subtitle { text-align: center;
font-size: medium;
font-weight: bold;
margin-top:0; }
.todo { font-family: monospace; color: red; }
.done { font-family: monospace; color: green; }
.priority { font-family: monospace; color: orange; }
.tag { background-color: #eee; font-family: monospace;
padding: 2px; font-size: 80%; font-weight: normal; }
.timestamp { color: #bebebe; }
.timestamp-kwd { color: #5f9ea0; }
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
.underline { text-decoration: underline; }
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
p.verse { margin-left: 3%; }
pre {
border: 1px solid #e6e6e6;
border-radius: 3px;
background-color: #f2f2f2;
padding: 8pt;
font-family: monospace;
overflow: auto;
margin: 1.2em;
}
pre.src {
position: relative;
overflow: auto;
}
pre.src:before {
display: none;
position: absolute;
top: -8px;
right: 12px;
padding: 3px;
color: #555;
background-color: #f2f2f299;
}
pre.src:hover:before { display: inline; margin-top: 14px;}
/* Languages per Org manual */
pre.src-asymptote:before { content: 'Asymptote'; }
pre.src-awk:before { content: 'Awk'; }
pre.src-authinfo::before { content: 'Authinfo'; }
pre.src-C:before { content: 'C'; }
/* pre.src-C++ doesn't work in CSS */
pre.src-clojure:before { content: 'Clojure'; }
pre.src-css:before { content: 'CSS'; }
pre.src-D:before { content: 'D'; }
pre.src-ditaa:before { content: 'ditaa'; }
pre.src-dot:before { content: 'Graphviz'; }
pre.src-calc:before { content: 'Emacs Calc'; }
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
pre.src-fortran:before { content: 'Fortran'; }
pre.src-gnuplot:before { content: 'gnuplot'; }
pre.src-haskell:before { content: 'Haskell'; }
pre.src-hledger:before { content: 'hledger'; }
pre.src-java:before { content: 'Java'; }
pre.src-js:before { content: 'Javascript'; }
pre.src-latex:before { content: 'LaTeX'; }
pre.src-ledger:before { content: 'Ledger'; }
pre.src-lisp:before { content: 'Lisp'; }
pre.src-lilypond:before { content: 'Lilypond'; }
pre.src-lua:before { content: 'Lua'; }
pre.src-matlab:before { content: 'MATLAB'; }
pre.src-mscgen:before { content: 'Mscgen'; }
pre.src-ocaml:before { content: 'Objective Caml'; }
pre.src-octave:before { content: 'Octave'; }
pre.src-org:before { content: 'Org mode'; }
pre.src-oz:before { content: 'OZ'; }
pre.src-plantuml:before { content: 'Plantuml'; }
pre.src-processing:before { content: 'Processing.js'; }
pre.src-python:before { content: 'Python'; }
pre.src-R:before { content: 'R'; }
pre.src-ruby:before { content: 'Ruby'; }
pre.src-sass:before { content: 'Sass'; }
pre.src-scheme:before { content: 'Scheme'; }
pre.src-screen:before { content: 'Gnu Screen'; }
pre.src-sed:before { content: 'Sed'; }
pre.src-sh:before { content: 'shell'; }
pre.src-sql:before { content: 'SQL'; }
pre.src-sqlite:before { content: 'SQLite'; }
/* additional languages in org.el's org-babel-load-languages alist */
pre.src-forth:before { content: 'Forth'; }
pre.src-io:before { content: 'IO'; }
pre.src-J:before { content: 'J'; }
pre.src-makefile:before { content: 'Makefile'; }
pre.src-maxima:before { content: 'Maxima'; }
pre.src-perl:before { content: 'Perl'; }
pre.src-picolisp:before { content: 'Pico Lisp'; }
pre.src-scala:before { content: 'Scala'; }
pre.src-shell:before { content: 'Shell Script'; }
pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
/* additional language identifiers per "defun org-babel-execute"
in ob-*.el */
pre.src-cpp:before { content: 'C++'; }
pre.src-abc:before { content: 'ABC'; }
pre.src-coq:before { content: 'Coq'; }
pre.src-groovy:before { content: 'Groovy'; }
/* additional language identifiers from org-babel-shell-names in
ob-shell.el: ob-shell is the only babel language using a lambda to put
the execution function name together. */
pre.src-bash:before { content: 'bash'; }
pre.src-csh:before { content: 'csh'; }
pre.src-ash:before { content: 'ash'; }
pre.src-dash:before { content: 'dash'; }
pre.src-ksh:before { content: 'ksh'; }
pre.src-mksh:before { content: 'mksh'; }
pre.src-posh:before { content: 'posh'; }
/* Additional Emacs modes also supported by the LaTeX listings package */
pre.src-ada:before { content: 'Ada'; }
pre.src-asm:before { content: 'Assembler'; }
pre.src-caml:before { content: 'Caml'; }
pre.src-delphi:before { content: 'Delphi'; }
pre.src-html:before { content: 'HTML'; }
pre.src-idl:before { content: 'IDL'; }
pre.src-mercury:before { content: 'Mercury'; }
pre.src-metapost:before { content: 'MetaPost'; }
pre.src-modula-2:before { content: 'Modula-2'; }
pre.src-pascal:before { content: 'Pascal'; }
pre.src-ps:before { content: 'PostScript'; }
pre.src-prolog:before { content: 'Prolog'; }
pre.src-simula:before { content: 'Simula'; }
pre.src-tcl:before { content: 'tcl'; }
pre.src-tex:before { content: 'TeX'; }
pre.src-plain-tex:before { content: 'Plain TeX'; }
pre.src-verilog:before { content: 'Verilog'; }
pre.src-vhdl:before { content: 'VHDL'; }
pre.src-xml:before { content: 'XML'; }
pre.src-nxml:before { content: 'XML'; }
/* add a generic configuration mode; LaTeX export needs an additional
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
pre.src-conf:before { content: 'Configuration File'; }
table { border-collapse:collapse; }
caption.t-above { caption-side: top; }
caption.t-bottom { caption-side: bottom; }
td, th { vertical-align:top; }
th.org-right { text-align: center; }
th.org-left { text-align: center; }
th.org-center { text-align: center; }
td.org-right { text-align: right; }
td.org-left { text-align: left; }
td.org-center { text-align: center; }
dt { font-weight: bold; }
.footpara { display: inline; }
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
.equation-container {
display: table;
text-align: center;
width: 100%;
}
.equation {
vertical-align: middle;
}
.equation-label {
display: table-cell;
text-align: right;
vertical-align: middle;
}
.inlinetask {
padding: 10px;
border: 2px solid gray;
margin: 10px;
background: #ffffcc;
}
#org-div-home-and-up
{ text-align: right; font-size: 70%; white-space: nowrap; }
textarea { overflow-x: auto; }
.linenr { font-size: smaller }
.code-highlighted { background-color: #ffff00; }
.org-info-js_info-navigation { border-style: none; }
#org-info-js_console-label
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
.org-info-js_search-highlight
{ background-color: #ffff00; color: #000000; font-weight: bold; }
.org-svg { }
</style>
<link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
<script src="org-info.js">
// @license magnet:?xt=urn:btih:1f739d935676111cfff4b4693e3816e664797050&amp;dn=gpl-3.0.txt GPL-v3-or-Later
// @license-end
</script>
<script>
// @license magnet:?xt=urn:btih:1f739d935676111cfff4b4693e3816e664797050&amp;dn=gpl-3.0.txt GPL-v3-or-Later
org_html_manager.set("TOC_DEPTH", "4");
org_html_manager.set("LINK_HOME", "index.html");
org_html_manager.set("LINK_UP", "");
org_html_manager.set("LOCAL_TOC", "1");
org_html_manager.set("VIEW_BUTTONS", "0");
org_html_manager.set("MOUSE_HINT", "underline");
org_html_manager.set("FIXED_TOC", "0");
org_html_manager.set("TOC", "1");
org_html_manager.set("VIEW", "info");
org_html_manager.setup(); // activate after the parameters are set
// @license-end
</script>
<script>
window.MathJax = {
tex: {
ams: {
multlineWidth: '85%'
},
tags: 'ams',
tagSide: 'right',
tagIndent: '.8em'
},
chtml: {
scale: 1.0,
displayAlign: 'center',
displayIndent: '0em'
},
svg: {
scale: 1.0,
displayAlign: 'center',
displayIndent: '0em'
},
output: {
font: 'mathjax-modern',
displayOverflow: 'overflow'
}
};
</script>
<script
id="MathJax-script"
async
src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js">
</script>
</head>
<body>
<div id="org-div-home-and-up">
<a accesskey="h" href=""> UP </a>
|
<a accesskey="H" href="index.html"> HOME </a>
</div><div id="content" class="content">
<div id="table-of-contents" role="doc-toc">
<h2>Table of Contents</h2>
<div id="text-table-of-contents" role="doc-toc">
<ul>
<li><a href="#orgda2fc50">1. Control of the numerical precision</a></li>
<li><a href="#orgcc31963">2. Precision</a></li>
<li><a href="#orgb72d677">3. Range</a></li>
<li><a href="#org7cdeadd">4. Helper functions</a>
<ul>
<li><a href="#orgc44e589">4.1. Epsilon</a></li>
<li><a href="#orga87eb55">4.2. Testing the number of unchanged bits</a></li>
</ul>
</li>
<li><a href="#org4c1e54f">5. Approximate functions</a>
<ul>
<li><a href="#org437fa71">5.1. Exponential</a></li>
</ul>
</li>
</ul>
</div>
</div>
<p>
3+TITLE: Numerical precision
</p>
<div id="outline-container-orgda2fc50" class="outline-2">
<h2 id="orgda2fc50"><span class="section-number-2">1.</span> Control of the numerical precision</h2>
<div class="outline-text-2" id="text-1">
<p>
Controlling numerical precision enables optimizations. Here, the
default parameters determining the target numerical precision and
range are defined. Following the IEEE Standard for Floating-Point
Arithmetic (IEEE 754),
<i>precision</i> refers to the number of significand bits (including the
sign bit) and <i>range</i> refers to the number of exponent bits.
</p>
<table id="org28191f5" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
<colgroup>
<col class="org-left" />
<col class="org-right" />
</colgroup>
<tbody>
<tr>
<td class="org-left"><code>QMCKL_DEFAULT_PRECISION</code></td>
<td class="org-right">53</td>
</tr>
<tr>
<td class="org-left"><code>QMCKL_DEFAULT_RANGE</code></td>
<td class="org-right">11</td>
</tr>
</tbody>
</table>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #a020f0;">typedef</span> <span style="color: #a020f0;">struct</span> <span style="color: #228b22;">qmckl_numprec_struct</span> {
<span style="color: #228b22;">uint32_t</span> <span style="color: #a0522d;">precision</span>;
<span style="color: #228b22;">uint32_t</span> <span style="color: #a0522d;">range</span>;
} <span style="color: #228b22;">qmckl_numprec_struct</span>;
</pre>
</div>
<p>
The following functions set and get the required precision and
range. <code>precision</code> is an integer between 2 and 53, and <code>range</code> is an
integer between 2 and 11.
</p>
<p>
The setter functions functions return a new context as a 64-bit
integer. The getter functions return the value, as a 32-bit
integer. The update functions return <code>QMCKL_SUCCESS</code> or
<code>QMCKL_FAILURE</code>.
</p>
</div>
</div>
<div id="outline-container-orgcc31963" class="outline-2">
<h2 id="orgcc31963"><span class="section-number-2">2.</span> Precision</h2>
<div class="outline-text-2" id="text-2">
<p>
<code>qmckl_context_set_numprec_precision</code> modifies the parameter for the
numerical precision in the context.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_set_numprec_precision</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>, <span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">precision</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
<span style="color: #a020f0;">return</span> QMCKL_INVALID_CONTEXT;
<span style="color: #a020f0;">if</span> (precision &lt; 2) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
<span style="color: #8b2252;">"qmckl_update_numprec_precision"</span>,
<span style="color: #8b2252;">"precision &lt; 2"</span>);
}
<span style="color: #a020f0;">if</span> (precision &gt; 53) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
<span style="color: #8b2252;">"qmckl_update_numprec_precision"</span>,
<span style="color: #8b2252;">"precision &gt; 53"</span>);
}
<span style="color: #228b22;">qmckl_context_struct</span>* <span style="color: #a020f0;">const</span> <span style="color: #a0522d;">ctx</span> = (<span style="color: #228b22;">qmckl_context_struct</span>*) context;
/* <span style="color: #b22222;">This should be always true because the context is valid</span> */
assert (ctx != <span style="color: #008b8b;">NULL</span>);
qmckl_lock(context);
{
ctx-&gt;numprec.precision = (<span style="color: #228b22;">uint32_t</span>) precision;
}
qmckl_unlock(context);
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #a020f0;">interface</span>
<span style="color: #228b22;">integer</span> (qmckl_exit_code) <span style="color: #a020f0;">function</span> <span style="color: #0000ff;">qmckl_set_numprec_precision</span>(context, precision) <span style="color: #a020f0;">bind</span>(C)
<span style="color: #a020f0;">use</span>, <span style="color: #a020f0;">intrinsic</span> :: <span style="color: #0000ff;">iso_c_binding</span>
<span style="color: #a020f0;">import</span>
<span style="color: #228b22;">integer</span> (qmckl_context), <span style="color: #a020f0;">intent</span>(in), <span style="color: #a020f0;">value</span> ::<span style="color: #a0522d;"> context</span>
<span style="color: #228b22;">integer</span> (<span style="color: #008b8b;">c_int32_t</span>), <span style="color: #a020f0;">intent</span>(in), <span style="color: #a020f0;">value</span> ::<span style="color: #a0522d;"> precision</span>
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_set_numprec_precision</span>
<span style="color: #a020f0;">end interface</span>
</pre>
</div>
<p>
<code>qmckl_get_numprec_precision</code> returns the value of the numerical precision in the context.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">int</span> <span style="color: #0000ff;">qmckl_get_numprec_precision</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_CONTEXT,
<span style="color: #8b2252;">"qmckl_get_numprec_precision"</span>,
<span style="color: #8b2252;">""</span>);
}
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context_struct</span>* <span style="color: #a020f0;">const</span> <span style="color: #a0522d;">ctx</span> = (<span style="color: #228b22;">qmckl_context_struct</span>*) context;
<span style="color: #a020f0;">return</span> ctx-&gt;numprec.precision;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #a020f0;">interface</span>
<span style="color: #228b22;">integer</span> (qmckl_exit_code) <span style="color: #a020f0;">function</span> <span style="color: #0000ff;">qmckl_get_numprec_precision</span>(context) <span style="color: #a020f0;">bind</span>(C)
<span style="color: #a020f0;">use</span>, <span style="color: #a020f0;">intrinsic</span> :: <span style="color: #0000ff;">iso_c_binding</span>
<span style="color: #a020f0;">import</span>
<span style="color: #228b22;">integer</span> (qmckl_context), <span style="color: #a020f0;">intent</span>(in), <span style="color: #a020f0;">value</span> ::<span style="color: #a0522d;"> context</span>
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_get_numprec_precision</span>
<span style="color: #a020f0;">end interface</span>
</pre>
</div>
</div>
</div>
<div id="outline-container-orgb72d677" class="outline-2">
<h2 id="orgb72d677"><span class="section-number-2">3.</span> Range</h2>
<div class="outline-text-2" id="text-3">
<p>
<code>qmckl_set_numprec_range</code> modifies the parameter for the numerical
range in a given context.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_set_numprec_range</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>, <span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">range</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
<span style="color: #a020f0;">return</span> QMCKL_INVALID_CONTEXT;
<span style="color: #a020f0;">if</span> (range &lt; 2) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
<span style="color: #8b2252;">"qmckl_set_numprec_range"</span>,
<span style="color: #8b2252;">"range &lt; 2"</span>);
}
<span style="color: #a020f0;">if</span> (range &gt; 11) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_ARG_2,
<span style="color: #8b2252;">"qmckl_set_numprec_range"</span>,
<span style="color: #8b2252;">"range &gt; 11"</span>);
}
<span style="color: #228b22;">qmckl_context_struct</span>* <span style="color: #a020f0;">const</span> <span style="color: #a0522d;">ctx</span> = (<span style="color: #228b22;">qmckl_context_struct</span>*) context;
/* <span style="color: #b22222;">This should be always true because the context is valid</span> */
assert (ctx != <span style="color: #008b8b;">NULL</span>);
qmckl_lock(context);
{
ctx-&gt;numprec.range = (<span style="color: #228b22;">uint32_t</span>) range;
}
qmckl_unlock(context);
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #a020f0;">interface</span>
<span style="color: #228b22;">integer</span> (qmckl_exit_code) <span style="color: #a020f0;">function</span> <span style="color: #0000ff;">qmckl_set_numprec_range</span>(context, range) <span style="color: #a020f0;">bind</span>(C)
<span style="color: #a020f0;">use</span>, <span style="color: #a020f0;">intrinsic</span> :: <span style="color: #0000ff;">iso_c_binding</span>
<span style="color: #a020f0;">import</span>
<span style="color: #228b22;">integer</span> (qmckl_context), <span style="color: #a020f0;">intent</span>(in), <span style="color: #a020f0;">value</span> ::<span style="color: #a0522d;"> context</span>
<span style="color: #228b22;">integer</span> (<span style="color: #008b8b;">c_int32_t</span>), <span style="color: #a020f0;">intent</span>(in), <span style="color: #a020f0;">value</span> ::<span style="color: #a0522d;"> range</span>
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_set_numprec_range</span>
<span style="color: #a020f0;">end interface</span>
</pre>
</div>
<p>
<code>qmckl_get_numprec_range</code> returns the value of the numerical range in the context.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">int</span> <span style="color: #0000ff;">qmckl_get_numprec_range</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_INVALID_CONTEXT,
<span style="color: #8b2252;">"qmckl_get_numprec_range"</span>,
<span style="color: #8b2252;">""</span>);
}
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context_struct</span>* <span style="color: #a020f0;">const</span> <span style="color: #a0522d;">ctx</span> = (<span style="color: #228b22;">qmckl_context_struct</span>*) context;
<span style="color: #a020f0;">return</span> ctx-&gt;numprec.range;
}
</pre>
</div>
</div>
</div>
<div id="outline-container-org7cdeadd" class="outline-2">
<h2 id="org7cdeadd"><span class="section-number-2">4.</span> Helper functions</h2>
<div class="outline-text-2" id="text-4">
</div>
<div id="outline-container-orgc44e589" class="outline-3">
<h3 id="orgc44e589"><span class="section-number-3">4.1.</span> Epsilon</h3>
<div class="outline-text-3" id="text-4-1">
<p>
<code>qmckl_get_numprec_epsilon</code> returns \(\epsilon = 2^{1-n}\) where <code>n</code> is the precision.
We need to remove the sign bit from the precision.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">double</span> <span style="color: #0000ff;">qmckl_get_numprec_epsilon</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT)
<span style="color: #a020f0;">return</span> QMCKL_INVALID_CONTEXT;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context_struct</span>* <span style="color: #a020f0;">const</span> <span style="color: #a0522d;">ctx</span> = (<span style="color: #228b22;">qmckl_context_struct</span>*) context;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">precision</span> = ctx-&gt;numprec.precision;
<span style="color: #a020f0;">return</span> 1. / (<span style="color: #228b22;">double</span>) ( ((<span style="color: #228b22;">uint64_t</span>) 1) &lt;&lt; (precision-2));
}
</pre>
</div>
</div>
</div>
<div id="outline-container-orga87eb55" class="outline-3">
<h3 id="orga87eb55"><span class="section-number-3">4.2.</span> Testing the number of unchanged bits</h3>
<div class="outline-text-3" id="text-4-2">
<p>
To test that a given approximation keeps a given number of bits
unchanged, we need a function that returns the number of unchanged
bits in the range, and in the precision.
</p>
<p>
For this, we first count by how many units in the last place (ulps) two
numbers differ.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">int64_t</span> <span style="color: #0000ff;">countUlpDifference_64</span>(<span style="color: #228b22;">double</span> <span style="color: #a0522d;">a</span>, <span style="color: #228b22;">double</span> <span style="color: #a0522d;">b</span>) {
<span style="color: #a020f0;">union</span> <span style="color: #228b22;">int_or_float</span> {
<span style="color: #228b22;">int64_t</span> <span style="color: #a0522d;">i</span>;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">f</span>;
} <span style="color: #a0522d;">x</span>, <span style="color: #a0522d;">y</span>;
x.f = a;
y.f = b;
// <span style="color: #b22222;">Handle sign bit discontinuity: if the signs are different and either value is not zero</span>
<span style="color: #a020f0;">if</span> ((x.i &lt; 0) != (y.i &lt; 0) &amp;&amp; (x.f != 0.0) &amp;&amp; (y.f != 0.0)) {
// <span style="color: #b22222;">Use the absolute values and add the distance to zero for both numbers</span>
<span style="color: #228b22;">int64_t</span> <span style="color: #a0522d;">distanceToZeroForX</span> = x.i &lt; 0 ? INT64_MAX + x.i : INT64_MAX - x.i;
<span style="color: #228b22;">int64_t</span> <span style="color: #a0522d;">distanceToZeroForY</span> = y.i &lt; 0 ? INT64_MAX + y.i : INT64_MAX - y.i;
<span style="color: #a020f0;">return</span> distanceToZeroForX + distanceToZeroForY;
}
// <span style="color: #b22222;">Calculate the difference in their binary representations</span>
<span style="color: #228b22;">int64_t</span> <span style="color: #a0522d;">result</span> = x.i - y.i;
result = result &gt; 0 ? result : -result;
<span style="color: #a020f0;">return</span> result;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">int32_t</span> <span style="color: #0000ff;">qmckl_test_precision_64</span>(<span style="color: #228b22;">double</span> <span style="color: #a0522d;">a</span>, <span style="color: #228b22;">double</span> <span style="color: #a0522d;">b</span>) {
<span style="color: #228b22;">int64_t</span> <span style="color: #a0522d;">diff</span> = countUlpDifference_64(a,b);
<span style="color: #a020f0;">if</span> (diff == 0) <span style="color: #a020f0;">return</span> 53;
<span style="color: #228b22;">int32_t</span> <span style="color: #a0522d;">result</span> = 53;
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span>=0 ; i&lt;53 &amp;&amp; diff != 0 ; ++i) {
diff &gt;&gt;= 1;
result--;
}
<span style="color: #a020f0;">return</span> result;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">int32_t</span> <span style="color: #0000ff;">qmckl_test_precision_32</span>(<span style="color: #228b22;">float</span> <span style="color: #a0522d;">a</span>, <span style="color: #228b22;">float</span> <span style="color: #a0522d;">b</span>) {
<span style="color: #a020f0;">return</span> qmckl_test_precision_64( (<span style="color: #228b22;">double</span>) a, (<span style="color: #228b22;">double</span>) b );
}
</pre>
</div>
</div>
</div>
</div>
<div id="outline-container-org4c1e54f" class="outline-2">
<h2 id="org4c1e54f"><span class="section-number-2">5.</span> Approximate functions</h2>
<div class="outline-text-2" id="text-5">
</div>
<div id="outline-container-org437fa71" class="outline-3">
<h3 id="org437fa71"><span class="section-number-3">5.1.</span> Exponential</h3>
<div class="outline-text-3" id="text-5-1">
<p>
Fast exponential function, adapted from Johan Rade's implementation
(<a href="https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d">https://gist.github.com/jrade/293a73f89dfef51da6522428c857802d</a>). It
is based on Schraudolph's paper:
</p>
<p>
N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function",
<i>Neural Computation</i> <b>11</b>, 853862 (1999).
(available at <a href="https://nic.schraudolph.org/pubs/Schraudolph99.pdf">https://nic.schraudolph.org/pubs/Schraudolph99.pdf</a>)
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">float</span> <span style="color: #0000ff;">fastExpf</span>(<span style="color: #228b22;">float</span> <span style="color: #a0522d;">x</span>)
{
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">float</span> <span style="color: #a0522d;">a</span> = 12102203.0;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">float</span> <span style="color: #a0522d;">b</span> = 1064986816.0;
x = a * x + b;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">float</span> <span style="color: #a0522d;">c</span> = 8388608.0;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">float</span> <span style="color: #a0522d;">d</span> = 2139095040.0;
<span style="color: #a020f0;">if</span> (x &lt; c || x &gt; d)
x = (x &lt; c) ? 0.0f : d;
<span style="color: #228b22;">uint32_t</span> <span style="color: #a0522d;">n</span> = (<span style="color: #228b22;">uint32_t</span>) x;
memcpy(&amp;x, &amp;n, 4);
<span style="color: #a020f0;">return</span> x;
}
<span style="color: #228b22;">double</span> <span style="color: #0000ff;">fastExp</span>(<span style="color: #228b22;">double</span> <span style="color: #a0522d;">x</span>)
{
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">a</span> = 6497320848556798.0;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">b</span> = 4606985713057410560.0;
x = a * x + b;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">c</span> = 4503599627370496.0;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">d</span> = 9218868437227405312.0;
<span style="color: #a020f0;">if</span> (x &lt; c || x &gt; d)
x = (x &lt; c) ? 0.0 : d;
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">n</span> = (<span style="color: #228b22;">uint64_t</span>) x;
memcpy(&amp;x, &amp;n, 8);
<span style="color: #a020f0;">return</span> x;
}
</pre>
</div>
</div>
</div>
</div>
</div>
<div id="postamble" class="status">
<p class="author">Author: TREX CoE</p>
<p class="date">Created: 2025-04-02 Wed 13:00</p>
<p class="validation"><a href="https://validator.w3.org/check?uri=referer">Validate</a></p>
</div>
</body>
</html>