1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-24 03:22:11 +01:00
qmckl/qmckl_sherman_morrison_woodbury.html

6173 lines
363 KiB
HTML

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<!-- 2023-03-09 Thu 10:03 -->
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Sherman-Morrison-Woodbury</title>
<meta name="generator" content="Org mode" />
<meta name="author" content="TREX CoE" />
<style type="text/css">
<!--/*--><![CDATA[/*><!--*/
.title { text-align: center;
margin-bottom: .2em; }
.subtitle { text-align: center;
font-size: medium;
font-weight: bold;
margin-top:0; }
.todo { font-family: monospace; color: red; }
.done { font-family: monospace; color: green; }
.priority { font-family: monospace; color: orange; }
.tag { background-color: #eee; font-family: monospace;
padding: 2px; font-size: 80%; font-weight: normal; }
.timestamp { color: #bebebe; }
.timestamp-kwd { color: #5f9ea0; }
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
.underline { text-decoration: underline; }
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
p.verse { margin-left: 3%; }
pre {
border: 1px solid #ccc;
box-shadow: 3px 3px 3px #eee;
padding: 8pt;
font-family: monospace;
overflow: auto;
margin: 1.2em;
}
pre.src {
position: relative;
overflow: visible;
padding-top: 1.2em;
}
pre.src:before {
display: none;
position: absolute;
background-color: white;
top: -10px;
right: 10px;
padding: 3px;
border: 1px solid black;
}
pre.src:hover:before { display: inline;}
/* Languages per Org manual */
pre.src-asymptote:before { content: 'Asymptote'; }
pre.src-awk:before { content: 'Awk'; }
pre.src-C:before { content: 'C'; }
/* pre.src-C++ doesn't work in CSS */
pre.src-clojure:before { content: 'Clojure'; }
pre.src-css:before { content: 'CSS'; }
pre.src-D:before { content: 'D'; }
pre.src-ditaa:before { content: 'ditaa'; }
pre.src-dot:before { content: 'Graphviz'; }
pre.src-calc:before { content: 'Emacs Calc'; }
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
pre.src-fortran:before { content: 'Fortran'; }
pre.src-gnuplot:before { content: 'gnuplot'; }
pre.src-haskell:before { content: 'Haskell'; }
pre.src-hledger:before { content: 'hledger'; }
pre.src-java:before { content: 'Java'; }
pre.src-js:before { content: 'Javascript'; }
pre.src-latex:before { content: 'LaTeX'; }
pre.src-ledger:before { content: 'Ledger'; }
pre.src-lisp:before { content: 'Lisp'; }
pre.src-lilypond:before { content: 'Lilypond'; }
pre.src-lua:before { content: 'Lua'; }
pre.src-matlab:before { content: 'MATLAB'; }
pre.src-mscgen:before { content: 'Mscgen'; }
pre.src-ocaml:before { content: 'Objective Caml'; }
pre.src-octave:before { content: 'Octave'; }
pre.src-org:before { content: 'Org mode'; }
pre.src-oz:before { content: 'OZ'; }
pre.src-plantuml:before { content: 'Plantuml'; }
pre.src-processing:before { content: 'Processing.js'; }
pre.src-python:before { content: 'Python'; }
pre.src-R:before { content: 'R'; }
pre.src-ruby:before { content: 'Ruby'; }
pre.src-sass:before { content: 'Sass'; }
pre.src-scheme:before { content: 'Scheme'; }
pre.src-screen:before { content: 'Gnu Screen'; }
pre.src-sed:before { content: 'Sed'; }
pre.src-sh:before { content: 'shell'; }
pre.src-sql:before { content: 'SQL'; }
pre.src-sqlite:before { content: 'SQLite'; }
/* additional languages in org.el's org-babel-load-languages alist */
pre.src-forth:before { content: 'Forth'; }
pre.src-io:before { content: 'IO'; }
pre.src-J:before { content: 'J'; }
pre.src-makefile:before { content: 'Makefile'; }
pre.src-maxima:before { content: 'Maxima'; }
pre.src-perl:before { content: 'Perl'; }
pre.src-picolisp:before { content: 'Pico Lisp'; }
pre.src-scala:before { content: 'Scala'; }
pre.src-shell:before { content: 'Shell Script'; }
/* Hover label for ebnf2ps source blocks; the label now spells the language identifier correctly. */
pre.src-ebnf2ps:before { content: 'ebnf2ps'; }
/* additional language identifiers per "defun org-babel-execute"
in ob-*.el */
pre.src-cpp:before { content: 'C++'; }
pre.src-abc:before { content: 'ABC'; }
pre.src-coq:before { content: 'Coq'; }
pre.src-groovy:before { content: 'Groovy'; }
/* additional language identifiers from org-babel-shell-names in
ob-shell.el: ob-shell is the only babel language using a lambda to put
the execution function name together. */
pre.src-bash:before { content: 'bash'; }
pre.src-csh:before { content: 'csh'; }
pre.src-ash:before { content: 'ash'; }
pre.src-dash:before { content: 'dash'; }
pre.src-ksh:before { content: 'ksh'; }
pre.src-mksh:before { content: 'mksh'; }
pre.src-posh:before { content: 'posh'; }
/* Additional Emacs modes also supported by the LaTeX listings package */
pre.src-ada:before { content: 'Ada'; }
pre.src-asm:before { content: 'Assembler'; }
pre.src-caml:before { content: 'Caml'; }
pre.src-delphi:before { content: 'Delphi'; }
pre.src-html:before { content: 'HTML'; }
pre.src-idl:before { content: 'IDL'; }
pre.src-mercury:before { content: 'Mercury'; }
pre.src-metapost:before { content: 'MetaPost'; }
pre.src-modula-2:before { content: 'Modula-2'; }
pre.src-pascal:before { content: 'Pascal'; }
pre.src-ps:before { content: 'PostScript'; }
pre.src-prolog:before { content: 'Prolog'; }
pre.src-simula:before { content: 'Simula'; }
pre.src-tcl:before { content: 'tcl'; }
pre.src-tex:before { content: 'TeX'; }
pre.src-plain-tex:before { content: 'Plain TeX'; }
pre.src-verilog:before { content: 'Verilog'; }
pre.src-vhdl:before { content: 'VHDL'; }
pre.src-xml:before { content: 'XML'; }
pre.src-nxml:before { content: 'XML'; }
/* add a generic configuration mode; LaTeX export needs an additional
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
pre.src-conf:before { content: 'Configuration File'; }
table { border-collapse:collapse; }
caption.t-above { caption-side: top; }
caption.t-bottom { caption-side: bottom; }
td, th { vertical-align:top; }
th.org-right { text-align: center; }
th.org-left { text-align: center; }
th.org-center { text-align: center; }
td.org-right { text-align: right; }
td.org-left { text-align: left; }
td.org-center { text-align: center; }
dt { font-weight: bold; }
.footpara { display: inline; }
.footdef { margin-bottom: 1em; }
.figure { padding: 1em; }
.figure p { text-align: center; }
.equation-container {
display: table;
text-align: center;
width: 100%;
}
.equation {
vertical-align: middle;
}
.equation-label {
display: table-cell;
text-align: right;
vertical-align: middle;
}
.inlinetask {
padding: 10px;
border: 2px solid gray;
margin: 10px;
background: #ffffcc;
}
#org-div-home-and-up
{ text-align: right; font-size: 70%; white-space: nowrap; }
textarea { overflow-x: auto; }
.linenr { font-size: smaller }
.code-highlighted { background-color: #ffff00; }
.org-info-js_info-navigation { border-style: none; }
#org-info-js_console-label
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
.org-info-js_search-highlight
{ background-color: #ffff00; color: #000000; font-weight: bold; }
.org-svg { width: 90%; }
/*]]>*/-->
</style>
<link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
<script type="text/javascript" src="org-info.js">
/**
*
* @source: org-info.js
*
* @licstart The following is the entire license notice for the
* JavaScript code in org-info.js.
*
* Copyright (C) 2012-2020 Free Software Foundation, Inc.
*
*
* The JavaScript code in this tag is free software: you can
* redistribute it and/or modify it under the terms of the GNU
* General Public License (GNU GPL) as published by the Free Software
* Foundation, either version 3 of the License, or (at your option)
* any later version. The code is distributed WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
*
* As additional permission under GNU GPL version 3 section 7, you
* may distribute non-source (e.g., minimized or compacted) forms of
* that code without the copy of the GNU GPL normally required by
* section 4, provided you include this license notice and a URL
* through which recipients can access the Corresponding Source.
*
* @licend The above is the entire license notice
* for the JavaScript code in org-info.js.
*
*/
</script>
<script type="text/javascript">
/*
@licstart The following is the entire license notice for the
JavaScript code in this tag.
Copyright (C) 2012-2020 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License, or (at your option)
any later version. The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this tag.
*/
<!--/*--><![CDATA[/*><!--*/
// Configure the org-info.js page viewer, then activate it.
// NOTE(review): org_html_manager is presumably provided by org-info.js, which
// is loaded by the script tag above; confirm the meaning of each option
// against the org-info.js documentation before changing a value.
org_html_manager.set("TOC_DEPTH", "4");
org_html_manager.set("LINK_HOME", "index.html");
org_html_manager.set("LINK_UP", "");
org_html_manager.set("LOCAL_TOC", "1");
org_html_manager.set("VIEW_BUTTONS", "0");
org_html_manager.set("MOUSE_HINT", "underline");
org_html_manager.set("FIXED_TOC", "0");
org_html_manager.set("TOC", "1");
org_html_manager.set("VIEW", "info");
org_html_manager.setup(); // activate after the parameters are set
/*]]>*///-->
</script>
<script type="text/javascript">
/*
@licstart The following is the entire license notice for the
JavaScript code in this tag.
Copyright (C) 2012-2020 Free Software Foundation, Inc.
The JavaScript code in this tag is free software: you can
redistribute it and/or modify it under the terms of the GNU
General Public License (GNU GPL) as published by the Free Software
Foundation, either version 3 of the License, or (at your option)
any later version. The code is distributed WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
As additional permission under GNU GPL version 3 section 7, you
may distribute non-source (e.g., minimized or compacted) forms of
that code without the copy of the GNU GPL normally required by
section 4, provided you include this license notice and a URL
through which recipients can access the Corresponding Source.
@licend The above is the entire license notice
for the JavaScript code in this tag.
*/
<!--/*--><![CDATA[/*><!--*/
/**
 * Highlight a code-reference link together with the code line it points to.
 *
 * The "code-highlighted" class is added on top of whatever classes the two
 * elements already carry, so nothing has to be cached and restored later and
 * calling this function twice in a row is harmless.
 *
 * @param {Element} elem - The element that triggered the highlight.
 * @param {string} id - The id of the element to highlight alongside elem.
 */
function CodeHighlightOn(elem, id)
{
    var target = document.getElementById(id);
    // Do nothing when the referenced element is absent from the page.
    if (null != target) {
        elem.classList.add("code-highlighted");
        target.classList.add("code-highlighted");
    }
}

/**
 * Undo CodeHighlightOn for the same pair of elements.
 *
 * The previous implementation restored a cached class name only when that
 * name was non-empty, so an element that started without any class stayed
 * highlighted forever. Removing the class directly has no such blind spot
 * and also avoids touching a missing target.
 *
 * @param {Element} elem - The element that triggered the highlight.
 * @param {string} id - The id of the element highlighted alongside elem.
 */
function CodeHighlightOff(elem, id)
{
    var target = document.getElementById(id);
    if (null != target) {
        elem.classList.remove("code-highlighted");
        target.classList.remove("code-highlighted");
    }
}
/*]]>*///-->
</script>
<script type="text/x-mathjax-config">
// MathJax v2 configuration for the equations on this page: centred display
// maths, AMS-style automatic equation numbering with tags on the right.
MathJax.Hub.Config({
displayAlign: "center",
displayIndent: "0em",
"HTML-CSS": { scale: 100,
// Must be the boolean false: the string "false" is a non-empty string and
// therefore truthy, which switches automatic line breaking ON.
linebreaks: { automatic: false },
webFont: "TeX"
},
SVG: {scale: 100,
// Same as above: boolean, not a string.
linebreaks: { automatic: false },
font: "TeX"},
NativeMML: {scale: 100},
TeX: { equationNumbers: {autoNumber: "AMS"},
MultLineWidth: "85%",
TagSide: "right",
TagIndent: ".8em"
}
});
</script>
<script type="text/javascript"
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_HTML"></script>
</head>
<body>
<div id="org-div-home-and-up">
<a accesskey="h" href=""> UP </a>
|
<a accesskey="H" href="index.html"> HOME </a>
</div><div id="content">
<h1 class="title">Sherman-Morrison-Woodbury</h1>
<div id="table-of-contents">
<h2>Table of Contents</h2>
<div id="text-table-of-contents">
<ul>
<li><a href="#org8310c36">1. Headers</a></li>
<li><a href="#org68f9179">2. Naïve Sherman-Morrison</a>
<ul>
<li><a href="#org7fbd1f9">2.1. <code>qmckl_sm_naive</code></a>
<ul>
<li><a href="#org36b6e4b">2.1.1. Introduction</a></li>
<li><a href="#org53c2299">2.1.2. API</a></li>
<li><a href="#org720462d">2.1.3. Requirements</a></li>
<li><a href="#org39ba843">2.1.4. Pedagogical kernel source (in Fortran)</a>
<ul>
<li><a href="#org7fa2714">2.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
</ul>
</li>
<li><a href="#org74b40ca">2.1.5. C headers (exposed in qmckl.h)</a></li>
<li><a href="#orgc945a9f">2.1.6. C sources</a></li>
<li><a href="#org5ace91c">2.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
<li><a href="#orga8a843c">2.1.8. Performance</a></li>
<li><a href="#orge462592">2.1.9. Tests</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#orge99c98b">3. Sherman-Morrison with Slagel Splitting (core)</a>
<ul>
<li><a href="#orgc5401e9">3.1. <code>qmckl_sm_splitting_core</code></a>
<ul>
<li><a href="#org4334aec">3.1.1. Introduction</a></li>
<li><a href="#orgbae027c">3.1.2. API</a></li>
<li><a href="#org74af35a">3.1.3. Requirements</a></li>
<li><a href="#org1324114">3.1.4. Pedagogical kernel source (in Fortran)</a>
<ul>
<li><a href="#org1b24ef6">3.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
</ul>
</li>
<li><a href="#org853216d">3.1.5. C headers (exposed in qmckl.h)</a></li>
<li><a href="#org595caa3">3.1.6. C sources</a></li>
<li><a href="#org95b0bb0">3.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
<li><a href="#orga260ae6">3.1.8. Performance</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#org5987d99">4. Sherman-Morrison with Slagel Splitting</a>
<ul>
<li><a href="#orgcd920e2">4.1. <code>qmckl_sm_splitting</code></a>
<ul>
<li><a href="#org135c76a">4.1.1. Introduction</a></li>
<li><a href="#org3623c4d">4.1.2. API</a></li>
<li><a href="#orgcba34b6">4.1.3. Requirements</a></li>
<li><a href="#orgdaeff2e">4.1.4. Pedagogical kernel source (in Fortran)</a>
<ul>
<li><a href="#org6f60107">4.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
</ul>
</li>
<li><a href="#org58729df">4.1.5. C headers (exposed in qmckl.h)</a></li>
<li><a href="#org6736786">4.1.6. C source</a></li>
<li><a href="#org83d10c2">4.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
<li><a href="#orgc65cc5b">4.1.8. Performance&#x2026;</a></li>
<li><a href="#org224e869">4.1.9. Test</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#orgad6265b">5. End of files</a></li>
</ul>
</div>
</div>
<div id="outline-container-org8310c36" class="outline-2">
<h2 id="org8310c36"><span class="section-number-2">1</span> Headers</h2>
<div class="outline-text-2" id="text-1">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
<span style="color: #483d8b;">#ifdef</span> HAVE_CONFIG_H
<span style="color: #483d8b;"> #include</span> <span style="color: #8b2252;">"config.h"</span>
<span style="color: #483d8b;">#endif</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">&lt;math.h&gt;</span>
<span style="color: #228b22;">int</span> <span style="color: #0000ff;">main</span>() {
<span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>;
context = qmckl_context_create();
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
</pre>
</div>
<p>
This is the range that determines how many high-performance kernel instances will be generated, using the C-function templates defined in the sections below. If the C-function template is called <code>qmckl_kernel_{Dim}</code>, then <code>range(K, L+1)</code> will result in kernel instances from <code>qmckl_kernel_K</code> to <code>qmckl_kernel_L</code>.
</p>
</div>
</div>
<div id="outline-container-org68f9179" class="outline-2">
<h2 id="org68f9179"><span class="section-number-2">2</span> Naïve Sherman-Morrison</h2>
<div class="outline-text-2" id="text-2">
</div>
<div id="outline-container-org7fbd1f9" class="outline-3">
<h3 id="org7fbd1f9"><span class="section-number-3">2.1</span> <code>qmckl_sm_naive</code></h3>
<div class="outline-text-3" id="text-2-1">
</div>
<div id="outline-container-org36b6e4b" class="outline-4">
<h4 id="org36b6e4b"><span class="section-number-4">2.1.1</span> Introduction</h4>
<div class="outline-text-4" id="text-2-1-1">
<p>
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
zero when an update is evaluated. It will exit with an error code if the denominator is too close to zero.
</p>
<p>
#+TODO
Change the math notation so that the update vectors appear as row in the math
so that it is consistent with the representation in C (memory)
</p>
<p>
The formula for any update \(u_j\) (index \(j\) is suppressed for clarity) that is applied is
\[
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
\]
</p>
<p>
where
\(S\) is the Slater-matrix,
\(u\) and \(v^T\) are the column and row vectors containing the updates,
\(S^{-1}\) is the inverse of the Slater-matrix.
</p>
<p>
Even though the Slater-matrix \(S\) with all updates applied at once is invertible, during the course of applying
updates to the inverse Slater-matrix \(S^{-1}\) one-by-one it can happen that one of the intermediate inverse
matrices \(S^{-1}\) becomes singular. Therefore a global threshold value \(\epsilon\) is defined that is used to
evaluate each individual update \(u_j\) when it is applied.
</p>
<p>
This value sets the lower bound below which the magnitude of the
denominator \(1+v_j^TS^{-1}u_j\) is considered to be too small and will most probably result in a singular matrix
\(S\), or at least in an inverse of \(S\) of very poor numerical quality. Therefore, when \(|1+v_j^TS^{-1}u_j| \geq \epsilon\),
the update is applied as usual and the kernel exits with return code <code>QMCKL_SUCCESS</code>.
If \(|1+v_j^TS^{-1}u_j| &lt; \epsilon\) the update is rejected and the kernel exits with return code <code>QMCKL_FAILURE</code>.
</p>
<p>
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
</p>
</div>
</div>
<div id="outline-container-org53c2299" class="outline-4">
<h4 id="org53c2299"><span class="section-number-4">2.1.2</span> API</h4>
<div class="outline-text-4" id="text-2-1-2">
<table id="orge1303df" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
<colgroup>
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Variable</th>
<th scope="col" class="org-left">Type</th>
<th scope="col" class="org-left">In/Out</th>
<th scope="col" class="org-left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>context</code></td>
<td class="org-left"><code>qmckl_context</code></td>
<td class="org-left">in</td>
<td class="org-left">Global state</td>
</tr>
<tr>
<td class="org-left"><code>LDS</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>Dim</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Dimension of <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>N_updates</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>Updates</code></td>
<td class="org-left"><code>double[N_updates*LDS]</code></td>
<td class="org-left">in</td>
<td class="org-left">Array containing the updates</td>
</tr>
<tr>
<td class="org-left"><code>Updates_index</code></td>
<td class="org-left"><code>uint64_t[N_updates]</code></td>
<td class="org-left">in</td>
<td class="org-left">Array containing the rank-1 updates</td>
</tr>
<tr>
<td class="org-left"><code>breakdown</code></td>
<td class="org-left"><code>double</code></td>
<td class="org-left">in</td>
<td class="org-left">Break-down parameter on which to fail or not</td>
</tr>
<tr>
<td class="org-left"><code>Slater_inv</code></td>
<td class="org-left"><code>double[Dim*LDS]</code></td>
<td class="org-left">inout</td>
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
</tr>
<tr>
<td class="org-left"><code>determinant</code></td>
<td class="org-left"><code>double</code></td>
<td class="org-left">inout</td>
<td class="org-left">Determinant of the Slater-matrix</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="outline-container-org720462d" class="outline-4">
<h4 id="org720462d"><span class="section-number-4">2.1.3</span> Requirements</h4>
<div class="outline-text-4" id="text-2-1-3">
<ul class="org-ul">
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
<li><code>LDS &gt;= 2</code></li>
<li><code>Dim &gt;= 2</code></li>
<li><code>N_updates &gt;= 1</code></li>
<li><code>Updates</code> is allocated with <code>N_updates * LDS</code> elements</li>
<li><code>Updates_index</code> is allocated with <code>N_updates</code> elements</li>
<li><code>breakdown</code> is a small number such that \(0 &lt; \mathrm{breakdown} \ll 1\)</li>
<li><code>Slater_inv</code> is allocated with <code>Dim * LDS</code> elements</li>
<li><code>determinant &gt; 0</code></li>
</ul>
</div>
</div>
<div id="outline-container-org39ba843" class="outline-4">
<h4 id="org39ba843"><span class="section-number-4">2.1.4</span> Pedagogical kernel source (in Fortran)</h4>
<div class="outline-text-4" id="text-2-1-4">
<p>
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
not be used in real workloads.
</p>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_naive_doc_f(context, </span><span style="color: #a020f0;">&amp;</span>
lds, dim, <span style="color: #a020f0;">&amp;</span>
nupdates, <span style="color: #a020f0;">&amp;</span>
upds, <span style="color: #a020f0;">&amp;</span>
updates_index, <span style="color: #a020f0;">&amp;</span>
breakdown, <span style="color: #a020f0;">&amp;</span>
s_inv, <span style="color: #a020f0;">&amp;</span>
determinant) <span style="color: #a020f0;">result</span>(info)
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(nupdates * lds)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Updates</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, lds) ::<span style="color: #a0522d;"> Inverse</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds) ::<span style="color: #a0522d;"> D</span>
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
info = QMCKL_FAILURE
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
info = QMCKL_INVALID_CONTEXT
<span style="color: #a020f0;">return</span>
<span style="color: #a020f0;">endif</span>
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
l = 1;
! <span style="color: #b22222;">For each update do...</span>
<span style="color: #a020f0;">do while</span> (l &lt; nupdates + 1)
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
<span style="color: #a020f0;">do</span> i = 1, dim
C(i) = 0
<span style="color: #a020f0;">do</span> j = 1, dim
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
<span style="color: #a020f0;">end do</span>
<span style="color: #a020f0;">end do</span>
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
row = updates_index(l)
denominator = 1 + C(row)
! <span style="color: #b22222;">Return early if denominator is too small</span>
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) &lt; breakdown) <span style="color: #a020f0;">return</span>
idenominator = 1 / denominator
! <span style="color: #b22222;">Update det(S)</span>
determinant = determinant * denominator
! <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
D = Inverse(row, :)
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
<span style="color: #a020f0;">do</span> i = 1, dim
<span style="color: #a020f0;">do</span> j = 1, dim
update = C(i) * D(j) * idenominator
Inverse(i, j) = Inverse(i, j) - update
<span style="color: #a020f0;">end do</span>
<span style="color: #a020f0;">end do</span>
l = l + 1
<span style="color: #a020f0;">end do</span>
! <span style="color: #b22222;">Copy updated inverse back to s_inv</span>
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
info = QMCKL_SUCCESS
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_naive_doc_f</span>
</pre>
</div>
</div>
<div id="outline-container-org7fa2714" class="outline-5">
<h5 id="org7fa2714"><span class="section-number-5">2.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
<div class="outline-text-5" id="text-2-1-4-1">
<p>
The following Fortran function <code>qmckl_sm_naive_doc</code> makes sure
that the pedagogical kernel <code>qmckl_sm_naive_doc_f</code>, written in
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function <code>qmckl_sm_naive_doc</code> will be exposed in the header file 'qmckl.h'
for C users and in the module file 'qmckl_f.F90' for Fortran users.
</p>
</div>
</div>
</div>
<div id="outline-container-org74b40ca" class="outline-4">
<h4 id="org74b40ca"><span class="section-number-4">2.1.5</span> C headers (exposed in qmckl.h)</h4>
<div class="outline-text-4" id="text-2-1-5">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_doc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
</div>
</div>
<div id="outline-container-orgc945a9f" class="outline-4">
<h4 id="orgc945a9f"><span class="section-number-4">2.1.6</span> C sources</h4>
<div class="outline-text-4" id="text-2-1-6">
<p>
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">&lt;stdbool.h&gt;</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">&lt;math.h&gt;</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"config.h"</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"stdio.h"</span>
// <span style="color: #b22222;">Order important because</span>
// <span style="color: #b22222;">__GNUC__ also set in ICC, ICX and CLANG</span>
// <span style="color: #b22222;">__clang__ also set in ICX</span>
<span style="color: #483d8b;">#if</span> <span style="color: #483d8b;">defined</span>(__INTEL_COMPILER)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__INTEL_LLVM_COMPILER)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__clang__)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"clang loop vectorize(enable)"</span>)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__GNUC__)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"GCC ivdep"</span>)
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
<span style="color: #483d8b;">#endif</span>
</pre>
</div>
<p>
<code>qmckl_sm_naive_hpc</code> is a high performance variation of
<code>qmckl_sm_naive</code> written in C. It is used in cases when <code>Dim</code> is
smaller than the leading dimension <code>LDS</code>, irrespective of whether <code>LDS</code>
includes zero padding to benefit from SIMD instructions or not. Cases like this
include situations where one wants to apply updates to a square submatrix of the
full matrix.
It takes advantage of memory aligned data and assumes no data dependencies
inside the loops. The loops are fully vectorised whenever <code>Dim</code> is an integer
multiple of <code>SIMD_LENGTH</code>.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_hpc"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[Dim];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x u_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
}
}
// <span style="color: #b22222;">Denominator: v_l^T * C</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown)
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
D[j] = Slater_inv[cui * LDS + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<p>
<code>qmckl_exit_code qmckl_sm_naive_{Dim}</code> is a C function template that is used to generate instances of C functions based on the range given above. The advantage of this method is that, for each of these instances, all the dimensions and loop bounds are known at compile time, allowing the compiler to optimize more aggressively.
</p>
<div class="org-src-container">
<pre class="src src-c" id="org5669e0b"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_naive_</span>{Dim}(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_{Dim}"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D</span>{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[{Dim}];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
D[j] = Slater_inv[cui * D{Dim}_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D{Dim}_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<p>
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
</p>
<div class="org-src-container">
<pre class="src src-python" id="org07bc9d7"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_naive_{Dim}(</span>
<span style="color: #8b2252;"> const qmckl_context context,</span>
<span style="color: #8b2252;"> const uint64_t N_updates,</span>
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
<span style="color: #8b2252;"> const double breakdown,</span>
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
<span style="color: #8b2252;"> double* __restrict determinant) {</span>
<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
<span style="color: #8b2252;"> return qmckl_failwith(context,</span>
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
<span style="color: #8b2252;"> "qmckl_sm_naive_{Dim}",</span>
<span style="color: #8b2252;"> NULL);</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> #define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[{Dim}];</span>
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>
<span style="color: #8b2252;"> uint64_t l = 0;</span>
<span style="color: #8b2252;"> // For each update</span>
<span style="color: #8b2252;"> while (l &lt; N_updates) {</span>
<span style="color: #8b2252;"> // C = A^{-1} x U_l</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> C[i] = 0;</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> // Denominator</span>
<span style="color: #8b2252;"> const int cui = Updates_index[l] - 1;</span>
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
<span style="color: #8b2252;"> if (fabs(den) &lt; breakdown) {</span>
<span style="color: #8b2252;"> return QMCKL_FAILURE;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>
<span style="color: #8b2252;"> // Update det(A)</span>
<span style="color: #8b2252;"> if (determinant)</span>
<span style="color: #8b2252;"> *determinant *= den;</span>
<span style="color: #8b2252;"> // selecting column: D = v_l^T * S_inv</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> // A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> double update = C[i] * D[j] * iden;</span>
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> l += 1;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
<span style="color: #8b2252;">}</span>
<span style="color: #8b2252;">"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
</pre>
</div>
<p>
Python script that generates the C switch cases that call the individual kernel instances.
</p>
<div class="org-src-container">
<pre class="src src-python" id="orgb45090f"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">case {Dim}: </span>
<span style="color: #8b2252;"> return qmckl_sm_naive_{Dim}(context,</span>
<span style="color: #8b2252;"> N_updates,</span>
<span style="color: #8b2252;"> Updates,</span>
<span style="color: #8b2252;"> Updates_index,</span>
<span style="color: #8b2252;"> breakdown,</span>
<span style="color: #8b2252;"> Slater_inv,</span>
<span style="color: #8b2252;"> determinant);"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_2</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_2"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D2_P</span> ((1+(2-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
D[j] = Slater_inv[cui * D2_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D2_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_3</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_3"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D3_P</span> ((1+(3-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
D[j] = Slater_inv[cui * D3_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D3_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_4</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_4"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D4_P</span> ((1+(4-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[4];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
D[j] = Slater_inv[cui * D4_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D4_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_5: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">5 x D5_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D5_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..5), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 5 rows of D5_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..5 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D5_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_5</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_5"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D5_P</span> ((1+(5-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[5]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 5) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_5"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..5"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[5];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
      D[j] = Slater_inv[cui * D5_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D5_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_6: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">6 x D6_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D6_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..6), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 6 rows of D6_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..6 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D6_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_6</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_6"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D6_P</span> ((1+(6-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[6]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 6) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_6"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..6"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[6];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
      D[j] = Slater_inv[cui * D6_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D6_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_7: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">7 x D7_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D7_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..7), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 7 rows of D7_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..7 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D7_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_7</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_7"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D7_P</span> ((1+(7-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[7]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 7) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_7"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..7"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[7];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
      D[j] = Slater_inv[cui * D7_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D7_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_8: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">8 x D8_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D8_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..8), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 8 rows of D8_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..8 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D8_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_8</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_8"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D8_P</span> ((1+(8-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[8]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 8) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_8"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..8"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[8];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
      D[j] = Slater_inv[cui * D8_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D8_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_9: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">9 x D9_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D9_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..9), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 9 rows of D9_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..9 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D9_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_9</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_9"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D9_P</span> ((1+(9-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[9]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 9) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_9"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..9"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[9];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
      D[j] = Slater_inv[cui * D9_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D9_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_10: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">10 x D10_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D10_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..10), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 10 rows of D10_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..10 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D10_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_10</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_10"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D10_P</span> ((1+(10-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[10]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 10) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_10"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..10"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[10];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
        C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
      D[j] = Slater_inv[cui * D10_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D10_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_11: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">11 x D11_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D11_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..11), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 11 rows of D11_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..11 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D11_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_11</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_11"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D11_P</span> ((1+(11-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[11]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 11) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_11"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..11"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[11];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 11; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
        C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
      D[j] = Slater_inv[cui * D11_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 11; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D11_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">qmckl_sm_naive_12: applies N_updates rank-1 updates, one after another, to the</span>
// <span style="color: #b22222;">12 x D12_P array Slater_inv in place (A^{-1} = A^{-1} - C x D / den).</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  N_updates     : number of update vectors in Updates.</span>
// <span style="color: #b22222;">  Updates       : N_updates vectors of D12_P doubles each, stored back to back.</span>
// <span style="color: #b22222;">  Updates_index : 1-based indices (1..12), one per update; index - 1 selects the</span>
// <span style="color: #b22222;">                  entry of C and the stored row of Slater_inv used by the update.</span>
// <span style="color: #b22222;">  breakdown     : an update with fabs(den) below this value aborts the call.</span>
// <span style="color: #b22222;">  Slater_inv    : 12 rows of D12_P doubles, overwritten with the updated values.</span>
// <span style="color: #b22222;">  determinant   : may be NULL; otherwise multiplied by den of every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. An index outside 1..12 is</span>
// <span style="color: #b22222;">reported through qmckl_failwith with QMCKL_FAILURE before anything is modified.</span>
// <span style="color: #b22222;">A denominator below breakdown returns QMCKL_FAILURE; updates applied before it</span>
// <span style="color: #b22222;">(and their effect on determinant) are kept.</span>
// <span style="color: #b22222;">NOTE(review): the j loops cover all D12_P padded entries; presumably the padding</span>
// <span style="color: #b22222;">of Updates and Slater_inv is zero -- confirm with callers.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_12</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_12"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D12_P</span> ((1+(12-1)/SIMD_LENGTH)*SIMD_LENGTH)
  // <span style="color: #b22222;">Reject out-of-range indices up front: index - 1 is used to subscript C[12]</span>
  // <span style="color: #b22222;">and the rows of Slater_inv, so a bad entry would access memory out of bounds.</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; N_updates; k++) {
    <span style="color: #a020f0;">if</span> (Updates_index[k] &lt; 1 || Updates_index[k] &gt; 12) {
      <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                            QMCKL_FAILURE,
                            <span style="color: #8b2252;">"qmckl_sm_naive_12"</span>,
                            <span style="color: #8b2252;">"Updates_index entry outside 1..12"</span>);
    }
  }
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[12];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
        C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
      }
    }
    // <span style="color: #b22222;">Denominator (index already validated, kept unsigned to avoid narrowing)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;
    // <span style="color: #b22222;">D = v_l^T * S_inv: copy of stored row cui of Slater_inv, taken before it is modified</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
      D[j] = Slater_inv[cui * D12_P + j];
    }
    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D12_P + j] -= update;
      }
    }
  }
  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 13; D13_P is 13 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D13_P + j], j in 0..D13_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..13</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D13_P + j], i in 0..12; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_13</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_13"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D13_P</span> ((1+(13-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[13];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 13) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
        C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
      D[j] = Slater_inv[cui * D13_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D13_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 14; D14_P is 14 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D14_P + j], j in 0..D14_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..14</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D14_P + j], i in 0..13; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_14</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_14"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D14_P</span> ((1+(14-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[14];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 14) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
        C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
      D[j] = Slater_inv[cui * D14_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D14_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 15; D15_P is 15 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D15_P + j], j in 0..D15_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..15</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D15_P + j], i in 0..14; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_15</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_15"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D15_P</span> ((1+(15-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[15];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 15) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
      D[j] = Slater_inv[cui * D15_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D15_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 16; D16_P is 16 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D16_P + j], j in 0..D16_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..16</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D16_P + j], i in 0..15; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_16</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_16"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D16_P</span> ((1+(16-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[16];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 16) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
      D[j] = Slater_inv[cui * D16_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D16_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 17; D17_P is 17 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D17_P + j], j in 0..D17_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..17</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D17_P + j], i in 0..16; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_17</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_17"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D17_P</span> ((1+(17-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[17];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 17) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
      D[j] = Slater_inv[cui * D17_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D17_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 18; D18_P is 18 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D18_P + j], j in 0..D18_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..18</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D18_P + j], i in 0..17; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_18</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_18"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D18_P</span> ((1+(18-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[18];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 18) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
        C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
      D[j] = Slater_inv[cui * D18_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D18_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Naive Sherman-Morrison kernel specialised for dimension 19; D19_P is 19 rounded up</span>
// <span style="color: #b22222;">to a multiple of SIMD_LENGTH. Applies N_updates rank-1 updates one after the other,</span>
// <span style="color: #b22222;">in place on Slater_inv.</span>
// <span style="color: #b22222;">  Updates       : update l is read from Updates[l * D19_P + j], j in 0..D19_P-1</span>
// <span style="color: #b22222;">  Updates_index : 1-based index selecting the entry of C and the block of Slater_inv</span>
// <span style="color: #b22222;">                  used by update l; must be in 1..19</span>
// <span style="color: #b22222;">  breakdown     : threshold on |den| below which the kernel gives up</span>
// <span style="color: #b22222;">  Slater_inv    : addressed as Slater_inv[i * D19_P + j], i in 0..18; modified in place</span>
// <span style="color: #b22222;">  determinant   : multiplied by den for each applied update; skipped when NULL</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates are applied. Returns QMCKL_FAILURE as soon as</span>
// <span style="color: #b22222;">an index is out of range or |den| is below breakdown; updates applied before that</span>
// <span style="color: #b22222;">point are not rolled back. A context rejected by qmckl_context_check is reported</span>
// <span style="color: #b22222;">through qmckl_failwith.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_19</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_19"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

<span style="color: #483d8b;">  #define</span> <span style="color: #a0522d;">D19_P</span> ((1+(19-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[19];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">Updates_index is 1-based: validate it before it indexes C and Slater_inv</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">idx</span> = Updates_index[l];
    <span style="color: #a020f0;">if</span> (idx == 0 || idx &gt; 19) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = idx - 1;

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
        C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
    // <span style="color: #b22222;">(copied out first because the loop below also overwrites this part of Slater_inv)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
      D[j] = Slater_inv[cui * D19_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
        // <span style="color: #b22222;">Evaluated as (C[i] * D[j]) * iden; regrouping would change the rounding</span>
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D19_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_20</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_20"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D20_P</span> ((1+(20-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[20];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
D[j] = Slater_inv[cui * D20_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D20_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_21</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive_21"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D21_P</span> ((1+(21-1)/SIMD_LENGTH)*SIMD_LENGTH)
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[21];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
C[i] = 0;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Update det(A)</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
D[j] = Slater_inv[cui * D21_P + j];
}
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D21_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<p>
<code>qmckl_sm_naive</code> is a generic function containing the decision-making logic that dispatches to the proper kernel, based on the library configuration (<code>--enable-doc</code> or <code>--enable-hpc</code>) and on the passed array dimensions <code>LDS</code> and <code>Dim</code>.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_naive"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
<span style="color: #a020f0;">switch</span> (Dim) {
<span style="color: #a020f0;">case</span> 2:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_2(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 3:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_3(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 4:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_4(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 5:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_5(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 6:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_6(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 7:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_7(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 8:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_8(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 9:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_9(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 10:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_10(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 11:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_11(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 12:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_12(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 13:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_13(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 14:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_14(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 15:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_15(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 16:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_16(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 17:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_17(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 18:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_18(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 19:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_19(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 20:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_20(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #a020f0;">case</span> 21:
<span style="color: #a020f0;">return</span> qmckl_sm_naive_21(context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
}
}
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
<span style="color: #a020f0;">return</span> qmckl_sm_naive_hpc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
}
<span style="color: #483d8b;"> #else</span>
<span style="color: #a020f0;">return</span> qmckl_sm_naive_doc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #483d8b;"> #endif</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
</pre>
</div>
</div>
</div>
<div id="outline-container-org5ace91c" class="outline-4">
<h4 id="org5ace91c"><span class="section-number-4">2.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
<div class="outline-text-4" id="text-2-1-7">
</div>
</div>
<div id="outline-container-orga8a843c" class="outline-4">
<h4 id="orga8a843c"><span class="section-number-4">2.1.8</span> Performance</h4>
<div class="outline-text-4" id="text-2-1-8">
<p>
This function performs best when there is only 1 rank-1 update in the update cycle. It is
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
can never resolve a situation where applying the update causes singular behaviour.
</p>
</div>
</div>
<div id="outline-container-orge462592" class="outline-4">
<h4 id="orge462592"><span class="section-number-4">2.1.9</span> Tests</h4>
<div class="outline-text-4" id="text-2-1-9">
<p>
The tests for the kernels are executed on datasets that are extracted from a run of
QMC=Chem on Benzene (21 spin-up/21 spin-down electrons) using 329 unique alpha determinants.
The tests are run such that the kernels reject the computed inverse whenever the computed
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
QMC=Chem. The tests will return <code>QMCKL_SUCCESS</code> whenever all the elements of the final matrix
\(R = S \cdot S^{-1} - 1\) are smaller in absolute value than the given tolerance value of 1e-3, and will return
<code>QMCKL_FAILURE</code> if any of the values is larger than this tolerance value.
</p>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span> = 21;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span> = (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span> = 1e-3;
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">tolerance</span> = 1e-3;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">res</span>[441];
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"sm_test.h"</span>
<span style="color: #0000ff;">assert</span>(Updates1 != <span style="color: #008b8b;">NULL</span>);
<span style="color: #0000ff;">assert</span>(Updates_index1 != <span style="color: #008b8b;">NULL</span>);
<span style="color: #0000ff;">assert</span>(Slater_inv1 != <span style="color: #008b8b;">NULL</span>);
// <span style="color: #b22222;">original determinant of Slater1 (before applying updates)</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = 3.407025646103221e-10;
rc = qmckl_sm_naive(context,
LDS,
Dim,
N_updates1,
Updates1,
Updates_index1,
breakdown,
Slater_inv1,
&amp;det);
// <span style="color: #b22222;">Check that the determinant is updated properly</span>
<span style="color: #0000ff;">assert</span>(fabs(det + 4.120398385068217e-10) &lt; 1e-15);
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
res[i * Dim + j] = 0;
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k &lt; Dim; k++) {
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
}
}
}
rc = QMCKL_SUCCESS;
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
<span style="color: #a020f0;">if</span> (i == j &amp;&amp; fabs(res[i * Dim + j] - 1) &gt; tolerance) {
rc = QMCKL_FAILURE;
}
<span style="color: #a020f0;">if</span> (i != j &amp;&amp; fabs(res[i * Dim + j]) &gt; tolerance) {
rc = QMCKL_FAILURE;
}
}
}
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
</pre>
</div>
</div>
</div>
</div>
</div>
<div id="outline-container-orge99c98b" class="outline-2">
<h2 id="orge99c98b"><span class="section-number-2">3</span> Sherman-Morrison with Slagel Splitting (core)</h2>
<div class="outline-text-2" id="text-3">
</div>
<div id="outline-container-orgc5401e9" class="outline-3">
<h3 id="orgc5401e9"><span class="section-number-3">3.1</span> <code>qmckl_sm_splitting_core</code></h3>
<div class="outline-text-3" id="text-3-1">
</div>
<div id="outline-container-org4334aec" class="outline-4">
<h4 id="org4334aec"><span class="section-number-4">3.1.1</span> Introduction</h4>
<div class="outline-text-4" id="text-3-1-1">
<p>
<code>qmckl_sm_splitting_core</code> is the inner core part of 'Sherman-Morrison with update splitting' in the next section.
It is not normally used by itself but it is possible to use it nonetheless.
</p>
<p>
It has three extra parameters in its API:
</p>
<ul class="org-ul">
<li><code>later_updates</code> initially empty array that will contain the second halves of updates that were split during kernel execution</li>
<li><code>later_index</code> initially empty array that will contain the row/column numbers of the updates that were split during execution</li>
<li><code>later</code> initially zero integer that records the number of updates that were split during execution.</li>
</ul>
<p>
It is up to the user to decide what to do with these updates once the kernel returns. Normally <code>qmckl_sm_splitting_core</code> is
used as the core part of a recursive function, as is done in <code>qmckl_sm_splitting</code> or as part of a more complex
kernel like <code>qmckl_sherman_morrison_smw32s</code>.
</p>
<p>
If the determinant is passed, it will only be partially updated when any of the updates were split.
</p>
</div>
</div>
<div id="outline-container-orgbae027c" class="outline-4">
<h4 id="orgbae027c"><span class="section-number-4">3.1.2</span> API</h4>
<div class="outline-text-4" id="text-3-1-2">
<table id="org61d5afd" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
<colgroup>
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Variable</th>
<th scope="col" class="org-left">Type</th>
<th scope="col" class="org-left">In/Out</th>
<th scope="col" class="org-left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left"><code>context</code></td>
<td class="org-left"><code>qmckl_context</code></td>
<td class="org-left">in</td>
<td class="org-left">Global state</td>
</tr>
<tr>
<td class="org-left"><code>LDS</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>Dim</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Dimension of <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>N_updates</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">in</td>
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
</tr>
<tr>
<td class="org-left"><code>Updates</code></td>
<td class="org-left"><code>double[LDS*N_updates]</code></td>
<td class="org-left">in</td>
<td class="org-left">Array containing the rank-1 updates</td>
</tr>
<tr>
<td class="org-left"><code>Updates_index</code></td>
<td class="org-left"><code>uint64_t[N_updates]</code></td>
<td class="org-left">in</td>
<td class="org-left">Array containing positions of the rank-1 updates</td>
</tr>
<tr>
<td class="org-left"><code>breakdown</code></td>
<td class="org-left"><code>double</code></td>
<td class="org-left">in</td>
<td class="org-left">Break-down parameter on which to fail or not</td>
</tr>
<tr>
<td class="org-left"><code>Slater_inv</code></td>
<td class="org-left"><code>double[Dim*LDS]</code></td>
<td class="org-left">inout</td>
<td class="org-left">Array containing the inverse Slater-matrix</td>
</tr>
<tr>
<td class="org-left"><code>later_updates</code></td>
<td class="org-left"><code>double[LDS*N_updates]</code></td>
<td class="org-left">inout</td>
<td class="org-left">Array containing the split updates for later</td>
</tr>
<tr>
<td class="org-left"><code>later_index</code></td>
<td class="org-left"><code>uint64_t[N_updates]</code></td>
<td class="org-left">inout</td>
<td class="org-left">Array containing the positions of the split updates for later</td>
</tr>
<tr>
<td class="org-left"><code>later</code></td>
<td class="org-left"><code>uint64_t</code></td>
<td class="org-left">inout</td>
<td class="org-left">Number of split updates for later</td>
</tr>
<tr>
<td class="org-left"><code>determinant</code></td>
<td class="org-left"><code>double</code></td>
<td class="org-left">inout</td>
<td class="org-left">Determinant of the Slater-matrix</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="outline-container-org74af35a" class="outline-4">
<h4 id="org74af35a"><span class="section-number-4">3.1.3</span> Requirements</h4>
<div class="outline-text-4" id="text-3-1-3">
<ul class="org-ul">
<li><code>LDS &gt;= 2</code></li>
<li><code>Dim &gt;= 2</code></li>
<li><code>N_updates &gt;= 1</code></li>
<li><code>Updates</code> is allocated with \(\mathrm{N\_updates} \times \mathrm{LDS}\) elements</li>
<li><code>Updates_index</code> is allocated with \(\mathrm{N\_updates}\) elements</li>
<li><code>breakdown</code> is a small number such that \(0 &lt; \mathrm{breakdown} \ll 1\)</li>
<li><code>Slater_inv</code> is allocated with \(\mathrm{Dim} \times \mathrm{LDS}\) elements</li>
<li><code>later_updates</code> is allocated with \(\mathrm{N\_updates} \times \mathrm{LDS}\) elements</li>
<li><code>later_index</code> is allocated with \(\mathrm{N\_updates}\) elements</li>
<li><code>later &gt;= 0</code></li>
</ul>
</div>
</div>
<div id="outline-container-org1324114" class="outline-4">
<h4 id="org1324114"><span class="section-number-4">3.1.4</span> Pedagogical kernel source (in Fortran)</h4>
<div class="outline-text-4" id="text-3-1-4">
<p>
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
not be used in real workloads.
</p>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_splitting_core_doc_f( </span><span style="color: #a020f0;">&amp;</span>
context, <span style="color: #a020f0;">&amp;</span>
lds, dim, <span style="color: #a020f0;">&amp;</span>
nupdates, <span style="color: #a020f0;">&amp;</span>
upds, <span style="color: #a020f0;">&amp;</span>
updates_index, <span style="color: #a020f0;">&amp;</span>
breakdown, <span style="color: #a020f0;">&amp;</span>
s_inv, <span style="color: #a020f0;">&amp;</span>
later_upds, <span style="color: #a020f0;">&amp;</span>
Later_index, <span style="color: #a020f0;">&amp;</span>
Later, <span style="color: #a020f0;">&amp;</span>
determinant) <span style="color: #a020f0;">result</span>(info)
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later_index(nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> later_upds(lds * nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Updates</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, lds) ::<span style="color: #a0522d;"> Inverse</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds) ::<span style="color: #a0522d;"> D</span>
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Entering 'qmckl_sm_splittinig_core_doc_f'"</span>
info = QMCKL_FAILURE
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
info = QMCKL_INVALID_CONTEXT
<span style="color: #a020f0;">return</span>
<span style="color: #a020f0;">endif</span>
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
l = 1;
! <span style="color: #b22222;">For each update do...</span>
<span style="color: #a020f0;">do while</span> (l &lt; nupdates + 1)
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
<span style="color: #a020f0;">do</span> i = 1, dim
C(i) = 0
<span style="color: #a020f0;">do</span> j = 1, dim
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
<span style="color: #a020f0;">end do</span>
<span style="color: #a020f0;">end do</span>
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
row = updates_index(l)
denominator = 1 + C(row)
! <span style="color: #b22222;">If denominator is too close to zero:</span>
! <span style="color: #b22222;">- Split update in 2 before storing in Later_updates</span>
! <span style="color: #b22222;">- Split previously computed vector C in 2</span>
! <span style="color: #b22222;">- Recompute the denominator</span>
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) &lt; breakdown) <span style="color: #a020f0;">then</span>
<span style="color: #a020f0;">do</span> i = 1, dim
Later_updates(i, l) = Updates(i, l) / 2
C(i) = C(i) / 2
<span style="color: #a020f0;">end do</span>
Later_index(Later + 1) = updates_index(l)
Later = Later + 1
denominator = 1 + C(row)
<span style="color: #a020f0;">end if</span>
idenominator = 1 / denominator
! <span style="color: #b22222;">Update det(S)</span>
determinant = determinant * denominator
! <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
D = Inverse(row, :)
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
<span style="color: #a020f0;">do</span> i = 1, dim
<span style="color: #a020f0;">do</span> j = 1, dim
update = C(i) * D(j) * idenominator
Inverse(i, j) = Inverse(i, j) - update
<span style="color: #a020f0;">end do</span>
<span style="color: #a020f0;">end do</span>
l = l + 1
<span style="color: #a020f0;">end do</span>
! <span style="color: #b22222;">Copy updated inverse and later updates</span>
! <span style="color: #b22222;">back to s_inv and later_upds</span>
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_lu</span>(Later_Updates, later_upds, lds, nupdates)
info = QMCKL_SUCCESS
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Leaving 'qmckl_sm_splittinig_core_doc_f'"</span>
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc_f</span>
</pre>
</div>
</div>
<div id="outline-container-org1b24ef6" class="outline-5">
<h5 id="org1b24ef6"><span class="section-number-5">3.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
<div class="outline-text-5" id="text-3-1-4-1">
<p>
The function <code>qmckl_sm_splitting_core_doc</code> makes sure that
<code>qmckl_sm_splitting_core_doc_f</code> can be called from C using the
<code>ISO_C_BINDING</code>. Function <code>qmckl_sm_splitting_core_doc</code> will be
exposed in <code>qmckl.h</code> and <code>qmckl_f.F90</code>, but
<code>qmckl_sm_splitting_core_doc_f</code> will not.
</p>
</div>
</div>
</div>
<div id="outline-container-org853216d" class="outline-4">
<h4 id="org853216d"><span class="section-number-4">3.1.5</span> C headers (exposed in qmckl.h)</h4>
<div class="outline-text-4" id="text-3-1-5">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
</div>
</div>
<div id="outline-container-org595caa3" class="outline-4">
<h4 id="org595caa3"><span class="section-number-4">3.1.6</span> C sources</h4>
<div class="outline-text-4" id="text-3-1-6">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_hpc"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[LDS];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; LDS; i++) {
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x LDS</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; LDS; j++) {
D[j] = Slater_inv[cui * LDS + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; LDS; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c" id="org6d5b0f9"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_splitting_core_</span>{Dim}(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_{Dim}"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D{Dim}_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D{Dim}_P; i++) {
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D{Dim}_P</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
D[j] = Slater_inv[cui * D{Dim}_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D{Dim}_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-python" id="org83b4f54"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_splitting_core_{Dim}(</span>
<span style="color: #8b2252;"> const qmckl_context context,</span>
<span style="color: #8b2252;"> uint64_t N_updates,</span>
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
<span style="color: #8b2252;"> const double breakdown,</span>
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
<span style="color: #8b2252;"> double* __restrict later_updates,</span>
<span style="color: #8b2252;"> uint64_t* __restrict later_index,</span>
<span style="color: #8b2252;"> uint64_t* __restrict later,</span>
<span style="color: #8b2252;"> double* __restrict determinant) {</span>
<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
<span style="color: #8b2252;"> return qmckl_failwith(</span>
<span style="color: #8b2252;"> context,</span>
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
<span style="color: #8b2252;"> "qmckl_sm_splitting_core_{Dim}",</span>
<span style="color: #8b2252;"> NULL);</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[D{Dim}_P];</span>
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>
<span style="color: #8b2252;"> uint64_t l = 0;</span>
<span style="color: #8b2252;"> // For each update</span>
<span style="color: #8b2252;"> while (l &lt; N_updates) {</span>
<span style="color: #8b2252;"> // C = S^{-1} x U_l</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> C[i] = 0.0f;</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> // Denominator</span>
<span style="color: #8b2252;"> const int cui = Updates_index[l] - 1;</span>
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
<span style="color: #8b2252;"> if (fabs(den) &lt; breakdown) {</span>
<span style="color: #8b2252;"> // U_l = U_l / 2: split the update in 2 equal halves and save the</span>
<span style="color: #8b2252;"> // second half in later_updates</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; D{Dim}_P; i++) {</span>
<span style="color: #8b2252;"> later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;</span>
<span style="color: #8b2252;"> C[i] *= 0.5f;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> later_index[*later] = Updates_index[l];</span>
<span style="color: #8b2252;"> (*later)++;</span>
<span style="color: #8b2252;"> den = 1.0f + C[cui];</span>
<span style="color: #8b2252;"> } // From here onwards we continue with applying the first half of the</span>
<span style="color: #8b2252;"> // update to Slater_inv</span>
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>
<span style="color: #8b2252;"> if (determinant)</span>
<span style="color: #8b2252;"> *determinant *= den;</span>
<span style="color: #8b2252;"> // D = v^T x S^{-1} : 1 x D{Dim}_P</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> // S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> const double update = C[i] * D[j] * iden;</span>
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> l += 1;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
<span style="color: #8b2252;">}</span>
<span style="color: #8b2252;">"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
</pre>
</div>
<div class="org-src-container">
<pre class="src src-python" id="org328c849"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">case {Dim}: {</span>
<span style="color: #8b2252;"> return qmckl_sm_splitting_core_{Dim}(</span>
<span style="color: #8b2252;"> context,</span>
<span style="color: #8b2252;"> N_updates,</span>
<span style="color: #8b2252;"> Updates,</span>
<span style="color: #8b2252;"> Updates_index,</span>
<span style="color: #8b2252;"> breakdown,</span>
<span style="color: #8b2252;"> Slater_inv,</span>
<span style="color: #8b2252;"> later_updates,</span>
<span style="color: #8b2252;"> later_index,</span>
<span style="color: #8b2252;"> later,</span>
<span style="color: #8b2252;"> determinant);</span>
<span style="color: #8b2252;"> break;</span>
<span style="color: #8b2252;">}"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_2</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_2"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D2_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D2_P; i++) {
later_updates[*later * D2_P + i] = Updates[l * D2_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D2_P</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
D[j] = Slater_inv[cui * D2_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D2_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_3</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_3"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D3_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D3_P; i++) {
later_updates[*later * D3_P + i] = Updates[l * D3_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D3_P</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
D[j] = Slater_inv[cui * D3_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D3_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 4 rows stored with a row stride of D4_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D4_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 4 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_4</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_4"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..3] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D4_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D4_P; i++) {
        later_updates[*later * D4_P + i] = Updates[l * D4_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..3] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D4_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
      D[j] = Slater_inv[cui * D4_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D4_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 5 rows stored with a row stride of D5_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D5_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 5 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_5</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_5"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..4] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D5_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D5_P; i++) {
        later_updates[*later * D5_P + i] = Updates[l * D5_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..4] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D5_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
      D[j] = Slater_inv[cui * D5_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D5_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 6 rows stored with a row stride of D6_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D6_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 6 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_6</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_6"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..5] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D6_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D6_P; i++) {
        later_updates[*later * D6_P + i] = Updates[l * D6_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..5] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D6_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
      D[j] = Slater_inv[cui * D6_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D6_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 7 rows stored with a row stride of D7_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D7_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 7 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_7</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_7"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..6] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D7_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D7_P; i++) {
        later_updates[*later * D7_P + i] = Updates[l * D7_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..6] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D7_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
      D[j] = Slater_inv[cui * D7_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D7_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 8 rows stored with a row stride of D8_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D8_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 8 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_8</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_8"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..7] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D8_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D8_P; i++) {
        later_updates[*later * D8_P + i] = Updates[l * D8_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..7] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D8_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
      D[j] = Slater_inv[cui * D8_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D8_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, specialised for an inverse</span>
// <span style="color: #b22222;">Slater matrix of 9 rows stored with a row stride of D9_P doubles.</span>
//
// <span style="color: #b22222;">For each of the N_updates updates U_l (read from Updates[l * D9_P + j]):</span>
// <span style="color: #b22222;">  C   = S^{-1} x U_l, computed for the first 9 entries only,</span>
// <span style="color: #b22222;">  den = 1 + C[Updates_index[l] - 1].</span>
// <span style="color: #b22222;">When |den| is below breakdown, half of U_l is appended to later_updates,</span>
// <span style="color: #b22222;">its index to later_index, *later is incremented, and only the other half</span>
// <span style="color: #b22222;">is applied. Slater_inv is updated in place and, when determinant is not</span>
// <span style="color: #b22222;">NULL, *determinant is multiplied by den.</span>
//
// <span style="color: #b22222;">Returns the result of qmckl_failwith when the context check yields</span>
// <span style="color: #b22222;">QMCKL_NULL_CONTEXT, and QMCKL_SUCCESS otherwise.</span>
//
// <span style="color: #b22222;">NOTE(review): Updates_index[l] is used as a 1-based row index and is not</span>
// <span style="color: #b22222;">range-checked here; *later is not reset by this function, so the caller</span>
// <span style="color: #b22222;">presumably initialises it - confirm against the call sites.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_9</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_9"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Scratch vectors; only C[0..8] is ever computed and consumed.</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D9_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; the index is kept in uint64_t like every other index</span>
    // <span style="color: #b22222;">in this kernel to avoid a narrowing conversion.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D9_P; i++) {
        later_updates[*later * D9_P + i] = Updates[l * D9_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only C[0..8] was computed above, so halve just those entries;</span>
      // <span style="color: #b22222;">the remaining padding entries of C are never initialised or used.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;
      den = 1.0 + C[cui];
    }
    // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D9_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
      D[j] = Slater_inv[cui * D9_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D9_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 10 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D10_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D10_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_10</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_10"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D10_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 10 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D10_P but C was only filled for i &lt; 10</span>
// <span style="color: #b22222;">above; if D10_P &gt; 10 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 10).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D10_P; i++) {
later_updates[*later * D10_P + i] = Updates[l * D10_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D10_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
D[j] = Slater_inv[cui * D10_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D10_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 11 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D11_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D11_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_11</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_11"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D11_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 11; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 11 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D11_P but C was only filled for i &lt; 11</span>
// <span style="color: #b22222;">above; if D11_P &gt; 11 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 11).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D11_P; i++) {
later_updates[*later * D11_P + i] = Updates[l * D11_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D11_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
D[j] = Slater_inv[cui * D11_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 11; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D11_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D11_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 12 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D12_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D12_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_12</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_12"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D12_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 12 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D12_P but C was only filled for i &lt; 12</span>
// <span style="color: #b22222;">above; if D12_P &gt; 12 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 12).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D12_P; i++) {
later_updates[*later * D12_P + i] = Updates[l * D12_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D12_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
D[j] = Slater_inv[cui * D12_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D12_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 13 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D13_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D13_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_13</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_13"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D13_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 13 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D13_P but C was only filled for i &lt; 13</span>
// <span style="color: #b22222;">above; if D13_P &gt; 13 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 13).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D13_P; i++) {
later_updates[*later * D13_P + i] = Updates[l * D13_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D13_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
D[j] = Slater_inv[cui * D13_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D13_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 14 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D14_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D14_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_14</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_14"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D14_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 14 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D14_P but C was only filled for i &lt; 14</span>
// <span style="color: #b22222;">above; if D14_P &gt; 14 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 14).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D14_P; i++) {
later_updates[*later * D14_P + i] = Updates[l * D14_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D14_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
D[j] = Slater_inv[cui * D14_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D14_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 15 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D15_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D15_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_15</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_15"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D15_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 15 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D15_P but C was only filled for i &lt; 15</span>
// <span style="color: #b22222;">above; if D15_P &gt; 15 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 15).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D15_P; i++) {
later_updates[*later * D15_P + i] = Updates[l * D15_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D15_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
D[j] = Slater_inv[cui * D15_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D15_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison update with splitting for a Slater inverse of 16 rows,</span>
// <span style="color: #b22222;">each row stored with a stride of D16_P doubles.</span>
// <span style="color: #b22222;">  context       : validated with qmckl_context_check before any work.</span>
// <span style="color: #b22222;">  N_updates     : number of update rows consumed from Updates.</span>
// <span style="color: #b22222;">  Updates       : read-only; update l starts at Updates[l * D16_P].</span>
// <span style="color: #b22222;">  Updates_index : 1-based row index of each update (the code subtracts 1).</span>
// <span style="color: #b22222;">  breakdown     : an update is split when fabs(den) is below this value.</span>
// <span style="color: #b22222;">  Slater_inv    : modified in place.</span>
// <span style="color: #b22222;">  later_updates, later_index, later : receive the deferred half of every</span>
// <span style="color: #b22222;">                  split update; *later is the running count and is incremented.</span>
// <span style="color: #b22222;">                  NOTE(review): capacity is not checked here - verify caller.</span>
// <span style="color: #b22222;">  determinant   : optional (may be NULL); multiplied by den of each update.</span>
// <span style="color: #b22222;">Returns the qmckl_failwith result for a null context, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): IVDEP and ALIGNED are macros defined outside this block;</span>
// <span style="color: #b22222;">presumably vectorisation hints for the loop that follows them - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_16</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_16"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch: C holds S^{-1} x U_l, D holds a copy of row cui of S^{-1}.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D16_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and is not range-checked;</span>
// <span style="color: #b22222;">presumably 1 &lt;= Updates_index[l] &lt;= 16 - verify against caller.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): this loop runs to D16_P but C was only filled for i &lt; 16</span>
// <span style="color: #b22222;">above; if D16_P &gt; 16 the tail of C is scaled while uninitialised</span>
// <span style="color: #b22222;">(the final loop reads C[i] only for i &lt; 16).</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D16_P; i++) {
later_updates[*later * D16_P + i] = Updates[l * D16_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute den from the halved C; it is not compared with breakdown again.</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the rank-1 update multiplies instead of dividing.</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">A NULL determinant is allowed and simply skipped.</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D16_P</span>
// <span style="color: #b22222;">Row cui is copied first because the loop below overwrites it in place.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
D[j] = Slater_inv[cui * D16_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D16_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, fixed at 17 rows with a row stride</span>
// <span style="color: #b22222;">of D17_P doubles. D17_P, IVDEP and ALIGNED are macros defined outside this excerpt;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorisation hints -- TODO confirm.</span>
//
// <span style="color: #b22222;">For each l &lt; N_updates: C = S^{-1} x U_l, den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">S^{-1} -= C x D / den in place, with D a copy of row Updates_index[l] - 1 of S^{-1}.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown the update is halved: one half is applied now and the other is</span>
// <span style="color: #b22222;">stored in later_updates / later_index at slot *later, which is then incremented.</span>
//
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - validated with qmckl_context_check before any work is done</span>
// <span style="color: #b22222;">  N_updates     - number of update vectors to process</span>
// <span style="color: #b22222;">  Updates       - update vectors, D17_P doubles each, read as Updates[l * D17_P + j]</span>
// <span style="color: #b22222;">  Updates_index - 1-based row index of each update</span>
// <span style="color: #b22222;">  breakdown     - threshold on |den| below which an update is split</span>
// <span style="color: #b22222;">  Slater_inv    - matrix S^{-1}, accessed as Slater_inv[i * D17_P + j]; updated in place</span>
// <span style="color: #b22222;">  later_updates - receives the deferred halves; its capacity is not checked here</span>
// <span style="color: #b22222;">  later_index   - receives the 1-based index of each deferred half</span>
// <span style="color: #b22222;">  later         - in/out count of deferred updates; only incremented, never reset here</span>
// <span style="color: #b22222;">  determinant   - optional (may be NULL); multiplied by den for every applied update</span>
//
// <span style="color: #b22222;">Returns the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the context</span>
// <span style="color: #b22222;">check fails, QMCKL_SUCCESS otherwise.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_17</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
// <span style="color: #b22222;">Bail out before touching any buffer if the context check fails</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_17"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch vectors, reused for every update; neither is initialised here</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D17_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
// <span style="color: #b22222;">Only the first 17 entries of C are written; each is a dot product over D17_P terms</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">Updates_index is 1-based, cui is the 0-based row. NOTE(review): the value is narrowed</span>
// <span style="color: #b22222;">from uint64_t to int and is not range-checked before indexing C and Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): C[i] is only assigned for i &lt; 17 above, yet it is scaled here for every</span>
// <span style="color: #b22222;">i &lt; D17_P; if D17_P &gt; 17 the tail of C is read uninitialised. Those entries are</span>
// <span style="color: #b22222;">never used afterwards, so the result is unaffected -- confirm.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D17_P; i++) {
later_updates[*later * D17_P + i] = Updates[l * D17_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute the denominator from the halved C</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the inner loop below multiplies instead of dividing</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Scale the caller-supplied determinant by den; skipped when the pointer is NULL</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D17_P</span>
// <span style="color: #b22222;">Row cui is copied first because the update loop below overwrites it</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
D[j] = Slater_inv[cui * D17_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D17_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, fixed at 18 rows with a row stride</span>
// <span style="color: #b22222;">of D18_P doubles. D18_P, IVDEP and ALIGNED are macros defined outside this excerpt;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorisation hints -- TODO confirm.</span>
//
// <span style="color: #b22222;">For each l &lt; N_updates: C = S^{-1} x U_l, den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">S^{-1} -= C x D / den in place, with D a copy of row Updates_index[l] - 1 of S^{-1}.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown the update is halved: one half is applied now and the other is</span>
// <span style="color: #b22222;">stored in later_updates / later_index at slot *later, which is then incremented.</span>
//
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - validated with qmckl_context_check before any work is done</span>
// <span style="color: #b22222;">  N_updates     - number of update vectors to process</span>
// <span style="color: #b22222;">  Updates       - update vectors, D18_P doubles each, read as Updates[l * D18_P + j]</span>
// <span style="color: #b22222;">  Updates_index - 1-based row index of each update</span>
// <span style="color: #b22222;">  breakdown     - threshold on |den| below which an update is split</span>
// <span style="color: #b22222;">  Slater_inv    - matrix S^{-1}, accessed as Slater_inv[i * D18_P + j]; updated in place</span>
// <span style="color: #b22222;">  later_updates - receives the deferred halves; its capacity is not checked here</span>
// <span style="color: #b22222;">  later_index   - receives the 1-based index of each deferred half</span>
// <span style="color: #b22222;">  later         - in/out count of deferred updates; only incremented, never reset here</span>
// <span style="color: #b22222;">  determinant   - optional (may be NULL); multiplied by den for every applied update</span>
//
// <span style="color: #b22222;">Returns the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the context</span>
// <span style="color: #b22222;">check fails, QMCKL_SUCCESS otherwise.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_18</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
// <span style="color: #b22222;">Bail out before touching any buffer if the context check fails</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_18"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch vectors, reused for every update; neither is initialised here</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D18_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
// <span style="color: #b22222;">Only the first 18 entries of C are written; each is a dot product over D18_P terms</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">Updates_index is 1-based, cui is the 0-based row. NOTE(review): the value is narrowed</span>
// <span style="color: #b22222;">from uint64_t to int and is not range-checked before indexing C and Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): C[i] is only assigned for i &lt; 18 above, yet it is scaled here for every</span>
// <span style="color: #b22222;">i &lt; D18_P; if D18_P &gt; 18 the tail of C is read uninitialised. Those entries are</span>
// <span style="color: #b22222;">never used afterwards, so the result is unaffected -- confirm.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D18_P; i++) {
later_updates[*later * D18_P + i] = Updates[l * D18_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute the denominator from the halved C</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the inner loop below multiplies instead of dividing</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Scale the caller-supplied determinant by den; skipped when the pointer is NULL</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D18_P</span>
// <span style="color: #b22222;">Row cui is copied first because the update loop below overwrites it</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
D[j] = Slater_inv[cui * D18_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D18_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, fixed at 19 rows with a row stride</span>
// <span style="color: #b22222;">of D19_P doubles. D19_P, IVDEP and ALIGNED are macros defined outside this excerpt;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorisation hints -- TODO confirm.</span>
//
// <span style="color: #b22222;">For each l &lt; N_updates: C = S^{-1} x U_l, den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">S^{-1} -= C x D / den in place, with D a copy of row Updates_index[l] - 1 of S^{-1}.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown the update is halved: one half is applied now and the other is</span>
// <span style="color: #b22222;">stored in later_updates / later_index at slot *later, which is then incremented.</span>
//
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - validated with qmckl_context_check before any work is done</span>
// <span style="color: #b22222;">  N_updates     - number of update vectors to process</span>
// <span style="color: #b22222;">  Updates       - update vectors, D19_P doubles each, read as Updates[l * D19_P + j]</span>
// <span style="color: #b22222;">  Updates_index - 1-based row index of each update</span>
// <span style="color: #b22222;">  breakdown     - threshold on |den| below which an update is split</span>
// <span style="color: #b22222;">  Slater_inv    - matrix S^{-1}, accessed as Slater_inv[i * D19_P + j]; updated in place</span>
// <span style="color: #b22222;">  later_updates - receives the deferred halves; its capacity is not checked here</span>
// <span style="color: #b22222;">  later_index   - receives the 1-based index of each deferred half</span>
// <span style="color: #b22222;">  later         - in/out count of deferred updates; only incremented, never reset here</span>
// <span style="color: #b22222;">  determinant   - optional (may be NULL); multiplied by den for every applied update</span>
//
// <span style="color: #b22222;">Returns the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the context</span>
// <span style="color: #b22222;">check fails, QMCKL_SUCCESS otherwise.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_19</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
// <span style="color: #b22222;">Bail out before touching any buffer if the context check fails</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_19"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch vectors, reused for every update; neither is initialised here</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D19_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
// <span style="color: #b22222;">Only the first 19 entries of C are written; each is a dot product over D19_P terms</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">Updates_index is 1-based, cui is the 0-based row. NOTE(review): the value is narrowed</span>
// <span style="color: #b22222;">from uint64_t to int and is not range-checked before indexing C and Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): C[i] is only assigned for i &lt; 19 above, yet it is scaled here for every</span>
// <span style="color: #b22222;">i &lt; D19_P; if D19_P &gt; 19 the tail of C is read uninitialised. Those entries are</span>
// <span style="color: #b22222;">never used afterwards, so the result is unaffected -- confirm.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D19_P; i++) {
later_updates[*later * D19_P + i] = Updates[l * D19_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute the denominator from the halved C</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the inner loop below multiplies instead of dividing</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Scale the caller-supplied determinant by den; skipped when the pointer is NULL</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D19_P</span>
// <span style="color: #b22222;">Row cui is copied first because the update loop below overwrites it</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
D[j] = Slater_inv[cui * D19_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D19_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, fixed at 20 rows with a row stride</span>
// <span style="color: #b22222;">of D20_P doubles. D20_P, IVDEP and ALIGNED are macros defined outside this excerpt;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorisation hints -- TODO confirm.</span>
//
// <span style="color: #b22222;">For each l &lt; N_updates: C = S^{-1} x U_l, den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">S^{-1} -= C x D / den in place, with D a copy of row Updates_index[l] - 1 of S^{-1}.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown the update is halved: one half is applied now and the other is</span>
// <span style="color: #b22222;">stored in later_updates / later_index at slot *later, which is then incremented.</span>
//
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - validated with qmckl_context_check before any work is done</span>
// <span style="color: #b22222;">  N_updates     - number of update vectors to process</span>
// <span style="color: #b22222;">  Updates       - update vectors, D20_P doubles each, read as Updates[l * D20_P + j]</span>
// <span style="color: #b22222;">  Updates_index - 1-based row index of each update</span>
// <span style="color: #b22222;">  breakdown     - threshold on |den| below which an update is split</span>
// <span style="color: #b22222;">  Slater_inv    - matrix S^{-1}, accessed as Slater_inv[i * D20_P + j]; updated in place</span>
// <span style="color: #b22222;">  later_updates - receives the deferred halves; its capacity is not checked here</span>
// <span style="color: #b22222;">  later_index   - receives the 1-based index of each deferred half</span>
// <span style="color: #b22222;">  later         - in/out count of deferred updates; only incremented, never reset here</span>
// <span style="color: #b22222;">  determinant   - optional (may be NULL); multiplied by den for every applied update</span>
//
// <span style="color: #b22222;">Returns the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the context</span>
// <span style="color: #b22222;">check fails, QMCKL_SUCCESS otherwise.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_20</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
// <span style="color: #b22222;">Bail out before touching any buffer if the context check fails</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_20"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch vectors, reused for every update; neither is initialised here</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D20_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
// <span style="color: #b22222;">Only the first 20 entries of C are written; each is a dot product over D20_P terms</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">Updates_index is 1-based, cui is the 0-based row. NOTE(review): the value is narrowed</span>
// <span style="color: #b22222;">from uint64_t to int and is not range-checked before indexing C and Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): C[i] is only assigned for i &lt; 20 above, yet it is scaled here for every</span>
// <span style="color: #b22222;">i &lt; D20_P; if D20_P &gt; 20 the tail of C is read uninitialised. Those entries are</span>
// <span style="color: #b22222;">never used afterwards, so the result is unaffected -- confirm.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D20_P; i++) {
later_updates[*later * D20_P + i] = Updates[l * D20_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute the denominator from the halved C</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the inner loop below multiplies instead of dividing</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Scale the caller-supplied determinant by den; skipped when the pointer is NULL</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D20_P</span>
// <span style="color: #b22222;">Row cui is copied first because the update loop below overwrites it</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
D[j] = Slater_inv[cui * D20_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D20_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
// <span style="color: #b22222;">Sherman-Morrison kernel with update splitting, fixed at 21 rows with a row stride</span>
// <span style="color: #b22222;">of D21_P doubles. D21_P, IVDEP and ALIGNED are macros defined outside this excerpt;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorisation hints -- TODO confirm.</span>
//
// <span style="color: #b22222;">For each l &lt; N_updates: C = S^{-1} x U_l, den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">S^{-1} -= C x D / den in place, with D a copy of row Updates_index[l] - 1 of S^{-1}.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown the update is halved: one half is applied now and the other is</span>
// <span style="color: #b22222;">stored in later_updates / later_index at slot *later, which is then incremented.</span>
//
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - validated with qmckl_context_check before any work is done</span>
// <span style="color: #b22222;">  N_updates     - number of update vectors to process</span>
// <span style="color: #b22222;">  Updates       - update vectors, D21_P doubles each, read as Updates[l * D21_P + j]</span>
// <span style="color: #b22222;">  Updates_index - 1-based row index of each update</span>
// <span style="color: #b22222;">  breakdown     - threshold on |den| below which an update is split</span>
// <span style="color: #b22222;">  Slater_inv    - matrix S^{-1}, accessed as Slater_inv[i * D21_P + j]; updated in place</span>
// <span style="color: #b22222;">  later_updates - receives the deferred halves; its capacity is not checked here</span>
// <span style="color: #b22222;">  later_index   - receives the 1-based index of each deferred half</span>
// <span style="color: #b22222;">  later         - in/out count of deferred updates; only incremented, never reset here</span>
// <span style="color: #b22222;">  determinant   - optional (may be NULL); multiplied by den for every applied update</span>
//
// <span style="color: #b22222;">Returns the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the context</span>
// <span style="color: #b22222;">check fails, QMCKL_SUCCESS otherwise.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_21</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
// <span style="color: #b22222;">Bail out before touching any buffer if the context check fails</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_core_21"</span>,
<span style="color: #008b8b;">NULL</span>);
}
// <span style="color: #b22222;">Scratch vectors, reused for every update; neither is initialised here</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D21_P];
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
// <span style="color: #b22222;">For each update</span>
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
// <span style="color: #b22222;">Only the first 21 entries of C are written; each is a dot product over D21_P terms</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
C[i] = 0.0f;
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
}
}
// <span style="color: #b22222;">Denominator</span>
// <span style="color: #b22222;">Updates_index is 1-based, cui is the 0-based row. NOTE(review): the value is narrowed</span>
// <span style="color: #b22222;">from uint64_t to int and is not range-checked before indexing C and Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
// <span style="color: #b22222;">second half in later_updates</span>
// <span style="color: #b22222;">NOTE(review): C[i] is only assigned for i &lt; 21 above, yet it is scaled here for every</span>
// <span style="color: #b22222;">i &lt; D21_P; if D21_P &gt; 21 the tail of C is read uninitialised. Those entries are</span>
// <span style="color: #b22222;">never used afterwards, so the result is unaffected -- confirm.</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D21_P; i++) {
later_updates[*later * D21_P + i] = Updates[l * D21_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
// <span style="color: #b22222;">Recompute the denominator from the halved C</span>
den = 1.0f + C[cui];
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
// <span style="color: #b22222;">update to Slater_inv</span>
// <span style="color: #b22222;">Reciprocal taken once so the inner loop below multiplies instead of dividing</span>
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
// <span style="color: #b22222;">Scale the caller-supplied determinant by den; skipped when the pointer is NULL</span>
<span style="color: #a020f0;">if</span> (determinant)
*determinant *= den;
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D21_P</span>
// <span style="color: #b22222;">Row cui is copied first because the update loop below overwrites it</span>
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
D[j] = Slater_inv[cui * D21_P + j];
}
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
<span style="color: #228b22;">IVDEP</span>
<span style="color: #0000ff;">ALIGNED</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
Slater_inv[i * D21_P + j] -= update;
}
}
l += 1;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
<span style="color: #a020f0;">switch</span> (Dim) {
<span style="color: #a020f0;">case</span> 2: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_2(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 3: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_3(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 4: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_4(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 5: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_5(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 6: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_6(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 7: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_7(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 8: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_8(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 9: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_9(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 10: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_10(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 11: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_11(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 12: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_12(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 13: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_13(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 14: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_14(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 15: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_15(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 16: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_16(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 17: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_17(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 18: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_18(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 19: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_19(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 20: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_20(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 21: {
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_21(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">default</span>: {
assert(0 == 1 &amp;&amp; <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
<span style="color: #a020f0;">break</span>;
}
}
}
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_hpc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
}
<span style="color: #483d8b;"> #else</span>
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_doc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
<span style="color: #483d8b;"> #endif</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
</pre>
</div>
</div>
</div>
<div id="outline-container-org95b0bb0" class="outline-4">
<h4 id="org95b0bb0"><span class="section-number-4">3.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
<div class="outline-text-4" id="text-3-1-7">
</div>
</div>
<div id="outline-container-orga260ae6" class="outline-4">
<h4 id="orga260ae6"><span class="section-number-4">3.1.8</span> Performance</h4>
<div class="outline-text-4" id="text-3-1-8">
<p>
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
with Sherman-Morrison and update splitting. Please look at the performance recommendations for those two kernels.
</p>
</div>
</div>
</div>
</div>
<div id="outline-container-org5987d99" class="outline-2">
<h2 id="org5987d99"><span class="section-number-2">4</span> Sherman-Morrison with Slagel Splitting</h2>
<div class="outline-text-2" id="text-4">
</div>
<div id="outline-container-orgcd920e2" class="outline-3">
<h3 id="orgcd920e2"><span class="section-number-3">4.1</span> <code>qmckl_sm_splitting</code></h3>
<div class="outline-text-3" id="text-4-1">
</div>
<div id="outline-container-org135c76a" class="outline-4">
<h4 id="org135c76a"><span class="section-number-4">4.1.1</span> Introduction</h4>
<div class="outline-text-4" id="text-4-1-1">
<p>
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator \(1+v_j^T S^{-1} u_j\) in
the Sherman-Morrison formula is deemed to be too close to zero, the update \(u_j\) is split in half:
\(u_j \rightarrow \frac{1}{2} u_j\). One half is applied immediately &#x2013;necessarily increasing the value of the
denominator because of the split&#x2013; while the other half is put in a queue that will be applied when all the
remaining updates have been treated.
</p>
<p>
The kernel is executed recursively until the queue is either empty and all
updates are applied successfully, or the size of the queue equals the number of initial updates. In the last
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
kernel exits with an exit code.
</p>
<p>
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
</p>
</div>
</div>
<div id="outline-container-org3623c4d" class="outline-4">
<h4 id="org3623c4d"><span class="section-number-4">4.1.2</span> API</h4>
<div class="outline-text-4" id="text-4-1-2">
<table id="org91022e0" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
<colgroup>
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
<col class="org-left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="org-left">Variable</th>
<th scope="col" class="org-left">Type</th>
<th scope="col" class="org-left">In/Out</th>
<th scope="col" class="org-left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td class="org-left">context</td>
<td class="org-left">qmckl_context</td>
<td class="org-left">in</td>
<td class="org-left">Global state</td>
</tr>
<tr>
<td class="org-left">LDS</td>
<td class="org-left">uint64_t</td>
<td class="org-left">in</td>
<td class="org-left">Leading dimension of Slater_inv</td>
</tr>
<tr>
<td class="org-left">Dim</td>
<td class="org-left">uint64_t</td>
<td class="org-left">in</td>
<td class="org-left">Dimension of Slater_inv</td>
</tr>
<tr>
<td class="org-left">N_updates</td>
<td class="org-left">uint64_t</td>
<td class="org-left">in</td>
<td class="org-left">Number of rank-1 updates to be applied to Slater_inv</td>
</tr>
<tr>
<td class="org-left">Updates</td>
<td class="org-left">double[N_updates*LDS]</td>
<td class="org-left">in</td>
<td class="org-left">Array containing the updates</td>
</tr>
<tr>
<td class="org-left">Updates_index</td>
<td class="org-left">uint64_t[N_updates]</td>
<td class="org-left">in</td>
<td class="org-left">Array containing the indices of the rank-1 updates</td>
</tr>
<tr>
<td class="org-left">breakdown</td>
<td class="org-left">double</td>
<td class="org-left">in</td>
<td class="org-left">Break-down parameter on which to fail or not</td>
</tr>
<tr>
<td class="org-left">Slater_inv</td>
<td class="org-left">double[Dim*LDS]</td>
<td class="org-left">inout</td>
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
</tr>
<tr>
<td class="org-left">determinant</td>
<td class="org-left">double</td>
<td class="org-left">inout</td>
<td class="org-left">Determinant of the Slater-matrix</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="outline-container-orgcba34b6" class="outline-4">
<h4 id="orgcba34b6"><span class="section-number-4">4.1.3</span> Requirements</h4>
<div class="outline-text-4" id="text-4-1-3">
<ul class="org-ul">
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
<li><code>LDS &gt;= 2</code></li>
<li><code>Dim &gt;= 2</code></li>
<li><code>N_updates &gt;= 1</code></li>
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
<li><code>breakdown</code> is a small number such that \(0 &lt; breakdown \ll 1\)</li>
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
</ul>
</div>
</div>
<div id="outline-container-orgdaeff2e" class="outline-4">
<h4 id="orgdaeff2e"><span class="section-number-4">4.1.4</span> Pedagogical kernel source (in Fortran)</h4>
<div class="outline-text-4" id="text-4-1-4">
<p>
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
not be used in real workloads.
</p>
<div class="org-src-container">
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> recursive function qmckl_sm_splitting_doc_f( </span><span style="color: #a020f0;">&amp;</span>
context, <span style="color: #a020f0;">&amp;</span>
lds, dim, <span style="color: #a020f0;">&amp;</span>
nupdates, <span style="color: #a020f0;">&amp;</span>
upds, <span style="color: #a020f0;">&amp;</span>
updates_index, <span style="color: #a020f0;">&amp;</span>
breakdown, <span style="color: #a020f0;">&amp;</span>
s_inv, <span style="color: #a020f0;">&amp;</span>
determinant) <span style="color: #a020f0;">result</span>(info)
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
<span style="color: #228b22;">integer</span> , <span style="color: #a020f0;">external</span> ::<span style="color: #a0522d;"> qmckl_sm_splitting_core_doc_f</span>
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> Later</span>
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(nupdates) ::<span style="color: #a0522d;"> Later_index</span>
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds * nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Entering 'qmckl_sm_splitting_doc_f'"</span>
info = QMCKL_FAILURE
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
info = QMCKL_INVALID_CONTEXT
<span style="color: #a020f0;">return</span>
<span style="color: #a020f0;">endif</span>
Later = 0
Later_index = 0
Later_updates = 0
info = qmckl_sm_splitting_core_doc_f( <span style="color: #a020f0;">&amp;</span>
context, <span style="color: #a020f0;">&amp;</span>
lds, dim, <span style="color: #a020f0;">&amp;</span>
nupdates, <span style="color: #a020f0;">&amp;</span>
upds, <span style="color: #a020f0;">&amp;</span>
updates_index, <span style="color: #a020f0;">&amp;</span>
breakdown, <span style="color: #a020f0;">&amp;</span>
s_inv, <span style="color: #a020f0;">&amp;</span>
Later_updates, <span style="color: #a020f0;">&amp;</span>
Later_index, <span style="color: #a020f0;">&amp;</span>
Later, <span style="color: #a020f0;">&amp;</span>
determinant)
<span style="color: #a020f0;">if</span> (Later &gt; 0) <span style="color: #a020f0;">then</span>
info = qmckl_sm_splitting_doc_f( <span style="color: #a020f0;">&amp;</span>
context, <span style="color: #a020f0;">&amp;</span>
lds, dim, <span style="color: #a020f0;">&amp;</span>
Later, <span style="color: #a020f0;">&amp;</span>
Later_updates, <span style="color: #a020f0;">&amp;</span>
Later_index, <span style="color: #a020f0;">&amp;</span>
breakdown, <span style="color: #a020f0;">&amp;</span>
s_inv, <span style="color: #a020f0;">&amp;</span>
determinant)
<span style="color: #a020f0;">end if</span>
info = QMCKL_SUCCESS
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Leaving 'qmckl_sm_splitting_doc_f'"</span>
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc_f</span>
</pre>
</div>
</div>
<div id="outline-container-org6f60107" class="outline-5">
<h5 id="org6f60107"><span class="section-number-5">4.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
<div class="outline-text-5" id="text-4-1-4-1">
<p>
The following Fortran function <code>qmckl_sm_splitting_doc</code> makes sure
that the pedagogical kernel <code>qmckl_sm_splitting_doc_f</code>, written in
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function
<code>qmckl_sm_splitting_doc</code> will be exposed in the header file 'qmckl.h'
for C users and in the module file 'qmckl_f.F90' for Fortran users.
</p>
</div>
</div>
</div>
<div id="outline-container-org58729df" class="outline-4">
<h4 id="org58729df"><span class="section-number-4">4.1.5</span> C headers (exposed in qmckl.h)</h4>
<div class="outline-text-4" id="text-4-1-5">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc</span> (
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
</pre>
</div>
</div>
</div>
<div id="outline-container-org6736786" class="outline-4">
<h4 id="org6736786"><span class="section-number-4">4.1.6</span> C source</h4>
<div class="outline-text-4" id="text-4-1-6">
<div class="org-src-container">
<pre class="src src-python" id="orgbf50b14"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">case {Dim}: {</span>
<span style="color: #8b2252;"> rc = qmckl_sm_splitting_core_{Dim}(</span>
<span style="color: #8b2252;"> context,</span>
<span style="color: #8b2252;"> N_updates,</span>
<span style="color: #8b2252;"> Updates,</span>
<span style="color: #8b2252;"> Updates_index,</span>
<span style="color: #8b2252;"> breakdown,</span>
<span style="color: #8b2252;"> Slater_inv,</span>
<span style="color: #8b2252;"> later_updates,</span>
<span style="color: #8b2252;"> later_index, &amp;later, determinant);</span>
<span style="color: #8b2252;"> break;</span>
<span style="color: #8b2252;">}</span>
<span style="color: #8b2252;">"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">'\n'</span>.join(result)
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting_hpc"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">later_updates</span>[LDS * N_updates];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later_index</span>[N_updates];
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later</span> = 0;
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) {
<span style="color: #a020f0;">switch</span> (Dim) {
<span style="color: #a020f0;">case</span> 2: {
rc = qmckl_sm_splitting_core_2(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 3: {
rc = qmckl_sm_splitting_core_3(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 4: {
rc = qmckl_sm_splitting_core_4(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 5: {
rc = qmckl_sm_splitting_core_5(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 6: {
rc = qmckl_sm_splitting_core_6(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 7: {
rc = qmckl_sm_splitting_core_7(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 8: {
rc = qmckl_sm_splitting_core_8(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 9: {
rc = qmckl_sm_splitting_core_9(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 10: {
rc = qmckl_sm_splitting_core_10(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 11: {
rc = qmckl_sm_splitting_core_11(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 12: {
rc = qmckl_sm_splitting_core_12(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 13: {
rc = qmckl_sm_splitting_core_13(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 14: {
rc = qmckl_sm_splitting_core_14(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 15: {
rc = qmckl_sm_splitting_core_15(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 16: {
rc = qmckl_sm_splitting_core_16(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 17: {
rc = qmckl_sm_splitting_core_17(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 18: {
rc = qmckl_sm_splitting_core_18(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 19: {
rc = qmckl_sm_splitting_core_19(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 20: {
rc = qmckl_sm_splitting_core_20(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">case</span> 21: {
rc = qmckl_sm_splitting_core_21(
context,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index, &amp;later, determinant);
<span style="color: #a020f0;">break</span>;
}
<span style="color: #a020f0;">default</span>: {
assert(0 == 1 &amp;&amp; <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
<span style="color: #a020f0;">break</span>;
}
}
} <span style="color: #a020f0;">else</span> {
rc = qmckl_sm_splitting_core_hpc(
context, LDS, Dim, N_updates, Updates, Updates_index,
breakdown, Slater_inv, later_updates,
later_index, &amp;later, determinant);
}
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
<span style="color: #a020f0;">if</span> (later &gt; 0) {
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_sm_splitting_hpc(
context, LDS, Dim, later,
later_updates, later_index,
breakdown, Slater_inv, determinant);
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
<div class="org-src-container">
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span>(
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
printf(<span style="color: #8b2252;">"Entering 'qmckl_sm_splitting'\n"</span>);
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
<span style="color: #a020f0;">return</span> qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
<span style="color: #8b2252;">"qmckl_sm_splitting"</span>,
<span style="color: #008b8b;">NULL</span>);
}
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_hpc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #483d8b;"> #else</span>
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_doc(
context,
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
<span style="color: #483d8b;"> #endif</span>
printf(<span style="color: #8b2252;">"Leaving 'qmckl_sm_splitting'\n"</span>);
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
</div>
</div>
<div id="outline-container-org83d10c2" class="outline-4">
<h4 id="org83d10c2"><span class="section-number-4">4.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
<div class="outline-text-4" id="text-4-1-7">
</div>
</div>
<div id="outline-container-orgc65cc5b" class="outline-4">
<h4 id="orgc65cc5b"><span class="section-number-4">4.1.8</span> Performance&#x2026;</h4>
<div class="outline-text-4" id="text-4-1-8">
<p>
This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high.
</p>
</div>
</div>
<div id="outline-container-org224e869" class="outline-4">
<h4 id="org224e869"><span class="section-number-4">4.1.9</span> Test</h4>
<div class="outline-text-4" id="text-4-1-9">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #0000ff;">assert</span>(Updates3 != <span style="color: #008b8b;">NULL</span>);
<span style="color: #0000ff;">assert</span>(Updates_index3 != <span style="color: #008b8b;">NULL</span>);
<span style="color: #0000ff;">assert</span>(Slater_inv3_2 != <span style="color: #008b8b;">NULL</span>);
det = -1.23743195512859e-09;
rc = qmckl_sm_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &amp;det);
<span style="color: #0000ff;">assert</span>(fabs(det - 1.602708950725074e-10) &lt; 1e-15);
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
res[i * Dim + j] = 0;
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k &lt; Dim; k++) {
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
}
}
}
rc = QMCKL_SUCCESS;
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
<span style="color: #a020f0;">if</span> (i == j &amp;&amp; fabs(res[i * Dim + j] - 1) &gt; tolerance) {
rc = QMCKL_FAILURE;
}
<span style="color: #a020f0;">if</span> (i != j &amp;&amp; fabs(res[i * Dim + j]) &gt; tolerance) {
rc = QMCKL_FAILURE;
}
}
}
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
</pre>
</div>
</div>
</div>
</div>
</div>
<div id="outline-container-orgad6265b" class="outline-2">
<h2 id="orgad6265b"><span class="section-number-2">5</span> End of files</h2>
<div class="outline-text-2" id="text-5">
<div class="org-src-container">
<pre class="src src-c"><span style="color: #0000ff;">assert</span> (<span style="color: #228b22;">qmckl_context_destroy</span>(<span style="color: #a0522d;">context</span>) == QMCKL_SUCCESS);
<span style="color: #a020f0;">return</span> 0;
}
</pre>
</div>
</div>
</div>
</div>
<div id="postamble" class="status">
<p class="author">Author: TREX CoE</p>
<p class="date">Created: 2023-03-09 Thu 10:03</p>
<p class="validation"><a href="https://validator.w3.org/check?uri=referer">Validate</a></p>
</div>
</body>
</html>