mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-24 03:22:11 +01:00
6173 lines
363 KiB
HTML
6173 lines
363 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
|
<head>
|
|
<!-- 2023-03-09 Thu 10:03 -->
|
|
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
<title>Sherman-Morrison-Woodbury</title>
|
|
<meta name="generator" content="Org mode" />
|
|
<meta name="author" content="TREX CoE" />
|
|
<style type="text/css">
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
.title { text-align: center;
|
|
margin-bottom: .2em; }
|
|
.subtitle { text-align: center;
|
|
font-size: medium;
|
|
font-weight: bold;
|
|
margin-top:0; }
|
|
.todo { font-family: monospace; color: red; }
|
|
.done { font-family: monospace; color: green; }
|
|
.priority { font-family: monospace; color: orange; }
|
|
.tag { background-color: #eee; font-family: monospace;
|
|
padding: 2px; font-size: 80%; font-weight: normal; }
|
|
.timestamp { color: #bebebe; }
|
|
.timestamp-kwd { color: #5f9ea0; }
|
|
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
|
|
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
|
|
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
|
|
.underline { text-decoration: underline; }
|
|
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
|
|
p.verse { margin-left: 3%; }
|
|
pre {
|
|
border: 1px solid #ccc;
|
|
box-shadow: 3px 3px 3px #eee;
|
|
padding: 8pt;
|
|
font-family: monospace;
|
|
overflow: auto;
|
|
margin: 1.2em;
|
|
}
|
|
pre.src {
|
|
position: relative;
|
|
overflow: visible;
|
|
padding-top: 1.2em;
|
|
}
|
|
pre.src:before {
|
|
display: none;
|
|
position: absolute;
|
|
background-color: white;
|
|
top: -10px;
|
|
right: 10px;
|
|
padding: 3px;
|
|
border: 1px solid black;
|
|
}
|
|
pre.src:hover:before { display: inline;}
|
|
/* Languages per Org manual */
|
|
pre.src-asymptote:before { content: 'Asymptote'; }
|
|
pre.src-awk:before { content: 'Awk'; }
|
|
pre.src-C:before { content: 'C'; }
|
|
/* pre.src-C++ doesn't work in CSS */
|
|
pre.src-clojure:before { content: 'Clojure'; }
|
|
pre.src-css:before { content: 'CSS'; }
|
|
pre.src-D:before { content: 'D'; }
|
|
pre.src-ditaa:before { content: 'ditaa'; }
|
|
pre.src-dot:before { content: 'Graphviz'; }
|
|
pre.src-calc:before { content: 'Emacs Calc'; }
|
|
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
|
|
pre.src-fortran:before { content: 'Fortran'; }
|
|
pre.src-gnuplot:before { content: 'gnuplot'; }
|
|
pre.src-haskell:before { content: 'Haskell'; }
|
|
pre.src-hledger:before { content: 'hledger'; }
|
|
pre.src-java:before { content: 'Java'; }
|
|
pre.src-js:before { content: 'Javascript'; }
|
|
pre.src-latex:before { content: 'LaTeX'; }
|
|
pre.src-ledger:before { content: 'Ledger'; }
|
|
pre.src-lisp:before { content: 'Lisp'; }
|
|
pre.src-lilypond:before { content: 'Lilypond'; }
|
|
pre.src-lua:before { content: 'Lua'; }
|
|
pre.src-matlab:before { content: 'MATLAB'; }
|
|
pre.src-mscgen:before { content: 'Mscgen'; }
|
|
pre.src-ocaml:before { content: 'Objective Caml'; }
|
|
pre.src-octave:before { content: 'Octave'; }
|
|
pre.src-org:before { content: 'Org mode'; }
|
|
pre.src-oz:before { content: 'OZ'; }
|
|
pre.src-plantuml:before { content: 'Plantuml'; }
|
|
pre.src-processing:before { content: 'Processing.js'; }
|
|
pre.src-python:before { content: 'Python'; }
|
|
pre.src-R:before { content: 'R'; }
|
|
pre.src-ruby:before { content: 'Ruby'; }
|
|
pre.src-sass:before { content: 'Sass'; }
|
|
pre.src-scheme:before { content: 'Scheme'; }
|
|
pre.src-screen:before { content: 'Gnu Screen'; }
|
|
pre.src-sed:before { content: 'Sed'; }
|
|
pre.src-sh:before { content: 'shell'; }
|
|
pre.src-sql:before { content: 'SQL'; }
|
|
pre.src-sqlite:before { content: 'SQLite'; }
|
|
/* additional languages in org.el's org-babel-load-languages alist */
|
|
pre.src-forth:before { content: 'Forth'; }
|
|
pre.src-io:before { content: 'IO'; }
|
|
pre.src-J:before { content: 'J'; }
|
|
pre.src-makefile:before { content: 'Makefile'; }
|
|
pre.src-maxima:before { content: 'Maxima'; }
|
|
pre.src-perl:before { content: 'Perl'; }
|
|
pre.src-picolisp:before { content: 'Pico Lisp'; }
|
|
pre.src-scala:before { content: 'Scala'; }
|
|
pre.src-shell:before { content: 'Shell Script'; }
|
|
/* Hover label for ebnf2ps source blocks (label spelling matches the selector). */
pre.src-ebnf2ps:before { content: 'ebnf2ps'; }
|
|
/* additional language identifiers per "defun org-babel-execute"
|
|
in ob-*.el */
|
|
pre.src-cpp:before { content: 'C++'; }
|
|
pre.src-abc:before { content: 'ABC'; }
|
|
pre.src-coq:before { content: 'Coq'; }
|
|
pre.src-groovy:before { content: 'Groovy'; }
|
|
/* additional language identifiers from org-babel-shell-names in
|
|
ob-shell.el: ob-shell is the only babel language using a lambda to put
|
|
the execution function name together. */
|
|
pre.src-bash:before { content: 'bash'; }
|
|
pre.src-csh:before { content: 'csh'; }
|
|
pre.src-ash:before { content: 'ash'; }
|
|
pre.src-dash:before { content: 'dash'; }
|
|
pre.src-ksh:before { content: 'ksh'; }
|
|
pre.src-mksh:before { content: 'mksh'; }
|
|
pre.src-posh:before { content: 'posh'; }
|
|
/* Additional Emacs modes also supported by the LaTeX listings package */
|
|
pre.src-ada:before { content: 'Ada'; }
|
|
pre.src-asm:before { content: 'Assembler'; }
|
|
pre.src-caml:before { content: 'Caml'; }
|
|
pre.src-delphi:before { content: 'Delphi'; }
|
|
pre.src-html:before { content: 'HTML'; }
|
|
pre.src-idl:before { content: 'IDL'; }
|
|
pre.src-mercury:before { content: 'Mercury'; }
|
|
pre.src-metapost:before { content: 'MetaPost'; }
|
|
pre.src-modula-2:before { content: 'Modula-2'; }
|
|
pre.src-pascal:before { content: 'Pascal'; }
|
|
pre.src-ps:before { content: 'PostScript'; }
|
|
pre.src-prolog:before { content: 'Prolog'; }
|
|
pre.src-simula:before { content: 'Simula'; }
|
|
pre.src-tcl:before { content: 'tcl'; }
|
|
pre.src-tex:before { content: 'TeX'; }
|
|
pre.src-plain-tex:before { content: 'Plain TeX'; }
|
|
pre.src-verilog:before { content: 'Verilog'; }
|
|
pre.src-vhdl:before { content: 'VHDL'; }
|
|
pre.src-xml:before { content: 'XML'; }
|
|
pre.src-nxml:before { content: 'XML'; }
|
|
/* add a generic configuration mode; LaTeX export needs an additional
|
|
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
|
|
pre.src-conf:before { content: 'Configuration File'; }
|
|
|
|
table { border-collapse:collapse; }
|
|
caption.t-above { caption-side: top; }
|
|
caption.t-bottom { caption-side: bottom; }
|
|
td, th { vertical-align:top; }
|
|
th.org-right { text-align: center; }
|
|
th.org-left { text-align: center; }
|
|
th.org-center { text-align: center; }
|
|
td.org-right { text-align: right; }
|
|
td.org-left { text-align: left; }
|
|
td.org-center { text-align: center; }
|
|
dt { font-weight: bold; }
|
|
.footpara { display: inline; }
|
|
.footdef { margin-bottom: 1em; }
|
|
.figure { padding: 1em; }
|
|
.figure p { text-align: center; }
|
|
.equation-container {
|
|
display: table;
|
|
text-align: center;
|
|
width: 100%;
|
|
}
|
|
.equation {
|
|
vertical-align: middle;
|
|
}
|
|
.equation-label {
|
|
display: table-cell;
|
|
text-align: right;
|
|
vertical-align: middle;
|
|
}
|
|
.inlinetask {
|
|
padding: 10px;
|
|
border: 2px solid gray;
|
|
margin: 10px;
|
|
background: #ffffcc;
|
|
}
|
|
#org-div-home-and-up
|
|
{ text-align: right; font-size: 70%; white-space: nowrap; }
|
|
textarea { overflow-x: auto; }
|
|
.linenr { font-size: smaller }
|
|
.code-highlighted { background-color: #ffff00; }
|
|
.org-info-js_info-navigation { border-style: none; }
|
|
#org-info-js_console-label
|
|
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
|
|
.org-info-js_search-highlight
|
|
{ background-color: #ffff00; color: #000000; font-weight: bold; }
|
|
.org-svg { width: 90%; }
|
|
/*]]>*/-->
|
|
</style>
|
|
<link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
|
|
|
|
<script type="text/javascript" src="org-info.js">
|
|
/**
|
|
*
|
|
* @source: org-info.js
|
|
*
|
|
* @licstart The following is the entire license notice for the
|
|
* JavaScript code in org-info.js.
|
|
*
|
|
* Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
*
|
|
*
|
|
* The JavaScript code in this tag is free software: you can
|
|
* redistribute it and/or modify it under the terms of the GNU
|
|
* General Public License (GNU GPL) as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option)
|
|
* any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
*
|
|
* As additional permission under GNU GPL version 3 section 7, you
|
|
* may distribute non-source (e.g., minimized or compacted) forms of
|
|
* that code without the copy of the GNU GPL normally required by
|
|
* section 4, provided you include this license notice and a URL
|
|
* through which recipients can access the Corresponding Source.
|
|
*
|
|
* @licend The above is the entire license notice
|
|
* for the JavaScript code in org-info.js.
|
|
*
|
|
*/
|
|
</script>
|
|
|
|
<script type="text/javascript">
|
|
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
org_html_manager.set("TOC_DEPTH", "4");
|
|
org_html_manager.set("LINK_HOME", "index.html");
|
|
org_html_manager.set("LINK_UP", "");
|
|
org_html_manager.set("LOCAL_TOC", "1");
|
|
org_html_manager.set("VIEW_BUTTONS", "0");
|
|
org_html_manager.set("MOUSE_HINT", "underline");
|
|
org_html_manager.set("FIXED_TOC", "0");
|
|
org_html_manager.set("TOC", "1");
|
|
org_html_manager.set("VIEW", "info");
|
|
org_html_manager.setup(); // activate after the parameters are set
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/javascript">
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
/*
 * Highlight a code reference and the line it points to.
 *
 * elem: the element being hovered (the reference).
 * id:   id of the referenced element in the document.
 *
 * The original class names of both elements are cached on `elem` so that
 * CodeHighlightOff can restore them. Nothing happens when no element with
 * the given id exists.
 */
function CodeHighlightOn(elem, id)
{
  var target = document.getElementById(id);
  if (null == target) {
    return;
  }
  elem.cacheClassElem = elem.className;
  elem.cacheClassTarget = target.className;
  target.className = "code-highlighted";
  elem.className = "code-highlighted";
}

/*
 * Undo CodeHighlightOn: restore the class names cached on `elem`.
 *
 * The caches are tested against null/undefined rather than for truthiness,
 * because an element without any class has the cached value "" and must
 * still be restored. The target is only touched when it still exists.
 */
function CodeHighlightOff(elem, id)
{
  var target = document.getElementById(id);
  if (elem.cacheClassElem != null) {
    elem.className = elem.cacheClassElem;
  }
  if (elem.cacheClassTarget != null) {
    if (null != target) {
      target.className = elem.cacheClassTarget;
    }
  }
}
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/x-mathjax-config">
|
|
// MathJax (v2) configuration emitted by the Org HTML exporter.
MathJax.Hub.Config({
    displayAlign: "center",
    displayIndent: "0em",

    // `automatic` must be a real boolean: the string "false" is truthy in
    // JavaScript and would switch automatic line breaking on.
    "HTML-CSS": { scale: 100,
                  linebreaks: { automatic: false },
                  webFont: "TeX"
                },
    SVG: { scale: 100,
           linebreaks: { automatic: false },
           font: "TeX" },
    NativeMML: { scale: 100 },
    // Number equations AMS-style, with tags on the right-hand side.
    TeX: { equationNumbers: { autoNumber: "AMS" },
           MultLineWidth: "85%",
           TagSide: "right",
           TagIndent: ".8em"
         }
});
|
|
</script>
|
|
<script type="text/javascript"
|
|
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_HTML"></script>
|
|
</head>
|
|
<body>
|
|
<div id="org-div-home-and-up">
|
|
<a accesskey="h" href=""> UP </a>
|
|
|
|
|
<a accesskey="H" href="index.html"> HOME </a>
|
|
</div><div id="content">
|
|
<h1 class="title">Sherman-Morrison-Woodbury</h1>
|
|
<div id="table-of-contents">
|
|
<h2>Table of Contents</h2>
|
|
<div id="text-table-of-contents">
|
|
<ul>
|
|
<li><a href="#org8310c36">1. Headers</a></li>
|
|
<li><a href="#org68f9179">2. Naïve Sherman-Morrison</a>
|
|
<ul>
|
|
<li><a href="#org7fbd1f9">2.1. <code>qmckl_sm_naive</code></a>
|
|
<ul>
|
|
<li><a href="#org36b6e4b">2.1.1. Introduction</a></li>
|
|
<li><a href="#org53c2299">2.1.2. API</a></li>
|
|
<li><a href="#org720462d">2.1.3. Requirements</a></li>
|
|
<li><a href="#org39ba843">2.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org7fa2714">2.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org74b40ca">2.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#orgc945a9f">2.1.6. C sources</a></li>
|
|
<li><a href="#org5ace91c">2.1.7. Fortran interfaces (exposed in qmckl<sub>f.F90</sub>)</a></li>
|
|
<li><a href="#orga8a843c">2.1.8. Performance</a></li>
|
|
<li><a href="#orge462592">2.1.9. Tests</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orge99c98b">3. Sherman-Morrison with Slagel Splitting (core)</a>
|
|
<ul>
|
|
<li><a href="#orgc5401e9">3.1. <code>qmckl_sm_splitting_core</code></a>
|
|
<ul>
|
|
<li><a href="#org4334aec">3.1.1. Introduction</a></li>
|
|
<li><a href="#orgbae027c">3.1.2. API</a></li>
|
|
<li><a href="#org74af35a">3.1.3. Requirements</a></li>
|
|
<li><a href="#org1324114">3.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org1b24ef6">3.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org853216d">3.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#org595caa3">3.1.6. C sources</a></li>
|
|
<li><a href="#org95b0bb0">3.1.7. Fortran interfaces (exposed in qmckl<sub>f.F90</sub>)</a></li>
|
|
<li><a href="#orga260ae6">3.1.8. Performance</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org5987d99">4. Sherman-Morrison with Slagel Splitting</a>
|
|
<ul>
|
|
<li><a href="#orgcd920e2">4.1. <code>qmckl_sm_splitting</code></a>
|
|
<ul>
|
|
<li><a href="#org135c76a">4.1.1. Introduction</a></li>
|
|
<li><a href="#org3623c4d">4.1.2. API</a></li>
|
|
<li><a href="#orgcba34b6">4.1.3. Requirements</a></li>
|
|
<li><a href="#orgdaeff2e">4.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org6f60107">4.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org58729df">4.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#org6736786">4.1.6. C source</a></li>
|
|
<li><a href="#org83d10c2">4.1.7. Fortran interfaces (exposed in qmckl<sub>f.F90</sub>)</a></li>
|
|
<li><a href="#orgc65cc5b">4.1.8. Performance…</a></li>
|
|
<li><a href="#org224e869">4.1.9. Test</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orgad6265b">5. End of files</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org8310c36" class="outline-2">
|
|
<h2 id="org8310c36"><span class="section-number-2">1</span> Headers</h2>
|
|
<div class="outline-text-2" id="text-1">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
|
|
<span style="color: #483d8b;">#ifdef</span> HAVE_CONFIG_H
|
|
<span style="color: #483d8b;"> #include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
<span style="color: #483d8b;">#endif</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><math.h></span>
|
|
|
|
<span style="color: #228b22;">int</span> <span style="color: #0000ff;">main</span>() {
|
|
<span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>;
|
|
context = qmckl_context_create();
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
This is the range that determines how many high-performance kernel instances will be generated, using the C-function templates defined in the sections below. If the C-function template is named <code>qmckl_kernel_{Dim}</code>, then <code>range(K, L+1)</code> will result in kernel instances from <code>qmckl_kernel_K</code> to <code>qmckl_kernel_L</code>.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="outline-container-org68f9179" class="outline-2">
|
|
<h2 id="org68f9179"><span class="section-number-2">2</span> Naïve Sherman-Morrison</h2>
|
|
<div class="outline-text-2" id="text-2">
|
|
</div>
|
|
<div id="outline-container-org7fbd1f9" class="outline-3">
|
|
<h3 id="org7fbd1f9"><span class="section-number-3">2.1</span> <code>qmckl_sm_naive</code></h3>
|
|
<div class="outline-text-3" id="text-2-1">
|
|
</div>
|
|
|
|
<div id="outline-container-org36b6e4b" class="outline-4">
|
|
<h4 id="org36b6e4b"><span class="section-number-4">2.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-2-1-1">
|
|
<p>
|
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
|
zero when an update is evaluated. It will exit with an error code if the denominator is too close to zero.
|
|
</p>
|
|
|
|
<p>
|
|
#+TODO
|
|
Change the math notation so that the update vectors appear as rows in the math
|
|
so that it is consistent with the representation in C (memory)
|
|
</p>
|
|
|
|
<p>
|
|
The formula for any update \(u_j\) (index \(j\) is suppressed for clarity) that is applied is
|
|
\[
|
|
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
|
\]
|
|
</p>
|
|
|
|
<p>
|
|
where
|
|
\(S\) is the Slater-matrix,
|
|
\(u\) and \(v^T\) are the column and row vectors containing the updates,
|
|
\(S^{-1}\) is the inverse of the Slater-matrix.
|
|
</p>
|
|
|
|
<p>
|
|
Even though the Slater-matrix \(S\) with all updates applied at once is invertible, during the course of applying
|
|
updates to the inverse Slater-matrix \(S^{-1}\) one-by-one it can happen that one of the intermediate inverse
|
|
matrices \(S^{-1}\) becomes singular. Therefore a global threshold value \(\epsilon\) is defined that is used to
|
|
evaluate each individual update \(u_j\) when it is applied.
|
|
</p>
|
|
|
|
<p>
|
|
This value sets the lower bound for which the
|
|
denominator \(1+v_j^TS^{-1}u_j\) is considered to be too small and will most probably result in a singular matrix
|
|
\(S\), or at least in an inverse of \(S\) of very poor numerical quality. Therefore, when \(1+v_j^TS^{-1}u_j \geq \epsilon\),
|
|
the update is applied as usual and the kernel exits with return code <code>QMCKL_SUCCESS</code>.
|
|
If \(1+v_j^TS^{-1}u_j \lt \epsilon\) the update is rejected and the kernel exits with return code <code>QMCKL_FAILURE</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org53c2299" class="outline-4">
|
|
<h4 id="org53c2299"><span class="section-number-4">2.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-2-1-2">
|
|
<table id="orge1303df" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>N_updates</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[N_updates*LDS]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the row indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[Dim*LDS]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org720462d" class="outline-4">
|
|
<h4 id="org720462d"><span class="section-number-4">2.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-2-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 \lt breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
<li><code>determinant > 0</code></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org39ba843" class="outline-4">
|
|
<h4 id="org39ba843"><span class="section-number-4">2.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-2-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_naive_doc_f(context, </span><span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(nupdates * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, lds) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds) ::<span style="color: #a0522d;"> D</span>
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
l = 1;
|
|
! <span style="color: #b22222;">For each update do...</span>
|
|
<span style="color: #a020f0;">do while</span> (l < nupdates + 1)
|
|
|
|
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
C(i) = 0
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! <span style="color: #b22222;">Return early if denominator is too small</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) < breakdown) <span style="color: #a020f0;">return</span>
|
|
idenominator = 1 / denominator
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * denominator
|
|
|
|
! <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
D = Inverse(row, :)
|
|
|
|
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
l = l + 1
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy updated inverse back to s_inv</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_naive_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org7fa2714" class="outline-5">
|
|
<h5 id="org7fa2714"><span class="section-number-5">2.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-2-1-4-1">
|
|
<p>
|
|
The following Fortran function <code>qmckl_sm_naive_doc</code> makes sure
|
|
that the pedagogical kernel <code>qmckl_sm_naive_doc_f</code>, written in
|
|
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function <code>qmckl_sm_naive_doc</code> will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl<sub>f.F90</sub>' for Fortran users.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org74b40ca" class="outline-4">
|
|
<h4 id="org74b40ca"><span class="section-number-4">2.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-2-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgc945a9f" class="outline-4">
|
|
<h4 id="orgc945a9f"><span class="section-number-4">2.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-2-1-6">
|
|
<p>
|
|
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">&lt;stdbool.h&gt;</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">&lt;math.h&gt;</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"stdio.h"</span>
|
|
|
|
// <span style="color: #b22222;">Order important because</span>
|
|
// <span style="color: #b22222;">__GNUC__ also set in ICC, ICX and CLANG</span>
|
|
// <span style="color: #b22222;">__clang__ also set in ICX</span>
|
|
<span style="color: #483d8b;">#if</span> <span style="color: #483d8b;">defined</span>(__INTEL_COMPILER)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__INTEL_LLVM_COMPILER)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__clang__)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"clang loop vectorize(enable)"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__GNUC__)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"GCC ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
|
|
<span style="color: #483d8b;">#endif</span>
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<code>qmckl_sm_naive_hpc</code> is a high performance variation of
|
|
<code>qmckl_sm_naive</code> written in C. It is used in cases when <code>Dim</code> is
|
|
smaller than the leading dimension <code>LDS</code>, irrespective of whether <code>LDS</code>
|
|
includes zero padding to benefit from SIMD instructions or not. Cases like this
|
|
include situations where one wants to apply updates to a square submatrix of the
|
|
full matrix.
|
|
It takes advantage of memory aligned data and assumes no data dependencies
|
|
inside the loops. The loops are fully vectorised whenever <code>Dim</code> is an integer
|
|
multiple of <code>SIMD_LENGTH</code>.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[Dim];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x u_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator: v_l^T * C</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown)
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<code>qmckl_exit_code qmckl_sm_naive_{Dim}</code> is a C function template that is used to generate instances of C functions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop bounds are known at compile time, allowing the compiler to optimize more aggressively.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org5669e0b"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_naive_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D</span>{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[{Dim}];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="org07bc9d7"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_naive_{Dim}(</span>
|
|
<span style="color: #8b2252;"> const qmckl_context context,</span>
|
|
<span style="color: #8b2252;"> const uint64_t N_updates,</span>
|
|
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
|
|
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
|
|
<span style="color: #8b2252;"> const double breakdown,</span>
|
|
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
|
|
<span style="color: #8b2252;"> double* __restrict determinant) {</span>
|
|
|
|
<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
|
|
<span style="color: #8b2252;"> return qmckl_failwith(context,</span>
|
|
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
|
|
<span style="color: #8b2252;"> "qmckl_sm_naive_{Dim}",</span>
|
|
<span style="color: #8b2252;"> NULL);</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> #define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
|
|
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[{Dim}];</span>
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>
|
|
|
|
<span style="color: #8b2252;"> uint64_t l = 0;</span>
|
|
<span style="color: #8b2252;"> // For each update</span>
|
|
<span style="color: #8b2252;"> while (l < N_updates) {</span>
|
|
<span style="color: #8b2252;"> // C = A^{-1} x U_l</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> C[i] = 0;</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // Denominator</span>
|
|
<span style="color: #8b2252;"> const int cui = Updates_index[l] - 1;</span>
|
|
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
|
|
|
|
<span style="color: #8b2252;"> if (fabs(den) < breakdown) {</span>
|
|
<span style="color: #8b2252;"> return QMCKL_FAILURE;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>
|
|
|
|
<span style="color: #8b2252;"> // Update det(A)</span>
|
|
<span style="color: #8b2252;"> if (determinant)</span>
|
|
<span style="color: #8b2252;"> *determinant *= den;</span>
|
|
|
|
<span style="color: #8b2252;"> // selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> double update = C[i] * D[j] * iden;</span>
|
|
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> l += 1;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
|
|
<span style="color: #8b2252;">}</span>
|
|
<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
Python script that generates the C switch cases that call the individual kernel instances.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="orgb45090f"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">case {Dim}: </span>
|
|
<span style="color: #8b2252;"> return qmckl_sm_naive_{Dim}(context,</span>
|
|
<span style="color: #8b2252;"> N_updates,</span>
|
|
<span style="color: #8b2252;"> Updates,</span>
|
|
<span style="color: #8b2252;"> Updates_index,</span>
|
|
<span style="color: #8b2252;"> breakdown,</span>
|
|
<span style="color: #8b2252;"> Slater_inv,</span>
|
|
<span style="color: #8b2252;"> determinant);"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D2_P</span> ((1+(2-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
D[j] = Slater_inv[cui * D2_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D2_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D3_P</span> ((1+(3-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
D[j] = Slater_inv[cui * D3_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D3_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_4: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 4 rows of D4_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (4 dot products of length D4_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..4, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_4</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_4"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 4 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D4_P</span> ((1+(4-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[4];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 4) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
      D[j] = Slater_inv[cui * D4_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D4_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_5: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 5 rows of D5_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (5 dot products of length D5_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..5, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_5</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_5"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 5 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D5_P</span> ((1+(5-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[5];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 5) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
      D[j] = Slater_inv[cui * D5_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D5_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_6: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 6 rows of D6_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (6 dot products of length D6_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..6, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_6</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_6"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 6 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D6_P</span> ((1+(6-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[6];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 6) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
      D[j] = Slater_inv[cui * D6_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D6_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_7: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 7 rows of D7_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (7 dot products of length D7_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..7, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_7</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_7"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 7 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D7_P</span> ((1+(7-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[7];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 7) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
      D[j] = Slater_inv[cui * D7_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D7_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_8: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 8 rows of D8_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (8 dot products of length D8_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..8, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_8</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_8"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 8 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D8_P</span> ((1+(8-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[8];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 8) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
      D[j] = Slater_inv[cui * D8_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 8; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D8_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D8_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_9: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 9 rows of D9_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (9 dot products of length D9_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..9, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_9</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_9"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 9 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D9_P</span> ((1+(9-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[9];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 9) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
      D[j] = Slater_inv[cui * D9_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 9; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D9_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D9_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #b22222;">/* qmckl_sm_naive_10: apply N_updates successive rank-one updates to an
 * inverse matrix held in Slater_inv as 10 rows of D10_P doubles each.
 *
 * For every update l:
 *   C   = Slater_inv x Updates[l]        (10 dot products of length D10_P)
 *   den = 1 + C[cui], with cui = Updates_index[l] - 1 (indices are 1-based)
 *   Slater_inv[i][j] -= C[i] * Slater_inv[cui][j] / den
 *   *determinant *= den                  (skipped when determinant is NULL)
 *
 * Returns QMCKL_SUCCESS when every update was applied, QMCKL_FAILURE when
 * fabs(den) is below breakdown or an update index lies outside 1..10, and
 * the value of qmckl_failwith() when the context check fails.
 * Updates applied before a failing one are not rolled back. */</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_10</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sm_naive_10"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #b22222;">// Row stride: 10 rounded up to a multiple of SIMD_LENGTH</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D10_P</span> ((1+(10-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[10];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  <span style="color: #b22222;">// For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    <span style="color: #b22222;">// Zero-based row addressed by this update. An index of 0 wraps to a
    // huge unsigned value, so a single comparison rejects both ends of the
    // invalid range before cui is used to address C and Slater_inv.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 10) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    <span style="color: #b22222;">// C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
        C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
      }
    }

    <span style="color: #b22222;">// Denominator</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #b22222;">// Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    <span style="color: #b22222;">// D = row cui of A^{-1}, copied before the rows are overwritten below</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
      D[j] = Slater_inv[cui * D10_P + j];
    }

    <span style="color: #b22222;">// A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 10; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D10_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D10_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D11_P</span> ((1+(11-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[11];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D12_P</span> ((1+(12-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[12];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
D[j] = Slater_inv[cui * D12_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D12_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D13_P</span> ((1+(13-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[13];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
D[j] = Slater_inv[cui * D13_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D13_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D14_P</span> ((1+(14-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[14];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
D[j] = Slater_inv[cui * D14_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D14_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D15_P</span> ((1+(15-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[15];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
D[j] = Slater_inv[cui * D15_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D15_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D16_P</span> ((1+(16-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[16];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
D[j] = Slater_inv[cui * D16_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D16_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D17_P</span> ((1+(17-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[17];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
D[j] = Slater_inv[cui * D17_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D17_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D18_P</span> ((1+(18-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[18];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
D[j] = Slater_inv[cui * D18_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D18_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D19_P</span> ((1+(19-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[19];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
D[j] = Slater_inv[cui * D19_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D19_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D20_P</span> ((1+(20-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[20];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
D[j] = Slater_inv[cui * D20_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D20_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D21_P</span> ((1+(21-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[21];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
D[j] = Slater_inv[cui * D21_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D21_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
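<code>qmckl_sm_naive</code> is a generic function containing the decision-making logic that calls the proper kernel, based on how the library was configured (<code>--enable-doc</code> and <code>--enable-hpc</code>) and on the passed array dimensions <code>LDS</code> and <code>Dim</code>. In the HPC build, when <code>LDS</code> equals <code>Dim</code> padded to a multiple of <code>SIMD_LENGTH</code>, specialised kernels exist only for <code>Dim</code> from 2 to 21; any other <code>Dim</code> falls through and returns <code>QMCKL_FAILURE</code>.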
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_2(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_3(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_4(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_5(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_6(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_7(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_8(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_9(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_10(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_11(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_12(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_13(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_14(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_15(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_16(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_17(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_18(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_19(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_20(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_21(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org5ace91c" class="outline-4">
|
|
<h4 id="org5ace91c"><span class="section-number-4">2.1.7</span> Fortran interfaces (exposed in <code>qmckl_f.F90</code>)</h4>
|
|
<div class="outline-text-4" id="text-2-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orga8a843c" class="outline-4">
|
|
<h4 id="orga8a843c"><span class="section-number-4">2.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-2-1-8">
|
|
<p>
|
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
|
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
|
can never resolve a situation where applying the update causes singular behaviour.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="outline-container-orge462592" class="outline-4">
|
|
<h4 id="orge462592"><span class="section-number-4">2.1.9</span> Tests</h4>
|
|
<div class="outline-text-4" id="text-2-1-9">
|
|
<p>
|
|
The tests for the kernels are executed on datasets that are extracted from a run of
|
|
QMC=Chem on Benzene (21 spin-up/21 spin-down electrons) using 329 unique alpha determinants.
|
|
The tests are run such that the kernels reject the computed inverse whenever the computed
|
|
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
|
QMC=Chem. The tests will return <code>QMCKL_SUCCESS</code> whenever all the elements of the final matrix
|
|
\(R = S \cdot S^{-1} - 1\) are smaller than the given tolerance value of 1e-3, and will return
|
|
<code>QMCKL_FAILURE</code> if the values are larger than this tolerance value.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span> = 21;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span> = (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span> = 1e-3;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">tolerance</span> = 1e-3;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">res</span>[441];
|
|
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"sm_test.h"</span>
|
|
|
|
<span style="color: #0000ff;">assert</span>(Updates1 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index1 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv1 != <span style="color: #008b8b;">NULL</span>);
|
|
|
|
// <span style="color: #b22222;">original determinant of Slater1 (before applying updates)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = 3.407025646103221e-10;
|
|
rc = qmckl_sm_naive(context,
|
|
LDS,
|
|
Dim,
|
|
N_updates1,
|
|
Updates1,
|
|
Updates_index1,
|
|
breakdown,
|
|
Slater_inv1,
|
|
&det);
|
|
|
|
// <span style="color: #b22222;">Check that the determinant is updated properly</span>
|
|
<span style="color: #0000ff;">assert</span>(fabs(det + 4.120398385068217e-10) < 1e-15);
|
|
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-orge99c98b" class="outline-2">
|
|
<h2 id="orge99c98b"><span class="section-number-2">3</span> Sherman-Morrison with Slagel Splitting (core)</h2>
|
|
<div class="outline-text-2" id="text-3">
|
|
</div>
|
|
<div id="outline-container-orgc5401e9" class="outline-3">
|
|
<h3 id="orgc5401e9"><span class="section-number-3">3.1</span> <code>qmckl_sm_splitting_core</code></h3>
|
|
<div class="outline-text-3" id="text-3-1">
|
|
</div>
|
|
|
|
<div id="outline-container-org4334aec" class="outline-4">
|
|
<h4 id="org4334aec"><span class="section-number-4">3.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-3-1-1">
|
|
<p>
|
|
<code>qmckl_sm_splitting_core</code> is the inner core part of 'Sherman-Morrison with update splitting' in the next section.
|
|
It is not normally used by itself but it is possible to use it nonetheless.
|
|
</p>
|
|
|
|
<p>
|
|
It has three extra parameters in its API:
|
|
</p>
|
|
<ul class="org-ul">
|
|
<li><code>later_updates</code> initially empty array that will contain the second halves of updates that were split during kernel execution</li>
|
|
<li><code>later_index</code> initially empty array that will contain the row/column numbers of the updates that were split during execution</li>
|
|
<li><code>later</code> initially zero integer that records the number of updates that were split during execution.</li>
|
|
</ul>
|
|
|
|
<p>
|
|
It is up to the user to decide what to do with these updates once the kernel returns. Normally <code>qmckl_sm_splitting_core</code> is
|
|
used as the core part of a recursive function, as is done in <code>qmckl_sm_splitting</code> or as part of a more complex
|
|
kernel like <code>qmckl_sherman_morrison_smw32s</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant is passed it will only be partially updated if there were any update splits.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgbae027c" class="outline-4">
|
|
<h4 id="orgbae027c"><span class="section-number-4">3.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-3-1-2">
|
|
<table id="org61d5afd" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>N_updates</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[LDS*N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing positions of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[Dim*LDS]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later_updates</code></td>
|
|
<td class="org-left"><code>double[LDS*N_updates]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the positions of the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Number of split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org74af35a" class="outline-4">
|
|
<h4 id="org74af35a"><span class="section-number-4">3.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-3-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N\_updates \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N\_updates\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 \lt breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
<li><code>later_updates</code> is allocated with \(N\_updates \times LDS\) elements</li>
|
|
<li><code>later_index</code> is allocated with \(N\_updates\) elements</li>
|
|
<li><code>later >= 0</code></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org1324114" class="outline-4">
|
|
<h4 id="org1324114"><span class="section-number-4">3.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-3-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_splitting_core_doc_f( </span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
later_upds, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> later_upds(lds * nupdates)</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds, nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, lds) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds) ::<span style="color: #a0522d;"> D</span>
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
|
|
|
|
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Entering 'qmckl_sm_splittinig_core_doc_f'"</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
|
|
l = 1;
|
|
! <span style="color: #b22222;">For each update do...</span>
|
|
<span style="color: #a020f0;">do while</span> (l < nupdates + 1)
|
|
|
|
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
C(i) = 0
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! <span style="color: #b22222;">If denominator is too close to zero:</span>
|
|
! <span style="color: #b22222;">- Split update in 2 before storing in Later_updates</span>
|
|
! <span style="color: #b22222;">- Split previously computed vector C in 2</span>
|
|
! <span style="color: #b22222;">- Recompute the denominator</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) < breakdown) <span style="color: #a020f0;">then</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
Later_updates(i, l) = Updates(i, l) / 2
|
|
C(i) = C(i) / 2
|
|
<span style="color: #a020f0;">end do</span>
|
|
Later_index(Later + 1) = updates_index(l)
|
|
Later = Later + 1
|
|
denominator = 1 + C(row)
|
|
<span style="color: #a020f0;">end if</span>
|
|
|
|
idenominator = 1 / denominator
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * denominator
|
|
|
|
! <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
D = Inverse(row, :)
|
|
|
|
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
l = l + 1
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy updated inverse and later updates</span>
|
|
! <span style="color: #b22222;">back to s_inv and later_upds</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_lu</span>(Later_Updates, later_upds, lds, nupdates)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Leaving 'qmckl_sm_splittinig_core_doc_f'"</span>
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org1b24ef6" class="outline-5">
|
|
<h5 id="org1b24ef6"><span class="section-number-5">3.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-3-1-4-1">
|
|
<p>
|
|
The function <code>qmckl_sm_splitting_core_doc</code> makes sure that
|
|
<code>qmckl_sm_splitting_core_doc_f</code> can be called from C using the
|
|
<code>ISO_C_BINDING</code>. Function <code>qmckl_sm_splitting_core_doc</code> will be
|
|
exposed in <code>qmckl.h</code> and <code>qmckl_f.F90</code>, but
|
|
<code>qmckl_sm_splitting_core_doc_f</code> will not.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org853216d" class="outline-4">
|
|
<h4 id="org853216d"><span class="section-number-4">3.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-3-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org595caa3" class="outline-4">
|
|
<h4 id="org595caa3"><span class="section-number-4">3.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-3-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[LDS];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < LDS; i++) {
|
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x LDS</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org6d5b0f9"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_splitting_core_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D{Dim}_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D{Dim}_P; i++) {
|
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="org83b4f54"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_splitting_core_{Dim}(</span>
|
|
<span style="color: #8b2252;"> const qmckl_context context,</span>
|
|
<span style="color: #8b2252;"> uint64_t N_updates,</span>
|
|
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
|
|
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
|
|
<span style="color: #8b2252;"> const double breakdown,</span>
|
|
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
|
|
<span style="color: #8b2252;"> double* __restrict later_updates,</span>
|
|
<span style="color: #8b2252;"> uint64_t* __restrict later_index,</span>
|
|
<span style="color: #8b2252;"> uint64_t* __restrict later,</span>
|
|
<span style="color: #8b2252;"> double* __restrict determinant) {</span>
|
|
|
|
<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
|
|
<span style="color: #8b2252;"> return qmckl_failwith(</span>
|
|
<span style="color: #8b2252;"> context,</span>
|
|
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
|
|
<span style="color: #8b2252;"> "qmckl_sm_splitting_core_{Dim}",</span>
|
|
<span style="color: #8b2252;"> NULL);</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[D{Dim}_P];</span>
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>
|
|
|
|
<span style="color: #8b2252;"> uint64_t l = 0;</span>
|
|
<span style="color: #8b2252;"> // For each update</span>
|
|
<span style="color: #8b2252;"> while (l < N_updates) {</span>
|
|
<span style="color: #8b2252;"> // C = S^{-1} x U_l</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> C[i] = 0.0f;</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // Denominator</span>
|
|
<span style="color: #8b2252;"> const int cui = Updates_index[l] - 1;</span>
|
|
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
|
|
<span style="color: #8b2252;"> if (fabs(den) < breakdown) {</span>
|
|
<span style="color: #8b2252;"> // U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
<span style="color: #8b2252;"> // second half in later_updates</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < D{Dim}_P; i++) {</span>
|
|
<span style="color: #8b2252;"> later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;</span>
|
|
<span style="color: #8b2252;"> C[i] *= 0.5f;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> later_index[*later] = Updates_index[l];</span>
|
|
<span style="color: #8b2252;"> (*later)++;</span>
|
|
|
|
<span style="color: #8b2252;"> den = 1.0f + C[cui];</span>
|
|
<span style="color: #8b2252;"> } // From here onwards we continue with applying the first half of the</span>
|
|
<span style="color: #8b2252;"> // update to Slater_inv</span>
|
|
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>
|
|
|
|
<span style="color: #8b2252;"> if (determinant)</span>
|
|
<span style="color: #8b2252;"> *determinant *= den;</span>
|
|
|
|
<span style="color: #8b2252;"> // D = v^T x S^{-1} : 1 x D{Dim}_P</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> const double update = C[i] * D[j] * iden;</span>
|
|
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> l += 1;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
|
|
<span style="color: #8b2252;">}</span>
|
|
<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="org328c849"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">case {Dim}: {</span>
|
|
<span style="color: #8b2252;"> return qmckl_sm_splitting_core_{Dim}(</span>
|
|
<span style="color: #8b2252;"> context,</span>
|
|
<span style="color: #8b2252;"> N_updates,</span>
|
|
<span style="color: #8b2252;"> Updates,</span>
|
|
<span style="color: #8b2252;"> Updates_index,</span>
|
|
<span style="color: #8b2252;"> breakdown,</span>
|
|
<span style="color: #8b2252;"> Slater_inv,</span>
|
|
<span style="color: #8b2252;"> later_updates,</span>
|
|
<span style="color: #8b2252;"> later_index,</span>
|
|
<span style="color: #8b2252;"> later,</span>
|
|
<span style="color: #8b2252;"> determinant);</span>
|
|
<span style="color: #8b2252;"> break;</span>
|
|
<span style="color: #8b2252;">}"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D2_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D2_P; i++) {
|
|
later_updates[*later * D2_P + i] = Updates[l * D2_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D2_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
D[j] = Slater_inv[cui * D2_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D2_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D3_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D3_P; i++) {
|
|
later_updates[*later * D3_P + i] = Updates[l * D3_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D3_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
D[j] = Slater_inv[cui * D3_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D3_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D4_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D4_P; i++) {
|
|
later_updates[*later * D4_P + i] = Updates[l * D4_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D4_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
D[j] = Slater_inv[cui * D4_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D4_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D5_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D5_P; i++) {
|
|
later_updates[*later * D5_P + i] = Updates[l * D5_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D5_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
D[j] = Slater_inv[cui * D5_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D5_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_6: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 6 rows and D6_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D6_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D6_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D6_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D6_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..5] were computed above, but this loop scales D6_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..5] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D6_P; i++) {
|
|
later_updates[*later * D6_P + i] = Updates[l * D6_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D6_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
D[j] = Slater_inv[cui * D6_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D6_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_7: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 7 rows and D7_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D7_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D7_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D7_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D7_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..6] were computed above, but this loop scales D7_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..6] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D7_P; i++) {
|
|
later_updates[*later * D7_P + i] = Updates[l * D7_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D7_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
D[j] = Slater_inv[cui * D7_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D7_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_8: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 8 rows and D8_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D8_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D8_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D8_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D8_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..7] were computed above, but this loop scales D8_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..7] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D8_P; i++) {
|
|
later_updates[*later * D8_P + i] = Updates[l * D8_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D8_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
D[j] = Slater_inv[cui * D8_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D8_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_9: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 9 rows and D9_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D9_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D9_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D9_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D9_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..8] were computed above, but this loop scales D9_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..8] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D9_P; i++) {
|
|
later_updates[*later * D9_P + i] = Updates[l * D9_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D9_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
D[j] = Slater_inv[cui * D9_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D9_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_10: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 10 rows and D10_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D10_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D10_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D10_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D10_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..9] were computed above, but this loop scales D10_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..9] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D10_P; i++) {
|
|
later_updates[*later * D10_P + i] = Updates[l * D10_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D10_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
D[j] = Slater_inv[cui * D10_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D10_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_11: applies N_updates rank-1 updates to Slater_inv, one at a</span>
// <span style="color: #b22222;">time, with loop bounds fixed to 11 rows and D11_P entries per row. When fabs(den) falls</span>
// <span style="color: #b22222;">below breakdown, the update is split: one half is applied immediately and the other half</span>
// <span style="color: #b22222;">is appended to later_updates / later_index, and *later is incremented.</span>
//
// <span style="color: #b22222;">context       : passed to qmckl_context_check; on QMCKL_NULL_CONTEXT the function returns</span>
// <span style="color: #b22222;">                the result of qmckl_failwith without touching any array.</span>
// <span style="color: #b22222;">N_updates     : number of updates processed by the main loop.</span>
// <span style="color: #b22222;">Updates       : read-only; entry j of update l is Updates[l * D11_P + j].</span>
// <span style="color: #b22222;">Updates_index : read-only; Updates_index[l] - 1 is used as a 0-based index into C and as a</span>
// <span style="color: #b22222;">                row of Slater_inv. No range check is done in this function.</span>
// <span style="color: #b22222;">breakdown     : threshold compared against fabs(den).</span>
// <span style="color: #b22222;">Slater_inv    : updated in place; element (i, j) is Slater_inv[i * D11_P + j].</span>
// <span style="color: #b22222;">later_updates : receives the postponed half-updates, D11_P entries per slot.</span>
// <span style="color: #b22222;">later_index   : receives Updates_index[l] for every postponed half-update.</span>
// <span style="color: #b22222;">later         : in/out counter of postponed updates; it is incremented, never reset, here.</span>
// <span style="color: #b22222;">determinant   : optional; when non-NULL it is multiplied by den for every applied update.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS once all updates have been processed.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D11_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">Updates_index[l] - 1 is the 0-based position used for C and for the row of Slater_inv.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): only C[0..10] were computed above, but this loop scales D11_P entries of C.</span>
// <span style="color: #b22222;">Afterwards only C[cui] and C[0..10] are read in this function.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D11_P; i++) {
|
|
later_updates[*later * D11_P + i] = Updates[l * D11_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D11_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied into D first because the loop below modifies Slater_inv in place.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D12_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D12_P; i++) {
|
|
later_updates[*later * D12_P + i] = Updates[l * D12_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D12_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
D[j] = Slater_inv[cui * D12_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D12_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D13_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D13_P; i++) {
|
|
later_updates[*later * D13_P + i] = Updates[l * D13_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D13_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
D[j] = Slater_inv[cui * D13_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D13_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D14_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D14_P; i++) {
|
|
later_updates[*later * D14_P + i] = Updates[l * D14_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D14_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
D[j] = Slater_inv[cui * D14_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D14_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D15_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D15_P; i++) {
|
|
later_updates[*later * D15_P + i] = Updates[l * D15_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D15_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
D[j] = Slater_inv[cui * D15_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D15_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D16_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D16_P; i++) {
|
|
later_updates[*later * D16_P + i] = Updates[l * D16_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D16_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
D[j] = Slater_inv[cui * D16_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D16_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D17_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D17_P; i++) {
|
|
later_updates[*later * D17_P + i] = Updates[l * D17_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D17_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
D[j] = Slater_inv[cui * D17_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D17_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D18_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D18_P; i++) {
|
|
later_updates[*later * D18_P + i] = Updates[l * D18_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D18_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
D[j] = Slater_inv[cui * D18_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D18_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D19_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D19_P; i++) {
|
|
later_updates[*later * D19_P + i] = Updates[l * D19_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D19_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
D[j] = Slater_inv[cui * D19_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D19_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D20_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D20_P; i++) {
|
|
later_updates[*later * D20_P + i] = Updates[l * D20_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D20_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
D[j] = Slater_inv[cui * D20_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D20_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D21_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D21_P; i++) {
|
|
later_updates[*later * D21_P + i] = Updates[l * D21_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D21_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
D[j] = Slater_inv[cui * D21_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D21_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_2(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 3: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_3(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 4: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_4(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 5: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_5(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 6: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_6(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 7: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_7(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 8: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_8(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 9: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_9(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 10: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_10(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 11: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_11(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 12: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_12(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 13: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_13(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 14: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_14(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 15: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_15(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 16: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_16(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 17: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_17(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 18: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_18(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 19: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_19(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 20: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_20(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">case</span> 21: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_21(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">default</span>: {
|
|
assert(0 == 1 && <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org95b0bb0" class="outline-4">
|
|
<h4 id="org95b0bb0"><span class="section-number-4">3.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
|
|
<div class="outline-text-4" id="text-3-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orga260ae6" class="outline-4">
|
|
<h4 id="orga260ae6"><span class="section-number-4">3.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-3-1-8">
|
|
<p>
|
|
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
|
with Sherman-Morrison and update splitting. Please look at the performance recommendations for those two kernels.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org5987d99" class="outline-2">
|
|
<h2 id="org5987d99"><span class="section-number-2">4</span> Sherman-Morrison with Slagel Splitting</h2>
|
|
<div class="outline-text-2" id="text-4">
|
|
</div>
|
|
<div id="outline-container-orgcd920e2" class="outline-3">
|
|
<h3 id="orgcd920e2"><span class="section-number-3">4.1</span> <code>qmckl_sm_splitting</code></h3>
|
|
<div class="outline-text-3" id="text-4-1">
|
|
</div>
|
|
|
|
<div id="outline-container-org135c76a" class="outline-4">
|
|
<h4 id="org135c76a"><span class="section-number-4">4.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-4-1-1">
|
|
<p>
|
|
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator \(1+v_j^T S^{-1} u_j\) in
|
|
the Sherman-Morrison formula is deemed to be too close to zero, the update \(u_j\) is split in half:
|
|
\(u_j \rightarrow \frac{1}{2} u_j\). One half is applied immediately –necessarily increasing the value of the
|
|
denominator because of the split– while the other half is put in a queue that will be applied when all the
|
|
remaining updates have been treated.
|
|
</p>
|
|
|
|
<p>
|
|
The kernel is executed recursively until the queue is either empty and all
|
|
updates are applied successfully, or the size of the queue equals the number of initial updates. In the last
|
|
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
|
|
kernel exits with an exit code.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org3623c4d" class="outline-4">
|
|
<h4 id="org3623c4d"><span class="section-number-4">4.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-4-1-2">
|
|
<table id="org91022e0" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">qmckl_context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">N_updates</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Updates</td>
|
|
<td class="org-left">double[N_updates*LDS]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Updates_index</td>
|
|
<td class="org-left">uint64_t[N_updates]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Slater_inv</td>
|
|
<td class="org-left">double[Dim*LDS]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgcba34b6" class="outline-4">
|
|
<h4 id="orgcba34b6"><span class="section-number-4">4.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-4-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgdaeff2e" class="outline-4">
|
|
<h4 id="orgdaeff2e"><span class="section-number-4">4.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-4-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> recursive function qmckl_sm_splitting_doc_f( </span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">integer</span> , <span style="color: #a020f0;">external</span> ::<span style="color: #a0522d;"> qmckl_sm_splitting_core_doc_f</span>
|
|
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> Later</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(nupdates) ::<span style="color: #a0522d;"> Later_index</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds * nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
|
|
|
|
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Entering 'qmckl_sm_splitting_doc_f'"</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
Later = 0
|
|
Later_index = 0
|
|
Later_updates = 0
|
|
|
|
info = qmckl_sm_splitting_core_doc_f( <span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
Later_updates, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
determinant)
|
|
|
|
<span style="color: #a020f0;">if</span> (Later > 0) <span style="color: #a020f0;">then</span>
|
|
info = qmckl_sm_splitting_doc_f( <span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
Later_updates, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant)
|
|
<span style="color: #a020f0;">end if</span>
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">write</span>(*,*) <span style="color: #8b2252;">"Leaving 'qmckl_sm_splitting_doc_f'"</span>
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org6f60107" class="outline-5">
|
|
<h5 id="org6f60107"><span class="section-number-5">4.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-4-1-4-1">
|
|
<p>
|
|
The following Fortran function <code>qmckl_sm_splitting_doc</code> makes sure
|
|
that the pedagogical kernel <code>qmckl_sm_splitting_doc_f</code>, written in
|
|
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function
|
|
<code>qmckl_sm_splitting_doc</code> will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org58729df" class="outline-4">
|
|
<h4 id="org58729df"><span class="section-number-4">4.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-4-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org6736786" class="outline-4">
|
|
<h4 id="org6736786"><span class="section-number-4">4.1.6</span> C source</h4>
|
|
<div class="outline-text-4" id="text-4-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="orgbf50b14"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">case {Dim}: {</span>
|
|
<span style="color: #8b2252;"> rc = qmckl_sm_splitting_core_{Dim}(</span>
|
|
<span style="color: #8b2252;"> context,</span>
|
|
<span style="color: #8b2252;"> N_updates,</span>
|
|
<span style="color: #8b2252;"> Updates,</span>
|
|
<span style="color: #8b2252;"> Updates_index,</span>
|
|
<span style="color: #8b2252;"> breakdown,</span>
|
|
<span style="color: #8b2252;"> Slater_inv,</span>
|
|
<span style="color: #8b2252;"> later_updates,</span>
|
|
<span style="color: #8b2252;"> later_index, &later, determinant);</span>
|
|
<span style="color: #8b2252;"> break;</span>
|
|
<span style="color: #8b2252;">}</span>
|
|
<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">'\n'</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">later_updates</span>[LDS * N_updates];
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later_index</span>[N_updates];
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later</span> = 0;
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) {
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2: {
|
|
rc = qmckl_sm_splitting_core_2(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 3: {
|
|
rc = qmckl_sm_splitting_core_3(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 4: {
|
|
rc = qmckl_sm_splitting_core_4(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 5: {
|
|
rc = qmckl_sm_splitting_core_5(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 6: {
|
|
rc = qmckl_sm_splitting_core_6(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 7: {
|
|
rc = qmckl_sm_splitting_core_7(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 8: {
|
|
rc = qmckl_sm_splitting_core_8(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 9: {
|
|
rc = qmckl_sm_splitting_core_9(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 10: {
|
|
rc = qmckl_sm_splitting_core_10(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 11: {
|
|
rc = qmckl_sm_splitting_core_11(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 12: {
|
|
rc = qmckl_sm_splitting_core_12(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 13: {
|
|
rc = qmckl_sm_splitting_core_13(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 14: {
|
|
rc = qmckl_sm_splitting_core_14(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 15: {
|
|
rc = qmckl_sm_splitting_core_15(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 16: {
|
|
rc = qmckl_sm_splitting_core_16(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 17: {
|
|
rc = qmckl_sm_splitting_core_17(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 18: {
|
|
rc = qmckl_sm_splitting_core_18(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 19: {
|
|
rc = qmckl_sm_splitting_core_19(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 20: {
|
|
rc = qmckl_sm_splitting_core_20(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 21: {
|
|
rc = qmckl_sm_splitting_core_21(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">default</span>: {
|
|
assert(0 == 1 && <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
}
|
|
} <span style="color: #a020f0;">else</span> {
|
|
rc = qmckl_sm_splitting_core_hpc(
|
|
context, LDS, Dim, N_updates, Updates, Updates_index,
|
|
breakdown, Slater_inv, later_updates,
|
|
later_index, &later, determinant);
|
|
}
|
|
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
|
|
<span style="color: #a020f0;">if</span> (later > 0) {
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_sm_splitting_hpc(
|
|
context, LDS, Dim, later,
|
|
later_updates, later_index,
|
|
breakdown, Slater_inv, determinant);
|
|
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
printf(<span style="color: #8b2252;">"Entering 'qmckl_sm_splitting'\n"</span>);
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
printf(<span style="color: #8b2252;">"Leaving 'qmckl_sm_splitting'\n"</span>);
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org83d10c2" class="outline-4">
|
|
<h4 id="org83d10c2"><span class="section-number-4">4.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
|
|
<div class="outline-text-4" id="text-4-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgc65cc5b" class="outline-4">
|
|
<h4 id="orgc65cc5b"><span class="section-number-4">4.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-4-1-8">
|
|
<p>
|
|
This kernel performs best when there are 2 or more rank-1 update cycles and the fail-rate is high.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org224e869" class="outline-4">
|
|
<h4 id="org224e869"><span class="section-number-4">4.1.9</span> Test</h4>
|
|
<div class="outline-text-4" id="text-4-1-9">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span>(Updates3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv3_2 != <span style="color: #008b8b;">NULL</span>);
|
|
det = -1.23743195512859e-09;
|
|
rc = qmckl_sm_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
|
|
<span style="color: #0000ff;">assert</span>(fabs(det - 1.602708950725074e-10) < 1e-15);
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div id="outline-container-orgad6265b" class="outline-2">
|
|
<h2 id="orgad6265b"><span class="section-number-2">5</span> End of files</h2>
|
|
<div class="outline-text-2" id="text-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span> (<span style="color: #228b22;">qmckl_context_destroy</span>(<span style="color: #a0522d;">context</span>) == QMCKL_SUCCESS);
|
|
<span style="color: #a020f0;">return</span> 0;
|
|
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="postamble" class="status">
|
|
<p class="author">Author: TREX CoE</p>
|
|
<p class="date">Created: 2023-03-09 Thu 10:03</p>
|
|
<p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
|
|
</div>
|
|
</body>
|
|
</html>
|