mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-03 20:54:09 +01:00
9933 lines
651 KiB
HTML
9933 lines
651 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
|
<head>
|
|
<!-- 2023-02-14 Tue 12:35 -->
|
|
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
<title>Sherman-Morrison-Woodbury</title>
|
|
<meta name="generator" content="Org mode" />
|
|
<meta name="author" content="TREX CoE" />
|
|
<style type="text/css">
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
.title { text-align: center;
|
|
margin-bottom: .2em; }
|
|
.subtitle { text-align: center;
|
|
font-size: medium;
|
|
font-weight: bold;
|
|
margin-top:0; }
|
|
.todo { font-family: monospace; color: red; }
|
|
.done { font-family: monospace; color: green; }
|
|
.priority { font-family: monospace; color: orange; }
|
|
.tag { background-color: #eee; font-family: monospace;
|
|
padding: 2px; font-size: 80%; font-weight: normal; }
|
|
.timestamp { color: #bebebe; }
|
|
.timestamp-kwd { color: #5f9ea0; }
|
|
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
|
|
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
|
|
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
|
|
.underline { text-decoration: underline; }
|
|
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
|
|
p.verse { margin-left: 3%; }
|
|
pre {
|
|
border: 1px solid #ccc;
|
|
box-shadow: 3px 3px 3px #eee;
|
|
padding: 8pt;
|
|
font-family: monospace;
|
|
overflow: auto;
|
|
margin: 1.2em;
|
|
}
|
|
pre.src {
|
|
position: relative;
|
|
overflow: visible;
|
|
padding-top: 1.2em;
|
|
}
|
|
pre.src:before {
|
|
display: none;
|
|
position: absolute;
|
|
background-color: white;
|
|
top: -10px;
|
|
right: 10px;
|
|
padding: 3px;
|
|
border: 1px solid black;
|
|
}
|
|
pre.src:hover:before { display: inline;}
|
|
/* Languages per Org manual */
|
|
pre.src-asymptote:before { content: 'Asymptote'; }
|
|
pre.src-awk:before { content: 'Awk'; }
|
|
pre.src-C:before { content: 'C'; }
|
|
/* pre.src-C++ doesn't work in CSS */
|
|
pre.src-clojure:before { content: 'Clojure'; }
|
|
pre.src-css:before { content: 'CSS'; }
|
|
pre.src-D:before { content: 'D'; }
|
|
pre.src-ditaa:before { content: 'ditaa'; }
|
|
pre.src-dot:before { content: 'Graphviz'; }
|
|
pre.src-calc:before { content: 'Emacs Calc'; }
|
|
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
|
|
pre.src-fortran:before { content: 'Fortran'; }
|
|
pre.src-gnuplot:before { content: 'gnuplot'; }
|
|
pre.src-haskell:before { content: 'Haskell'; }
|
|
pre.src-hledger:before { content: 'hledger'; }
|
|
pre.src-java:before { content: 'Java'; }
|
|
pre.src-js:before { content: 'Javascript'; }
|
|
pre.src-latex:before { content: 'LaTeX'; }
|
|
pre.src-ledger:before { content: 'Ledger'; }
|
|
pre.src-lisp:before { content: 'Lisp'; }
|
|
pre.src-lilypond:before { content: 'Lilypond'; }
|
|
pre.src-lua:before { content: 'Lua'; }
|
|
pre.src-matlab:before { content: 'MATLAB'; }
|
|
pre.src-mscgen:before { content: 'Mscgen'; }
|
|
pre.src-ocaml:before { content: 'Objective Caml'; }
|
|
pre.src-octave:before { content: 'Octave'; }
|
|
pre.src-org:before { content: 'Org mode'; }
|
|
pre.src-oz:before { content: 'OZ'; }
|
|
pre.src-plantuml:before { content: 'Plantuml'; }
|
|
pre.src-processing:before { content: 'Processing.js'; }
|
|
pre.src-python:before { content: 'Python'; }
|
|
pre.src-R:before { content: 'R'; }
|
|
pre.src-ruby:before { content: 'Ruby'; }
|
|
pre.src-sass:before { content: 'Sass'; }
|
|
pre.src-scheme:before { content: 'Scheme'; }
|
|
pre.src-screen:before { content: 'Gnu Screen'; }
|
|
pre.src-sed:before { content: 'Sed'; }
|
|
pre.src-sh:before { content: 'shell'; }
|
|
pre.src-sql:before { content: 'SQL'; }
|
|
pre.src-sqlite:before { content: 'SQLite'; }
|
|
/* additional languages in org.el's org-babel-load-languages alist */
|
|
pre.src-forth:before { content: 'Forth'; }
|
|
pre.src-io:before { content: 'IO'; }
|
|
pre.src-J:before { content: 'J'; }
|
|
pre.src-makefile:before { content: 'Makefile'; }
|
|
pre.src-maxima:before { content: 'Maxima'; }
|
|
pre.src-perl:before { content: 'Perl'; }
|
|
pre.src-picolisp:before { content: 'Pico Lisp'; }
|
|
pre.src-scala:before { content: 'Scala'; }
|
|
pre.src-shell:before { content: 'Shell Script'; }
|
|
pre.src-ebnf2ps:before { content: 'ebnf2ps'; }
|
|
/* additional language identifiers per "defun org-babel-execute"
|
|
in ob-*.el */
|
|
pre.src-cpp:before { content: 'C++'; }
|
|
pre.src-abc:before { content: 'ABC'; }
|
|
pre.src-coq:before { content: 'Coq'; }
|
|
pre.src-groovy:before { content: 'Groovy'; }
|
|
/* additional language identifiers from org-babel-shell-names in
|
|
ob-shell.el: ob-shell is the only babel language using a lambda to put
|
|
the execution function name together. */
|
|
pre.src-bash:before { content: 'bash'; }
|
|
pre.src-csh:before { content: 'csh'; }
|
|
pre.src-ash:before { content: 'ash'; }
|
|
pre.src-dash:before { content: 'dash'; }
|
|
pre.src-ksh:before { content: 'ksh'; }
|
|
pre.src-mksh:before { content: 'mksh'; }
|
|
pre.src-posh:before { content: 'posh'; }
|
|
/* Additional Emacs modes also supported by the LaTeX listings package */
|
|
pre.src-ada:before { content: 'Ada'; }
|
|
pre.src-asm:before { content: 'Assembler'; }
|
|
pre.src-caml:before { content: 'Caml'; }
|
|
pre.src-delphi:before { content: 'Delphi'; }
|
|
pre.src-html:before { content: 'HTML'; }
|
|
pre.src-idl:before { content: 'IDL'; }
|
|
pre.src-mercury:before { content: 'Mercury'; }
|
|
pre.src-metapost:before { content: 'MetaPost'; }
|
|
pre.src-modula-2:before { content: 'Modula-2'; }
|
|
pre.src-pascal:before { content: 'Pascal'; }
|
|
pre.src-ps:before { content: 'PostScript'; }
|
|
pre.src-prolog:before { content: 'Prolog'; }
|
|
pre.src-simula:before { content: 'Simula'; }
|
|
pre.src-tcl:before { content: 'tcl'; }
|
|
pre.src-tex:before { content: 'TeX'; }
|
|
pre.src-plain-tex:before { content: 'Plain TeX'; }
|
|
pre.src-verilog:before { content: 'Verilog'; }
|
|
pre.src-vhdl:before { content: 'VHDL'; }
|
|
pre.src-xml:before { content: 'XML'; }
|
|
pre.src-nxml:before { content: 'XML'; }
|
|
/* add a generic configuration mode; LaTeX export needs an additional
|
|
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
|
|
pre.src-conf:before { content: 'Configuration File'; }
|
|
|
|
table { border-collapse:collapse; }
|
|
caption.t-above { caption-side: top; }
|
|
caption.t-bottom { caption-side: bottom; }
|
|
td, th { vertical-align:top; }
|
|
th.org-right { text-align: center; }
|
|
th.org-left { text-align: center; }
|
|
th.org-center { text-align: center; }
|
|
td.org-right { text-align: right; }
|
|
td.org-left { text-align: left; }
|
|
td.org-center { text-align: center; }
|
|
dt { font-weight: bold; }
|
|
.footpara { display: inline; }
|
|
.footdef { margin-bottom: 1em; }
|
|
.figure { padding: 1em; }
|
|
.figure p { text-align: center; }
|
|
.equation-container {
|
|
display: table;
|
|
text-align: center;
|
|
width: 100%;
|
|
}
|
|
.equation {
|
|
vertical-align: middle;
|
|
}
|
|
.equation-label {
|
|
display: table-cell;
|
|
text-align: right;
|
|
vertical-align: middle;
|
|
}
|
|
.inlinetask {
|
|
padding: 10px;
|
|
border: 2px solid gray;
|
|
margin: 10px;
|
|
background: #ffffcc;
|
|
}
|
|
#org-div-home-and-up
|
|
{ text-align: right; font-size: 70%; white-space: nowrap; }
|
|
textarea { overflow-x: auto; }
|
|
.linenr { font-size: smaller }
|
|
.code-highlighted { background-color: #ffff00; }
|
|
.org-info-js_info-navigation { border-style: none; }
|
|
#org-info-js_console-label
|
|
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
|
|
.org-info-js_search-highlight
|
|
{ background-color: #ffff00; color: #000000; font-weight: bold; }
|
|
.org-svg { width: 90%; }
|
|
/*]]>*/-->
|
|
</style>
|
|
<link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
|
|
|
|
<script type="text/javascript" src="org-info.js">
|
|
/**
|
|
*
|
|
* @source: org-info.js
|
|
*
|
|
* @licstart The following is the entire license notice for the
|
|
* JavaScript code in org-info.js.
|
|
*
|
|
* Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
*
|
|
*
|
|
* The JavaScript code in this tag is free software: you can
|
|
* redistribute it and/or modify it under the terms of the GNU
|
|
* General Public License (GNU GPL) as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option)
|
|
* any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
*
|
|
* As additional permission under GNU GPL version 3 section 7, you
|
|
* may distribute non-source (e.g., minimized or compacted) forms of
|
|
* that code without the copy of the GNU GPL normally required by
|
|
* section 4, provided you include this license notice and a URL
|
|
* through which recipients can access the Corresponding Source.
|
|
*
|
|
* @licend The above is the entire license notice
|
|
* for the JavaScript code in org-info.js.
|
|
*
|
|
*/
|
|
</script>
|
|
|
|
<script type="text/javascript">
|
|
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
org_html_manager.set("TOC_DEPTH", "4");
|
|
org_html_manager.set("LINK_HOME", "index.html");
|
|
org_html_manager.set("LINK_UP", "");
|
|
org_html_manager.set("LOCAL_TOC", "1");
|
|
org_html_manager.set("VIEW_BUTTONS", "0");
|
|
org_html_manager.set("MOUSE_HINT", "underline");
|
|
org_html_manager.set("FIXED_TOC", "0");
|
|
org_html_manager.set("TOC", "1");
|
|
org_html_manager.set("VIEW", "info");
|
|
org_html_manager.setup(); // activate after the parameters are set
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/javascript">
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
/**
 * Highlight a code reference together with the code line it points to.
 *
 * Only the "code-highlighted" class token is added, so every other class
 * already present on either element is kept while the highlight is shown.
 *
 * @param {Element} elem  Element that triggered the highlight.
 * @param {string}  id    Id of the element to highlight alongside elem.
 *                        When no element has this id, nothing is changed.
 */
function CodeHighlightOn(elem, id)
{
  var target = document.getElementById(id);
  if (null != target) {
    elem.classList.add("code-highlighted");
    target.classList.add("code-highlighted");
  }
}

/**
 * Undo the highlight set by CodeHighlightOn.
 *
 * The class token is removed unconditionally: the previous implementation
 * restored a cached class string only when that string was non-empty, so an
 * element that originally had no class at all stayed highlighted forever.
 *
 * @param {Element} elem  Element that triggered the highlight.
 * @param {string}  id    Id of the element highlighted alongside elem.
 *                        A missing target is tolerated.
 */
function CodeHighlightOff(elem, id)
{
  var target = document.getElementById(id);
  elem.classList.remove("code-highlighted");
  if (null != target) {
    target.classList.remove("code-highlighted");
  }
}
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/x-mathjax-config">
|
|
// MathJax v2 configuration for the formulas in this page.
// linebreaks.automatic must be a real boolean: the string "false" is a
// non-empty string and therefore truthy in JavaScript, which would switch
// automatic line breaking ON instead of off.
MathJax.Hub.Config({
    displayAlign: "center",
    displayIndent: "0em",

    "HTML-CSS": { scale: 100,
                    linebreaks: { automatic: false },
                    webFont: "TeX"
                   },
    SVG: {scale: 100,
          linebreaks: { automatic: false },
          font: "TeX"},
    NativeMML: {scale: 100},
    // AMS-style automatic equation numbering, tags on the right-hand side.
    TeX: { equationNumbers: {autoNumber: "AMS"},
           MultLineWidth: "85%",
           TagSide: "right",
           TagIndent: ".8em"
         }
});
|
|
</script>
|
|
<script type="text/javascript"
|
|
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_HTML"></script>
|
|
</head>
|
|
<body>
|
|
<div id="org-div-home-and-up">
|
|
<a accesskey="h" href=""> UP </a>
|
|
|
|
|
<a accesskey="H" href="index.html"> HOME </a>
|
|
</div><div id="content">
|
|
<h1 class="title">Sherman-Morrison-Woodbury</h1>
|
|
<div id="table-of-contents">
|
|
<h2>Table of Contents</h2>
|
|
<div id="text-table-of-contents">
|
|
<ul>
|
|
<li><a href="#org4acc4e8">1. Headers</a></li>
|
|
<li><a href="#org48acf0b">2. Naïve Sherman-Morrison</a>
|
|
<ul>
|
|
<li><a href="#org3be8ad6">2.1. <code>qmckl_sherman_morrison_naive</code></a>
|
|
<ul>
|
|
<li><a href="#org1e7c2fb">2.1.1. Requirements</a></li>
|
|
<li><a href="#org03add53">2.1.2. C header</a></li>
|
|
<li><a href="#orgd3ba045">2.1.3. Performance</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org90ecc0a">3. Woodbury 2x2</a>
|
|
<ul>
|
|
<li><a href="#org3941fc2">3.1. <code>qmckl_woodbury_2x2</code></a>
|
|
<ul>
|
|
<li><a href="#orgf630e48">3.1.1. Requirements</a></li>
|
|
<li><a href="#org8e6d252">3.1.2. C header</a></li>
|
|
<li><a href="#org7c7e324">3.1.3. C source</a></li>
|
|
<li><a href="#org7bb60b6">3.1.4. Performance</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org30fa431">4. Woodbury 3x3</a>
|
|
<ul>
|
|
<li><a href="#org651ef38">4.1. <code>qmckl_woodbury_3x3</code></a>
|
|
<ul>
|
|
<li><a href="#org6dd14e5">4.1.1. Requirements</a></li>
|
|
<li><a href="#org23c0058">4.1.2. C header</a></li>
|
|
<li><a href="#org2c0a917">4.1.3. C source</a></li>
|
|
<li><a href="#orgb9b802e">4.1.4. Performance…</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orgcab48ae">5. Sherman-Morrison with update splitting</a>
|
|
<ul>
|
|
<li><a href="#org8b56fa0">5.1. <code>qmckl_sherman_morrison_splitting</code></a>
|
|
<ul>
|
|
<li><a href="#org497be13">5.1.1. Requirements</a></li>
|
|
<li><a href="#orgf7014af">5.1.2. C header</a></li>
|
|
<li><a href="#orgb2e07aa">5.1.3. C source</a></li>
|
|
<li><a href="#org06e3e4f">5.1.4. Performance…</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org4ae3319">6. Woodbury 3x3 and 2x2 with Sherman-Morrison and update splitting</a>
|
|
<ul>
|
|
<li><a href="#org74035f3">6.1. <code>qmckl_sherman_morrison_smw32s</code></a>
|
|
<ul>
|
|
<li><a href="#orgf4a0dec">6.1.1. Requirements</a></li>
|
|
<li><a href="#org3ea8837">6.1.2. C header</a></li>
|
|
<li><a href="#org89b8220">6.1.3. C source</a></li>
|
|
<li><a href="#orgba7b003">6.1.4. Performance…</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org1bf2a57">7. Helper Functions</a>
|
|
<ul>
|
|
<li><a href="#org199a6ae">7.1. <code>qmckl_slagel_splitting</code></a>
|
|
<ul>
|
|
<li><a href="#org43dc581">7.1.1. Requirements</a></li>
|
|
<li><a href="#orgc4613cf">7.1.2. C header</a></li>
|
|
<li><a href="#org48b08bd">7.1.3. C source</a></li>
|
|
<li><a href="#org02dd595">7.1.4. Performance</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orgaac1d31">8. End of files</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org4acc4e8" class="outline-2">
|
|
<h2 id="org4acc4e8"><span class="section-number-2">1</span> Headers</h2>
|
|
<div class="outline-text-2" id="text-1">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
|
|
<span style="color: #483d8b;">#ifdef</span> HAVE_CONFIG_H
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
<span style="color: #483d8b;">#endif</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><math.h></span>
|
|
|
|
<span style="color: #228b22;">int</span> <span style="color: #0000ff;">main</span>() {
|
|
<span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>;
|
|
context = qmckl_context_create();
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org48acf0b" class="outline-2">
|
|
<h2 id="org48acf0b"><span class="section-number-2">2</span> Naïve Sherman-Morrison</h2>
|
|
<div class="outline-text-2" id="text-2">
|
|
</div>
|
|
<div id="outline-container-org3be8ad6" class="outline-3">
|
|
<h3 id="org3be8ad6"><span class="section-number-3">2.1</span> <code>qmckl_sherman_morrison_naive</code></h3>
|
|
<div class="outline-text-3" id="text-2-1">
|
|
<p>
|
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
|
zero when an update is evaluated. It will exit with an error code if the denominator is too close to zero.
|
|
</p>
|
|
|
|
<p>
|
|
The formula for any update \(u_j\) (index \(j\) is suppressed for clarity) that is applied is
|
|
\[
|
|
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
|
\]
|
|
</p>
|
|
|
|
<p>
|
|
where
|
|
\(S\) is the Slater-matrix,
|
|
\(u\) and \(v^T\) are the column and row vectors containing the updates,
|
|
\(S^{-1}\) is the inverse of the Slater-matrix.
|
|
</p>
|
|
|
|
<p>
|
|
Even though the Slater-matrix \(S\) with all updates applied at once is invertible, during the course of applying
|
|
updates to the inverse Slater-matrix \(S^{-1}\) one-by-one it can happen that one of the intermediate inverse
|
|
matrices \(S^{-1}\) becomes singular. Therefore a global threshold value \(\epsilon\) is defined that is used to
|
|
evaluate each individual update \(u_j\) when it is applied.
|
|
</p>
|
|
|
|
<p>
|
|
This value sets the lower bound below which the
|
|
denominator \(1+v_j^TS^{-1}u_j\) is considered to be too small and will most probably result in a singular matrix
|
|
\(S\), or at least in an inverse of \(S\) of very poor numerical quality. Therefore, when \(|1+v_j^TS^{-1}u_j| \geq \epsilon\),
|
|
the update is applied as usual and the kernel exits with return code <code>QMCKL_SUCCESS</code>.
|
|
If \(|1+v_j^TS^{-1}u_j| \lt \epsilon\) the update is rejected and the kernel exits with return code <code>QMCKL_FAILURE</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
|
|
<table id="org71ec673" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left"><code>N_updates</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left"><code>Updates[N_updates*LDS]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left"><code>Updates_index[N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the (1-based) indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left"><code>Slater_inv[LDS*Dim]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
|
|
<div id="outline-container-org1e7c2fb" class="outline-4">
|
|
<h4 id="org1e7c2fb"><span class="section-number-4">2.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-2-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 \lt breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org03add53" class="outline-4">
|
|
<h4 id="org03add53"><span class="section-number-4">2.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-2-1-2">
|
|
<div class="org-src-container">
|
|
<!-- C prototype of qmckl_sherman_morrison_naive. Each parameter is written
     "type name" (the generated listing had the two swapped), matching the
     argument table above and the implementation signature below. -->
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive</span> (
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><stdbool.h></span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><math.h></span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
|
|
// <span style="color: #b22222;">Order important because</span>
|
|
// <span style="color: #b22222;">__GNUC__ also set in ICC, ICX and CLANG</span>
|
|
// <span style="color: #b22222;">__clang__ also set in ICX</span>
|
|
<span style="color: #483d8b;">#if</span> <span style="color: #483d8b;">defined</span>(__INTEL_COMPILER)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__INTEL_LLVM_COMPILER)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span> _Pragma(<span style="color: #8b2252;">"vector aligned"</span>)
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__clang__)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"clang loop vectorize(enable)"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
|
|
<span style="color: #483d8b;">#elif</span> <span style="color: #483d8b;">defined</span>(__GNUC__)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">IVDEP</span> _Pragma(<span style="color: #8b2252;">"GCC ivdep"</span>)
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">ALIGNED</span>
|
|
<span style="color: #483d8b;">#endif</span>
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[Dim];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x u_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator: v_l^T * C</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org49148ce"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sherman_morrison_naive_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D</span>{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[{Dim}];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D2_P</span> ((1+(2-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
|
|
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
|
|
D[j] = Slater_inv[cui * D2_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D2_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D3_P</span> ((1+(3-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
|
|
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
|
|
D[j] = Slater_inv[cui * D3_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D3_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D4_P</span> ((1+(4-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[4];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
|
|
C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
|
|
D[j] = Slater_inv[cui * D4_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D4_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D5_P</span> ((1+(5-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[5];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
|
|
C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
|
|
D[j] = Slater_inv[cui * D5_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D5_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D6_P</span> ((1+(6-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[6];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
|
|
C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
|
|
D[j] = Slater_inv[cui * D6_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D6_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D6_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D7_P</span> ((1+(7-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[7];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l &lt; N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
|
|
C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting row cui of Slater_inv: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
|
|
D[j] = Slater_inv[cui * D7_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D7_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D7_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D8_P</span> ((1+(8-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[8];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
D[j] = Slater_inv[cui * D8_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D8_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D9_P</span> ((1+(9-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[9];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
D[j] = Slater_inv[cui * D9_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D9_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D10_P</span> ((1+(10-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[10];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
D[j] = Slater_inv[cui * D10_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D10_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D11_P</span> ((1+(11-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[11];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D12_P</span> ((1+(12-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[12];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
D[j] = Slater_inv[cui * D12_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D12_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D13_P</span> ((1+(13-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[13];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
D[j] = Slater_inv[cui * D13_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D13_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D14_P</span> ((1+(14-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[14];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
D[j] = Slater_inv[cui * D14_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D14_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 15-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 15 rows of D15_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D15_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..15; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_15</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_15"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 15 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D15_P</span> ((1+(15-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[15];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 15, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 15) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D15_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
      D[j] = Slater_inv[cui * D15_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D15_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 16-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 16 rows of D16_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D16_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..16; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_16</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_16"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 16 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D16_P</span> ((1+(16-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[16];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 16, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 16) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D16_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
      D[j] = Slater_inv[cui * D16_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D16_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 17-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 17 rows of D17_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D17_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..17; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_17</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_17"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 17 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D17_P</span> ((1+(17-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[17];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 17, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 17) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D17_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
      D[j] = Slater_inv[cui * D17_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D17_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for an 18-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 18 rows of D18_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D18_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..18; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_18</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_18"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 18 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D18_P</span> ((1+(18-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[18];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 18, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 18) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
        C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D18_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
      D[j] = Slater_inv[cui * D18_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 18; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D18_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D18_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 19-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 19 rows of D19_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D19_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..19; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_19</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_19"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 19 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D19_P</span> ((1+(19-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[19];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 19, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 19) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
        C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D19_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
      D[j] = Slater_inv[cui * D19_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 19; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D19_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D19_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 20-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 20 rows of D20_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D20_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..20; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_20</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_20"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 20 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D20_P</span> ((1+(20-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[20];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 20, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 20) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
        C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D20_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
      D[j] = Slater_inv[cui * D20_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 20; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D20_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D20_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison kernel specialised for a 21-row Slater inverse.</span>
// <span style="color: #b22222;">Applies N_updates rank-1 updates to Slater_inv in place, one per loop pass.</span>
// <span style="color: #b22222;">Slater_inv is indexed as 21 rows of D21_P doubles and Updates as N_updates</span>
// <span style="color: #b22222;">rows of D21_P doubles; Updates_index holds 1-based row numbers.</span>
// <span style="color: #b22222;">If determinant is not NULL it is multiplied by each update's denominator.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when |den| is below breakdown or an</span>
// <span style="color: #b22222;">update index is outside 1..21; updates applied before the failure are kept.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive_21</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith( context,
                           QMCKL_NULL_CONTEXT,
                           <span style="color: #8b2252;">"qmckl_sherman_morrison_naive_21"</span>,
                           <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Row stride: 21 rounded up to a multiple of SIMD_LENGTH</span>
  // <span style="color: #b22222;">(assumes SIMD_LENGTH is a positive integer constant - TODO confirm)</span>
  <span style="color: #483d8b;">#define</span> <span style="color: #a0522d;">D21_P</span> ((1+(21-1)/SIMD_LENGTH)*SIMD_LENGTH)

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[21];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">Zero-based row of this update. The subtraction is unsigned, so an</span>
    // <span style="color: #b22222;">index of 0 wraps to a huge value and is rejected by the same test</span>
    // <span style="color: #b22222;">that rejects indices above 21, before C[] or Slater_inv is touched.</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #a020f0;">if</span> (cui &gt;= 21) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }

    // <span style="color: #b22222;">C = A^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
      C[i] = 0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
        C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];

    // <span style="color: #b22222;">Stop before dividing by a denominator smaller than the threshold</span>
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    // <span style="color: #b22222;">Update det(A)</span>
    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v_l^T * S_inv: copy the D21_P entries stored at row offset cui,</span>
    // <span style="color: #b22222;">so the update below does not read values it is overwriting</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
      D[j] = Slater_inv[cui * D21_P + j];
    }

    // <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 21; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D21_P; j++) {
        <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D21_P + j] -= update;
      }
    }

    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_naive</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sherman_morrison_naive"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_2(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_3(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_4(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_5(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_6(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_7(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_8(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_9(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_10(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_11(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_12(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_13(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_14(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_15(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_16(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_17(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_18(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_19(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_20(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_21(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sherman_morrison_naive_hpc(context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div id="outline-container-orgd3ba045" class="outline-4">
|
|
<h4 id="orgd3ba045"><span class="section-number-4">2.1.3</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-2-1-3">
|
|
<p>
|
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is not useful to
|
|
use Sherman-Morrison with update splitting for these cycles since splitting can never resolve a situation
|
|
where applying the update causes singular behaviour.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org90ecc0a" class="outline-2">
|
|
<h2 id="org90ecc0a"><span class="section-number-2">3</span> Woodbury 2x2</h2>
|
|
<div class="outline-text-2" id="text-3">
|
|
</div>
|
|
<div id="outline-container-org3941fc2" class="outline-3">
|
|
<h3 id="org3941fc2"><span class="section-number-3">3.1</span> <code>qmckl_woodbury_2x2</code></h3>
|
|
<div class="outline-text-3" id="text-3-1">
|
|
<p>
|
|
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
|
this algorithm is called the Woodbury Matrix Identity
|
|
\[
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
\]
|
|
where
|
|
\(S\) is the Slater-matrix,
|
|
\(U\) and \(V\) are, respectively, the matrix containing the updates and the canonical basis matrix,
|
|
\(S^{-1}\) is the inverse of the Slater-matrix,
|
|
\(C := S^{-1}U\) is a \(Dim \times 2\) matrix,
|
|
\(B := 1 + VC\) (with \(1\) the \(2 \times 2\) identity) is the \(2 \times 2\) matrix that is going to be inverted, and
|
|
\(D := VS^{-1}\) is a \(2 \times Dim\) matrix.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
|
|
|
|
|
|
<table id="orgab2e2a2" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">qmckl_context</td>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Updates[2*LDS]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">Updates_index[2]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the (1-based) row indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Slater_inv[LDS*Dim]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
|
|
<div id="outline-container-orgf630e48" class="outline-4">
|
|
<h4 id="orgf630e48"><span class="section-number-4">3.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-3-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>qmckl_null_context</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>Updates</code> is allocated with \(2 \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(2\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 &lt; breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org8e6d252" class="outline-4">
|
|
<h4 id="org8e6d252"><span class="section-number-4">3.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-3-1-2">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org7c7e324" class="outline-4">
|
|
<h4 id="org7c7e324"><span class="section-number-4">3.1.3</span> C source</h4>
|
|
<div class="outline-text-4" id="text-3-1-3">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_hpc</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : Dim x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * Dim];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < LDS; k++) {
|
|
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x LDS</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * LDS];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * LDS]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : Dim x LDS</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="orgf70602e"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_woodbury_2x2_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : {Dim} x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * {Dim}];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D{Dim}_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D{Dim}_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D{Dim}_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D{Dim}_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 2 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 2];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D2_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D2_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D2_P + k] * Updates[D2_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D2_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D2_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D2_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D2_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D2_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 2 x D2_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
Slater_inv[i * D2_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D2_P + j] -= C[i * 2 + 1] * tmp[D2_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-2 Woodbury update, applied in place to the 3 rows of Slater_inv (row stride D3_P).</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
<span style="color: #b22222;"> On success Slater_inv becomes Slater_inv - C * B^{-1} * D, with dim = 3 here.</span>
|
|
<span style="color: #b22222;"> context       : validated with qmckl_context_check before any computation.</span>
|
|
<span style="color: #b22222;"> Updates       : two update rows of D3_P doubles each, stored back to back (read only).</span>
|
|
<span style="color: #b22222;"> Updates_index : two row numbers; each is decremented by one before use as a row index.</span>
|
|
<span style="color: #b22222;"> breakdown     : the update is refused when fabs(det(B)) is below this value.</span>
|
|
<span style="color: #b22222;"> determinant   : may be NULL; otherwise the pointed-to value is multiplied by det(B).</span>
|
|
<span style="color: #b22222;"> Returns the qmckl_failwith result for a null context, QMCKL_FAILURE on breakdown</span>
|
|
<span style="color: #b22222;"> (Slater_inv and determinant are left untouched), and QMCKL_SUCCESS otherwise.</span>
|
|
<span style="color: #b22222;"> NOTE(review): the row indices are not range checked here; presumably the caller</span>
|
|
<span style="color: #b22222;"> guarantees values in 1..3 - confirm. IVDEP and ALIGNED are macros defined outside</span>
|
|
<span style="color: #b22222;"> this block, presumably vectorization hints - confirm.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 3 x 2, stored as C[i * 2 + u] where u selects the first or second row of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 3];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D3_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D3_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D3_P + k] * Updates[D3_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2, i.e. rows row1 and row2 of C plus the 2 x 2 identity</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Refuse the update when fabs(det(B)) is below breakdown, before Slater_inv or determinant are modified</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with the explicit 2 x 2 formula: adjugate of B scaled by 1 / det</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D3_P, where the rows of D are rows row1 and row2 of Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D3_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D3_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D3_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D3_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update Slater_inv in place: (S^T)^{-1} - C * tmp : 3 x D3_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
Slater_inv[i * D3_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D3_P + j] -= C[i * 2 + 1] * tmp[D3_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-2 Woodbury update, applied in place to the 4 rows of Slater_inv (row stride D4_P).</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
<span style="color: #b22222;"> On success Slater_inv becomes Slater_inv - C * B^{-1} * D, with dim = 4 here.</span>
|
|
<span style="color: #b22222;"> context       : validated with qmckl_context_check before any computation.</span>
|
|
<span style="color: #b22222;"> Updates       : two update rows of D4_P doubles each, stored back to back (read only).</span>
|
|
<span style="color: #b22222;"> Updates_index : two row numbers; each is decremented by one before use as a row index.</span>
|
|
<span style="color: #b22222;"> breakdown     : the update is refused when fabs(det(B)) is below this value.</span>
|
|
<span style="color: #b22222;"> determinant   : may be NULL; otherwise the pointed-to value is multiplied by det(B).</span>
|
|
<span style="color: #b22222;"> Returns the qmckl_failwith result for a null context, QMCKL_FAILURE on breakdown</span>
|
|
<span style="color: #b22222;"> (Slater_inv and determinant are left untouched), and QMCKL_SUCCESS otherwise.</span>
|
|
<span style="color: #b22222;"> NOTE(review): the row indices are not range checked here; presumably the caller</span>
|
|
<span style="color: #b22222;"> guarantees values in 1..4 - confirm. IVDEP and ALIGNED are macros defined outside</span>
|
|
<span style="color: #b22222;"> this block, presumably vectorization hints - confirm.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 4 x 2, stored as C[i * 2 + u] where u selects the first or second row of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 4];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D4_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D4_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D4_P + k] * Updates[D4_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2, i.e. rows row1 and row2 of C plus the 2 x 2 identity</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Refuse the update when fabs(det(B)) is below breakdown, before Slater_inv or determinant are modified</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with the explicit 2 x 2 formula: adjugate of B scaled by 1 / det</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D4_P, where the rows of D are rows row1 and row2 of Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D4_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D4_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D4_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update Slater_inv in place: (S^T)^{-1} - C * tmp : 4 x D4_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
Slater_inv[i * D4_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 2 + 1] * tmp[D4_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-2 Woodbury update, applied in place to the 5 rows of Slater_inv (row stride D5_P).</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
<span style="color: #b22222;"> On success Slater_inv becomes Slater_inv - C * B^{-1} * D, with dim = 5 here.</span>
|
|
<span style="color: #b22222;"> context       : validated with qmckl_context_check before any computation.</span>
|
|
<span style="color: #b22222;"> Updates       : two update rows of D5_P doubles each, stored back to back (read only).</span>
|
|
<span style="color: #b22222;"> Updates_index : two row numbers; each is decremented by one before use as a row index.</span>
|
|
<span style="color: #b22222;"> breakdown     : the update is refused when fabs(det(B)) is below this value.</span>
|
|
<span style="color: #b22222;"> determinant   : may be NULL; otherwise the pointed-to value is multiplied by det(B).</span>
|
|
<span style="color: #b22222;"> Returns the qmckl_failwith result for a null context, QMCKL_FAILURE on breakdown</span>
|
|
<span style="color: #b22222;"> (Slater_inv and determinant are left untouched), and QMCKL_SUCCESS otherwise.</span>
|
|
<span style="color: #b22222;"> NOTE(review): the row indices are not range checked here; presumably the caller</span>
|
|
<span style="color: #b22222;"> guarantees values in 1..5 - confirm. IVDEP and ALIGNED are macros defined outside</span>
|
|
<span style="color: #b22222;"> this block, presumably vectorization hints - confirm.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 5 x 2, stored as C[i * 2 + u] where u selects the first or second row of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 5];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D5_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D5_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D5_P + k] * Updates[D5_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2, i.e. rows row1 and row2 of C plus the 2 x 2 identity</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Refuse the update when fabs(det(B)) is below breakdown, before Slater_inv or determinant are modified</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with the explicit 2 x 2 formula: adjugate of B scaled by 1 / det</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D5_P, where the rows of D are rows row1 and row2 of Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D5_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D5_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D5_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update Slater_inv in place: (S^T)^{-1} - C * tmp : 5 x D5_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
Slater_inv[i * D5_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 2 + 1] * tmp[D5_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-2 Woodbury update, applied in place to the 6 rows of Slater_inv (row stride D6_P).</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
<span style="color: #b22222;"> On success Slater_inv becomes Slater_inv - C * B^{-1} * D, with dim = 6 here.</span>
|
|
<span style="color: #b22222;"> context       : validated with qmckl_context_check before any computation.</span>
|
|
<span style="color: #b22222;"> Updates       : two update rows of D6_P doubles each, stored back to back (read only).</span>
|
|
<span style="color: #b22222;"> Updates_index : two row numbers; each is decremented by one before use as a row index.</span>
|
|
<span style="color: #b22222;"> breakdown     : the update is refused when fabs(det(B)) is below this value.</span>
|
|
<span style="color: #b22222;"> determinant   : may be NULL; otherwise the pointed-to value is multiplied by det(B).</span>
|
|
<span style="color: #b22222;"> Returns the qmckl_failwith result for a null context, QMCKL_FAILURE on breakdown</span>
|
|
<span style="color: #b22222;"> (Slater_inv and determinant are left untouched), and QMCKL_SUCCESS otherwise.</span>
|
|
<span style="color: #b22222;"> NOTE(review): the row indices are not range checked here; presumably the caller</span>
|
|
<span style="color: #b22222;"> guarantees values in 1..6 - confirm. IVDEP and ALIGNED are macros defined outside</span>
|
|
<span style="color: #b22222;"> this block, presumably vectorization hints - confirm.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 6 x 2, stored as C[i * 2 + u] where u selects the first or second row of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 6];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D6_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D6_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D6_P + k] * Updates[D6_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2, i.e. rows row1 and row2 of C plus the 2 x 2 identity</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Refuse the update when fabs(det(B)) is below breakdown, before Slater_inv or determinant are modified</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with the explicit 2 x 2 formula: adjugate of B scaled by 1 / det</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D6_P, where the rows of D are rows row1 and row2 of Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D6_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D6_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D6_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update Slater_inv in place: (S^T)^{-1} - C * tmp : 6 x D6_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
Slater_inv[i * D6_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 2 + 1] * tmp[D6_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-2 Woodbury update, applied in place to the 7 rows of Slater_inv (row stride D7_P).</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
<span style="color: #b22222;"> On success Slater_inv becomes Slater_inv - C * B^{-1} * D, with dim = 7 here.</span>
|
|
<span style="color: #b22222;"> context       : validated with qmckl_context_check before any computation.</span>
|
|
<span style="color: #b22222;"> Updates       : two update rows of D7_P doubles each, stored back to back (read only).</span>
|
|
<span style="color: #b22222;"> Updates_index : two row numbers; each is decremented by one before use as a row index.</span>
|
|
<span style="color: #b22222;"> breakdown     : the update is refused when fabs(det(B)) is below this value.</span>
|
|
<span style="color: #b22222;"> determinant   : may be NULL; otherwise the pointed-to value is multiplied by det(B).</span>
|
|
<span style="color: #b22222;"> Returns the qmckl_failwith result for a null context, QMCKL_FAILURE on breakdown</span>
|
|
<span style="color: #b22222;"> (Slater_inv and determinant are left untouched), and QMCKL_SUCCESS otherwise.</span>
|
|
<span style="color: #b22222;"> NOTE(review): the row indices are not range checked here; presumably the caller</span>
|
|
<span style="color: #b22222;"> guarantees values in 1..7 - confirm. IVDEP and ALIGNED are macros defined outside</span>
|
|
<span style="color: #b22222;"> this block, presumably vectorization hints - confirm.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 7 x 2, stored as C[i * 2 + u] where u selects the first or second row of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 7];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D7_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D7_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D7_P + k] * Updates[D7_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2, i.e. rows row1 and row2 of C plus the 2 x 2 identity</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Refuse the update when fabs(det(B)) is below breakdown, before Slater_inv or determinant are modified</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with the explicit 2 x 2 formula: adjugate of B scaled by 1 / det</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D7_P, where the rows of D are rows row1 and row2 of Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D7_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D7_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D7_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update Slater_inv in place: (S^T)^{-1} - C * tmp : 7 x D7_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
Slater_inv[i * D7_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 2 + 1] * tmp[D7_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 8 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 8];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D8_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D8_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D8_P + k] * Updates[D8_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D8_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D8_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D8_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D8_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 8 x D8_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
Slater_inv[i * D8_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 2 + 1] * tmp[D8_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 9 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 9];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D9_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D9_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D9_P + k] * Updates[D9_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D9_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D9_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D9_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D9_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 9 x D9_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
Slater_inv[i * D9_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 2 + 1] * tmp[D9_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 10 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 10];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D10_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D10_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D10_P + k] * Updates[D10_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D10_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D10_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D10_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D10_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 10 x D10_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
Slater_inv[i * D10_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 2 + 1] * tmp[D10_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 11 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 11];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D11_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D11_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D11_P + k] * Updates[D11_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D11_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D11_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D11_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D11_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 11 x D11_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
Slater_inv[i * D11_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 2 + 1] * tmp[D11_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 12 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 12];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D12_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D12_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D12_P + k] * Updates[D12_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D12_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D12_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D12_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D12_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 12 x D12_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
Slater_inv[i * D12_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 2 + 1] * tmp[D12_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 13 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 13];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D13_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D13_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D13_P + k] * Updates[D13_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D13_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D13_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D13_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D13_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 13 x D13_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
Slater_inv[i * D13_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 2 + 1] * tmp[D13_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 14 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 14];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D14_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D14_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D14_P + k] * Updates[D14_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D14_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D14_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D14_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D14_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 14 x D14_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
Slater_inv[i * D14_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 2 + 1] * tmp[D14_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 15 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 15];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D15_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D15_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D15_P + k] * Updates[D15_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D15_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D15_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D15_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D15_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 15 x D15_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
Slater_inv[i * D15_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 2 + 1] * tmp[D15_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 16 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 16];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D16_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D16_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D16_P + k] * Updates[D16_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D16_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D16_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D16_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D16_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 16 x D16_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
Slater_inv[i * D16_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 2 + 1] * tmp[D16_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 17 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 17];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D17_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D17_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D17_P + k] * Updates[D17_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D17_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D17_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D17_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D17_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 17 x D17_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
Slater_inv[i * D17_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 2 + 1] * tmp[D17_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 18 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 18];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D18_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D18_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D18_P + k] * Updates[D18_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D18_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D18_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D18_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D18_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 18 x D18_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
Slater_inv[i * D18_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 2 + 1] * tmp[D18_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 19 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 19];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D19_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D19_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D19_P + k] * Updates[D19_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D19_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D19_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D19_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D19_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 19 x D19_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
Slater_inv[i * D19_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 2 + 1] * tmp[D19_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 20 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 20];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D20_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D20_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D20_P + k] * Updates[D20_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D20_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D20_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D20_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D20_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 20 x D20_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
Slater_inv[i * D20_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 2 + 1] * tmp[D20_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 21 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 21];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D21_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D21_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D21_P + k] * Updates[D21_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D21_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D21_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D21_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D21_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 21 x D21_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
Slater_inv[i * D21_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 2 + 1] * tmp[D21_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_2(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_3(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_4(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_5(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_6(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_7(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_8(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_9(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_10(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_11(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_12(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_13(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_14(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_15(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_16(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_17(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_18(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_19(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_20(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_21(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_hpc(context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
|
|
<div id="outline-container-org7bb60b6" class="outline-4">
|
|
<h4 id="org7bb60b6"><span class="section-number-4">3.1.4</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-3-1-4">
|
|
<p>
|
|
This function is most efficient when used in cases where there are only 2 rank-1 updates and
|
|
it is certain that they will not result in a singular matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org30fa431" class="outline-2">
|
|
<h2 id="org30fa431"><span class="section-number-2">4</span> Woodbury 3x3</h2>
|
|
<div class="outline-text-2" id="text-4">
|
|
</div>
|
|
<div id="outline-container-org651ef38" class="outline-3">
|
|
<h3 id="org651ef38"><span class="section-number-3">4.1</span> <code>qmckl_woodbury_3x3</code></h3>
|
|
<div class="outline-text-3" id="text-4-1">
|
|
<p>
|
|
The 3x3 version of the Woodbury 2x2 kernel. It is used to apply three
|
|
rank-1 updates at once. The formula used in this kernel is the same as for Woodbury 2x2,
|
|
except for the sizes of the following matrices:
|
|
</p>
|
|
|
|
<p>
|
|
\(C := S^{-1}U\), a \(Dim \times 3\) matrix;
|
|
\(B := 1 + VC\), the \(3 \times 3\) matrix that is going to be inverted;
|
|
\(D := VS^{-1}\), a \(3 \times Dim\) matrix.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
#pragma ivdep
|
|
#pragma vector aligned
|
|
</p>
|
|
|
|
<table id="org15a40c5" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">qmckl<sub>context</sub></td>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Updates[3*LDS]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Updates<sub>index</sub>[3]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the 1-based indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Slater<sub>inv</sub>[LDS*Dim]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
|
|
<div id="outline-container-org6dd14e5" class="outline-4">
|
|
<h4 id="org6dd14e5"><span class="section-number-4">4.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-4-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>qmckl_null_context</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>Updates</code> is allocated with \(3 \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(3\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org23c0058" class="outline-4">
|
|
<h4 id="org23c0058"><span class="section-number-4">4.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-4-1-2">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org2c0a917" class="outline-4">
|
|
<h4 id="org2c0a917"><span class="section-number-4">4.1.3</span> C source</h4>
|
|
<div class="outline-text-4" id="text-4-1-3">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_hpc</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : Dim x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * Dim];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < LDS; k++) {
|
|
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x LDS</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * LDS];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * LDS]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[LDS + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * LDS + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : Dim x LDS</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
<!--
  Source listing (C template): qmckl_woodbury_3x3_{Dim}.

  The {Dim} placeholders appear verbatim in the function name, the loop
  bounds and the D{Dim}_P stride; presumably a generator substitutes a
  concrete dimension for each one (TODO confirm against the build scripts).

  What the listed code does:
    1. builds C (3 values per row) from Slater_inv and the three update
       vectors stored back to back in Updates;
    2. forms the 3 x 3 matrix B from the rows of C selected by Updates_index
       (each entry has 1 subtracted, i.e. it is treated as 1-based);
    3. returns QMCKL_FAILURE when |det(B)| is below breakdown;
    4. otherwise multiplies *determinant by det(B) when that pointer is
       non-null, and subtracts C * B^{-1} * D from Slater_inv in place,
       returning QMCKL_SUCCESS.

  Markup note: inside the pre element the C less-than and address-of
  operators are written as character references so that the document stays
  well-formed XHTML. Every line break inside pre is rendered, so the listing
  carries only the line breaks of the C source itself.
-->
<div class="org-src-container">
<pre class="src src-c" id="org2e13b76"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_woodbury_3x3_</span>{Dim}(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  /*
  <span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
  <span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
  <span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
  */

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_woodbury_3x3_{Dim}"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);

  // <span style="color: #b22222;">Compute C = (S^T)^{-1}U : {Dim} x 3</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * {Dim}];
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
    C[i * 3] = 0;
    C[i * 3 + 1] = 0;
    C[i * 3 + 2] = 0;
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; D{Dim}_P; k++) {
      C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
      C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
      C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
    }
  }

  // <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;

  // <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
  <span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
  det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
        B2 * (B3 * B7 - B4 * B6);
  <span style="color: #a020f0;">if</span> (fabs(det) &lt; breakdown) {
    <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
  }

  // <span style="color: #b22222;">Update det(Slater) if passed</span>
  <span style="color: #a020f0;">if</span> (determinant)
    *determinant *= det;

  // <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
  Binv[0] = (B4 * B8 - B7 * B5) * idet;
  Binv[1] = -(B1 * B8 - B7 * B2) * idet;
  Binv[2] = (B1 * B5 - B4 * B2) * idet;
  Binv[3] = -(B3 * B8 - B6 * B5) * idet;
  Binv[4] = (B0 * B8 - B6 * B2) * idet;
  Binv[5] = -(B0 * B5 - B3 * B2) * idet;
  Binv[6] = (B3 * B7 - B6 * B4) * idet;
  Binv[7] = -(B0 * B7 - B6 * B1) * idet;
  Binv[8] = (B0 * B4 - B3 * B1) * idet;

  // <span style="color: #b22222;">tmp = B^{-1}D : 3 x D{Dim}_P</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D{Dim}_P];
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &amp;(Slater_inv[row1 * D{Dim}_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &amp;(Slater_inv[row2 * D{Dim}_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &amp;(Slater_inv[row3 * D{Dim}_P]);
  <span style="color: #228b22;">IVDEP</span>
  <span style="color: #0000ff;">ALIGNED</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
    tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
    tmp[D{Dim}_P + j] =
        Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
    tmp[2 * D{Dim}_P + j] =
        Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
  }

  // <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; {Dim}; i++) {
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D{Dim}_P; j++) {
      Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
      Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
      Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
</pre>
</div>
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><!--
  Source listing (C): qmckl_woodbury_3x3_2, the instance of the 3 x 3
  Woodbury listing with loop bound 2 and row stride D2_P (D2_P is not
  defined in this listing).

  What the listed code does: builds C from Slater_inv and the three update
  vectors in Updates, forms the 3 x 3 matrix B from the rows of C selected
  by Updates_index (each entry has 1 subtracted), returns QMCKL_FAILURE when
  |det(B)| is below breakdown, otherwise multiplies *determinant by det(B)
  when that pointer is non-null and subtracts C * B^{-1} * D from Slater_inv
  in place, returning QMCKL_SUCCESS.

  Markup note: the C less-than and address-of operators are written as
  character references so the document stays well-formed XHTML; the comment
  closes on the first code line so that no extra line break is rendered.
--><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_2</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  /*
  <span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
  <span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
  <span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
  */

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_woodbury_3x3_2"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);

  // <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 2 x 3</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 2];
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
    C[i * 3] = 0;
    C[i * 3 + 1] = 0;
    C[i * 3 + 2] = 0;
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; D2_P; k++) {
      C[i * 3] += Slater_inv[i * D2_P + k] * Updates[k];
      C[i * 3 + 1] += Slater_inv[i * D2_P + k] * Updates[D2_P + k];
      C[i * 3 + 2] += Slater_inv[i * D2_P + k] * Updates[2 * D2_P + k];
    }
  }

  // <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;

  // <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
  <span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
  det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
        B2 * (B3 * B7 - B4 * B6);
  <span style="color: #a020f0;">if</span> (fabs(det) &lt; breakdown) {
    <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
  }

  // <span style="color: #b22222;">Update det(Slater) if passed</span>
  <span style="color: #a020f0;">if</span> (determinant)
    *determinant *= det;

  // <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
  Binv[0] = (B4 * B8 - B7 * B5) * idet;
  Binv[1] = -(B1 * B8 - B7 * B2) * idet;
  Binv[2] = (B1 * B5 - B4 * B2) * idet;
  Binv[3] = -(B3 * B8 - B6 * B5) * idet;
  Binv[4] = (B0 * B8 - B6 * B2) * idet;
  Binv[5] = -(B0 * B5 - B3 * B2) * idet;
  Binv[6] = (B3 * B7 - B6 * B4) * idet;
  Binv[7] = -(B0 * B7 - B6 * B1) * idet;
  Binv[8] = (B0 * B4 - B3 * B1) * idet;

  // <span style="color: #b22222;">tmp = B^{-1}D : 3 x D2_P</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D2_P];
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &amp;(Slater_inv[row1 * D2_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &amp;(Slater_inv[row2 * D2_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &amp;(Slater_inv[row3 * D2_P]);
  <span style="color: #228b22;">IVDEP</span>
  <span style="color: #0000ff;">ALIGNED</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
    tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
    tmp[D2_P + j] =
        Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
    tmp[2 * D2_P + j] =
        Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
  }

  // <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 2 x D2_P</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
      Slater_inv[i * D2_P + j] -= C[i * 3] * tmp[j];
      Slater_inv[i * D2_P + j] -= C[i * 3 + 1] * tmp[D2_P + j];
      Slater_inv[i * D2_P + j] -= C[i * 3 + 2] * tmp[2 * D2_P + j];
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
<!--
  Source listing (C): qmckl_woodbury_3x3_3, the instance of the 3 x 3
  Woodbury listing with loop bound 3 and row stride D3_P (D3_P is not
  defined in this listing).

  What the listed code does: builds C from Slater_inv and the three update
  vectors in Updates, forms the 3 x 3 matrix B from the rows of C selected
  by Updates_index (each entry has 1 subtracted), returns QMCKL_FAILURE when
  |det(B)| is below breakdown, otherwise multiplies *determinant by det(B)
  when that pointer is non-null and subtracts C * B^{-1} * D from Slater_inv
  in place, returning QMCKL_SUCCESS.

  Markup note: the C less-than and address-of operators are written as
  character references so the document stays well-formed XHTML; the comment
  closes on the first code line so that no extra line break is rendered.
--><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_3</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
  /*
  <span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
  <span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
  <span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
  */

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_woodbury_3x3_3"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);

  // <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 3 x 3</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 3];
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
    C[i * 3] = 0;
    C[i * 3 + 1] = 0;
    C[i * 3 + 2] = 0;
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k &lt; D3_P; k++) {
      C[i * 3] += Slater_inv[i * D3_P + k] * Updates[k];
      C[i * 3 + 1] += Slater_inv[i * D3_P + k] * Updates[D3_P + k];
      C[i * 3 + 2] += Slater_inv[i * D3_P + k] * Updates[2 * D3_P + k];
    }
  }

  // <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
  <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;

  // <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
  <span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
  det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
        B2 * (B3 * B7 - B4 * B6);
  <span style="color: #a020f0;">if</span> (fabs(det) &lt; breakdown) {
    <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
  }

  // <span style="color: #b22222;">Update det(Slater) if passed</span>
  <span style="color: #a020f0;">if</span> (determinant)
    *determinant *= det;

  // <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
  Binv[0] = (B4 * B8 - B7 * B5) * idet;
  Binv[1] = -(B1 * B8 - B7 * B2) * idet;
  Binv[2] = (B1 * B5 - B4 * B2) * idet;
  Binv[3] = -(B3 * B8 - B6 * B5) * idet;
  Binv[4] = (B0 * B8 - B6 * B2) * idet;
  Binv[5] = -(B0 * B5 - B3 * B2) * idet;
  Binv[6] = (B3 * B7 - B6 * B4) * idet;
  Binv[7] = -(B0 * B7 - B6 * B1) * idet;
  Binv[8] = (B0 * B4 - B3 * B1) * idet;

  // <span style="color: #b22222;">tmp = B^{-1}D : 3 x D3_P</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D3_P];
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &amp;(Slater_inv[row1 * D3_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &amp;(Slater_inv[row2 * D3_P]);
  <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &amp;(Slater_inv[row3 * D3_P]);
  <span style="color: #228b22;">IVDEP</span>
  <span style="color: #0000ff;">ALIGNED</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
    tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
    tmp[D3_P + j] =
        Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
    tmp[2 * D3_P + j] =
        Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
  }

  // <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 3 x D3_P</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
      Slater_inv[i * D3_P + j] -= C[i * 3] * tmp[j];
      Slater_inv[i * D3_P + j] -= C[i * 3 + 1] * tmp[D3_P + j];
      Slater_inv[i * D3_P + j] -= C[i * 3 + 2] * tmp[2 * D3_P + j];
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 4 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 4];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D4_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D4_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D4_P + k] * Updates[D4_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D4_P + k] * Updates[2 * D4_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D4_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D4_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D4_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D4_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D4_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 4 x D4_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
Slater_inv[i * D4_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 3 + 1] * tmp[D4_P + j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 3 + 2] * tmp[2 * D4_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update, applied in place to the 5 rows of Slater_inv.</span>
|
|
<span style="color: #b22222;"> Slater_inv is indexed as 5 rows with a row stride of D5_P doubles and</span>
|
|
<span style="color: #b22222;"> Updates as 3 rows with the same stride (D5_P is defined elsewhere;</span>
|
|
<span style="color: #b22222;"> presumably the padded leading dimension -- TODO confirm).</span>
|
|
<span style="color: #b22222;"> Updates_index[0..2] select the updated rows; 1 is subtracted from each</span>
|
|
<span style="color: #b22222;"> entry, so the indices are presumably 1-based -- TODO confirm.</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
<span style="color: #b22222;"> Returns whatever qmckl_failwith() yields when the context check reports</span>
|
|
<span style="color: #b22222;"> QMCKL_NULL_CONTEXT, QMCKL_FAILURE (before anything is written) when</span>
|
|
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, and QMCKL_SUCCESS otherwise.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 5 x 3; C[i * 3 + c] is the dot product of row i of Slater_inv with row c of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 5];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are macros defined elsewhere; presumably compiler vectorization hints (confirm)</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D5_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D5_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D5_P + k] * Updates[D5_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D5_P + k] * Updates[2 * D5_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3 (rows row1, row2, row3 of C with 1 added on the diagonal)</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Give up if the determinant of B is too close to zero (below breakdown); nothing has been written yet at this point</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Scale the caller's det(Slater) by det(B) if a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion (adjugate of B multiplied by 1 / det)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D5_P, where D is rows row1, row2, row3 of Slater_inv; tmp is complete before Slater_inv is overwritten below</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D5_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D5_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D5_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D5_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 5 x D5_P, in place, as three successive subtractions per element</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
Slater_inv[i * D5_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 3 + 1] * tmp[D5_P + j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 3 + 2] * tmp[2 * D5_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update, applied in place to the 6 rows of Slater_inv.</span>
|
|
<span style="color: #b22222;"> Slater_inv is indexed as 6 rows with a row stride of D6_P doubles and</span>
|
|
<span style="color: #b22222;"> Updates as 3 rows with the same stride (D6_P is defined elsewhere;</span>
|
|
<span style="color: #b22222;"> presumably the padded leading dimension -- TODO confirm).</span>
|
|
<span style="color: #b22222;"> Updates_index[0..2] select the updated rows; 1 is subtracted from each</span>
|
|
<span style="color: #b22222;"> entry, so the indices are presumably 1-based -- TODO confirm.</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
<span style="color: #b22222;"> Returns whatever qmckl_failwith() yields when the context check reports</span>
|
|
<span style="color: #b22222;"> QMCKL_NULL_CONTEXT, QMCKL_FAILURE (before anything is written) when</span>
|
|
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, and QMCKL_SUCCESS otherwise.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 6 x 3; C[i * 3 + c] is the dot product of row i of Slater_inv with row c of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 6];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are macros defined elsewhere; presumably compiler vectorization hints (confirm)</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D6_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D6_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D6_P + k] * Updates[D6_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D6_P + k] * Updates[2 * D6_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3 (rows row1, row2, row3 of C with 1 added on the diagonal)</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Give up if the determinant of B is too close to zero (below breakdown); nothing has been written yet at this point</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Scale the caller's det(Slater) by det(B) if a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion (adjugate of B multiplied by 1 / det)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D6_P, where D is rows row1, row2, row3 of Slater_inv; tmp is complete before Slater_inv is overwritten below</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D6_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D6_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D6_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D6_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 6 x D6_P, in place, as three successive subtractions per element</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
Slater_inv[i * D6_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 3 + 1] * tmp[D6_P + j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 3 + 2] * tmp[2 * D6_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update, applied in place to the 7 rows of Slater_inv.</span>
|
|
<span style="color: #b22222;"> Slater_inv is indexed as 7 rows with a row stride of D7_P doubles and</span>
|
|
<span style="color: #b22222;"> Updates as 3 rows with the same stride (D7_P is defined elsewhere;</span>
|
|
<span style="color: #b22222;"> presumably the padded leading dimension -- TODO confirm).</span>
|
|
<span style="color: #b22222;"> Updates_index[0..2] select the updated rows; 1 is subtracted from each</span>
|
|
<span style="color: #b22222;"> entry, so the indices are presumably 1-based -- TODO confirm.</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
<span style="color: #b22222;"> Returns whatever qmckl_failwith() yields when the context check reports</span>
|
|
<span style="color: #b22222;"> QMCKL_NULL_CONTEXT, QMCKL_FAILURE (before anything is written) when</span>
|
|
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, and QMCKL_SUCCESS otherwise.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 7 x 3; C[i * 3 + c] is the dot product of row i of Slater_inv with row c of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 7];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are macros defined elsewhere; presumably compiler vectorization hints (confirm)</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D7_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D7_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D7_P + k] * Updates[D7_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D7_P + k] * Updates[2 * D7_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3 (rows row1, row2, row3 of C with 1 added on the diagonal)</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Give up if the determinant of B is too close to zero (below breakdown); nothing has been written yet at this point</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Scale the caller's det(Slater) by det(B) if a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion (adjugate of B multiplied by 1 / det)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D7_P, where D is rows row1, row2, row3 of Slater_inv; tmp is complete before Slater_inv is overwritten below</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D7_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D7_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D7_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D7_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 7 x D7_P, in place, as three successive subtractions per element</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
Slater_inv[i * D7_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 3 + 1] * tmp[D7_P + j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 3 + 2] * tmp[2 * D7_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update, applied in place to the 8 rows of Slater_inv.</span>
|
|
<span style="color: #b22222;"> Slater_inv is indexed as 8 rows with a row stride of D8_P doubles and</span>
|
|
<span style="color: #b22222;"> Updates as 3 rows with the same stride (D8_P is defined elsewhere;</span>
|
|
<span style="color: #b22222;"> presumably the padded leading dimension -- TODO confirm).</span>
|
|
<span style="color: #b22222;"> Updates_index[0..2] select the updated rows; 1 is subtracted from each</span>
|
|
<span style="color: #b22222;"> entry, so the indices are presumably 1-based -- TODO confirm.</span>
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
<span style="color: #b22222;"> Returns whatever qmckl_failwith() yields when the context check reports</span>
|
|
<span style="color: #b22222;"> QMCKL_NULL_CONTEXT, QMCKL_FAILURE (before anything is written) when</span>
|
|
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, and QMCKL_SUCCESS otherwise.</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 8 x 3; C[i * 3 + c] is the dot product of row i of Slater_inv with row c of Updates</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 8];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are macros defined elsewhere; presumably compiler vectorization hints (confirm)</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D8_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D8_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D8_P + k] * Updates[D8_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D8_P + k] * Updates[2 * D8_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3 (rows row1, row2, row3 of C with 1 added on the diagonal)</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Give up if the determinant of B is too close to zero (below breakdown); nothing has been written yet at this point</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Scale the caller's det(Slater) by det(B) if a non-NULL determinant pointer was passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion (adjugate of B multiplied by 1 / det)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D8_P, where D is rows row1, row2, row3 of Slater_inv; tmp is complete before Slater_inv is overwritten below</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D8_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D8_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D8_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D8_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 8 x D8_P, in place, as three successive subtractions per element</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
Slater_inv[i * D8_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 3 + 1] * tmp[D8_P + j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 3 + 2] * tmp[2 * D8_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 9 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 9];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D9_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D9_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D9_P + k] * Updates[D9_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D9_P + k] * Updates[2 * D9_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D9_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D9_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D9_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D9_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D9_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 9 x D9_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
Slater_inv[i * D9_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 3 + 1] * tmp[D9_P + j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 3 + 2] * tmp[2 * D9_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 10 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 10];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D10_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D10_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D10_P + k] * Updates[D10_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D10_P + k] * Updates[2 * D10_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D10_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D10_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D10_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D10_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D10_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 10 x D10_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
Slater_inv[i * D10_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 3 + 1] * tmp[D10_P + j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 3 + 2] * tmp[2 * D10_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 11 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 11];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D11_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D11_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D11_P + k] * Updates[D11_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D11_P + k] * Updates[2 * D11_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D11_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D11_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D11_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D11_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D11_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 11 x D11_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
Slater_inv[i * D11_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 3 + 1] * tmp[D11_P + j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 3 + 2] * tmp[2 * D11_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 12 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 12];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D12_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D12_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D12_P + k] * Updates[D12_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D12_P + k] * Updates[2 * D12_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D12_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D12_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D12_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D12_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D12_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 12 x D12_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
Slater_inv[i * D12_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 3 + 1] * tmp[D12_P + j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 3 + 2] * tmp[2 * D12_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 13 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 13];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D13_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D13_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D13_P + k] * Updates[D13_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D13_P + k] * Updates[2 * D13_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D13_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D13_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D13_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D13_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D13_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 13 x D13_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
Slater_inv[i * D13_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 3 + 1] * tmp[D13_P + j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 3 + 2] * tmp[2 * D13_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 14 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 14];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D14_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D14_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D14_P + k] * Updates[D14_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D14_P + k] * Updates[2 * D14_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D14_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D14_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D14_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D14_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D14_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 14 x D14_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
Slater_inv[i * D14_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 3 + 1] * tmp[D14_P + j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 3 + 2] * tmp[2 * D14_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 15 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 15];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D15_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D15_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D15_P + k] * Updates[D15_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D15_P + k] * Updates[2 * D15_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D15_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D15_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D15_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D15_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D15_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 15 x D15_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
Slater_inv[i * D15_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 3 + 1] * tmp[D15_P + j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 3 + 2] * tmp[2 * D15_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 16 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 16];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D16_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D16_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D16_P + k] * Updates[D16_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D16_P + k] * Updates[2 * D16_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D16_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D16_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D16_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D16_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D16_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 16 x D16_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
Slater_inv[i * D16_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 3 + 1] * tmp[D16_P + j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 3 + 2] * tmp[2 * D16_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 17 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 17];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D17_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D17_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D17_P + k] * Updates[D17_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D17_P + k] * Updates[2 * D17_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D17_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D17_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D17_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D17_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D17_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 17 x D17_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
Slater_inv[i * D17_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 3 + 1] * tmp[D17_P + j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 3 + 2] * tmp[2 * D17_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 18 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 18];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D18_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D18_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D18_P + k] * Updates[D18_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D18_P + k] * Updates[2 * D18_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D18_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D18_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D18_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D18_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D18_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 18 x D18_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
Slater_inv[i * D18_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 3 + 1] * tmp[D18_P + j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 3 + 2] * tmp[2 * D18_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 19 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 19];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D19_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D19_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D19_P + k] * Updates[D19_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D19_P + k] * Updates[2 * D19_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D19_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D19_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D19_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D19_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D19_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 19 x D19_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
Slater_inv[i * D19_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 3 + 1] * tmp[D19_P + j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 3 + 2] * tmp[2 * D19_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 20 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 20];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D20_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D20_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D20_P + k] * Updates[D20_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D20_P + k] * Updates[2 * D20_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D20_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D20_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D20_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D20_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D20_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 20 x D20_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
Slater_inv[i * D20_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 3 + 1] * tmp[D20_P + j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 3 + 2] * tmp[2 * D20_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 21 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 21];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D21_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D21_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D21_P + k] * Updates[D21_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D21_P + k] * Updates[2 * D21_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D21_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D21_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D21_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D21_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D21_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 21 x D21_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
Slater_inv[i * D21_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 3 + 1] * tmp[D21_P + j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 3 + 2] * tmp[2 * D21_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Dispatch a Woodbury 3x3 update to the kernel matching the layout of Slater_inv.</span>
// <span style="color: #b22222;">If LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH the call is forwarded to the</span>
// <span style="color: #b22222;">fixed-size kernel qmckl_woodbury_3x3_{Dim} for Dim in 2..21; any other LDS is</span>
// <span style="color: #b22222;">forwarded to qmckl_woodbury_3x3_hpc. The exit code of the callee is returned.</span>
// <span style="color: #b22222;">QMCKL_FAILURE is returned when LDS matches but Dim is not one of 2..21.</span>
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
                                   <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
                                   <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
                                   <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
                                   <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
                                   <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
                                   <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
                                   <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {

  // <span style="color: #b22222;">Reject a null context</span>
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context, QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_woodbury_3x3"</span>, <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
  <span style="color: #a020f0;">if</span> (LDS != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) {
    <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_hpc(context, LDS, Dim, Updates, Updates_index,
                                  breakdown, Slater_inv, determinant);
  }

  // <span style="color: #b22222;">Most cases: LDS is the padded dimension, use the kernel specialised for Dim</span>
  <span style="color: #a020f0;">switch</span> (Dim) {
    <span style="color: #a020f0;">case</span> 2:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_2(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 3:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_3(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 4:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_4(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 5:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_5(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 6:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_6(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 7:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_7(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 8:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_8(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 9:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_9(context, Updates, Updates_index, breakdown,
                                  Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 10:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_10(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 11:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_11(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 12:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_12(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 13:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_13(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 14:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_14(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 15:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_15(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 16:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_16(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 17:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_17(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 18:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_18(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 19:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_19(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 20:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_20(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">case</span> 21:
      <span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_21(context, Updates, Updates_index, breakdown,
                                   Slater_inv, determinant);
    <span style="color: #a020f0;">default</span>:
      <span style="color: #a020f0;">break</span>;
  }

  // <span style="color: #b22222;">Dim is not one of the dispatched sizes</span>
  <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div id="outline-container-orgb9b802e" class="outline-4">
|
|
<h4 id="orgb9b802e"><span class="section-number-4">4.1.4</span> Performance…</h4>
|
|
<div class="outline-text-4" id="text-4-1-4">
|
|
<p>
|
|
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
|
it is certain that they will not result in a singular matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgcab48ae" class="outline-2">
|
|
<h2 id="orgcab48ae"><span class="section-number-2">5</span> Sherman-Morrison with update splitting</h2>
|
|
<div class="outline-text-2" id="text-5">
|
|
</div>
|
|
<div id="outline-container-org8b56fa0" class="outline-3">
|
|
<h3 id="org8b56fa0"><span class="section-number-3">5.1</span> <code>qmckl_sherman_morrison_splitting</code></h3>
|
|
<div class="outline-text-3" id="text-5-1">
|
|
<p>
|
|
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator \(1+v_j^T S^{-1} u_j\) in
|
|
the Sherman-Morrison formula is deemed to be too close to zero, the update \(u_j\) is split in half:
|
|
\(u_j \rightarrow \frac{1}{2} u_j\). One half is applied immediately –necessarily increasing the value of the
|
|
denominator because of the split– while the other half is put in a queue that will be applied when all the
|
|
remaining updates have been treated.
|
|
</p>
|
|
|
|
<p>
|
|
The kernel is executed recursively until the queue is either empty and all
|
|
updates are applied successfully, or the size of the queue equals the number of initial updates. In the latter
|
|
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
|
|
kernel exits with an exit code.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
|
|
<table id="orgf9e9c92" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">qmckl<sub>context</sub></td>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">N<sub>updates</sub></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Updates[N<sub>updates</sub>*Dim]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Updates<sub>index</sub>[N<sub>updates</sub>]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Slater<sub>inv</sub>[LDS*Dim]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org497be13" class="outline-4">
|
|
<h4 id="org497be13"><span class="section-number-4">5.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-5-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_updates \times Dim\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_updates\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown << 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgf7014af" class="outline-4">
|
|
<h4 id="orgf7014af"><span class="section-number-4">5.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-5-1-2">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c">// <span style="color: #b22222;">Prototype of the Sherman-Morrison kernel with update splitting.</span>
// <span style="color: #b22222;">Slater_inv and determinant are inout, all other arguments are inputs.</span>
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_splitting</span> (
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgb2e07aa" class="outline-4">
|
|
<h4 id="orgb2e07aa"><span class="section-number-4">5.1.3</span> C source</h4>
|
|
<div class="outline-text-4" id="text-5-1-3">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c">// <span style="color: #b22222;">Apply N_updates rank-1 updates to Slater_inv through qmckl_slagel_splitting,</span>
// <span style="color: #b22222;">then call itself on the updates that qmckl_slagel_splitting queued.</span>
// <span style="color: #b22222;">determinant is forwarded unchanged to both calls.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE as soon as either call does not succeed.</span>
// <span style="color: #b22222;">NOTE(review): no bound on the recursion depth is visible here - confirm that</span>
// <span style="color: #b22222;">termination is guaranteed when updates keep being queued.</span>
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_splitting</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
                                                 <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
                                                 <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
                                                 <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {

  // <span style="color: #b22222;">Reject a null context</span>
  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context, QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sherman_morrison_splitting"</span>, <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Queue filled by qmckl_slagel_splitting; `later` receives the number of queued updates</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">later_updates</span>[LDS * N_updates];
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later_index</span>[N_updates];
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later</span> = 0;

  <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_slagel_splitting(
      LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
      later_updates, later_index, &later, determinant);
  <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;

  // <span style="color: #b22222;">Nothing was queued: done</span>
  <span style="color: #a020f0;">if</span> (later == 0) <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;

  // <span style="color: #b22222;">Treat the queued updates as a new batch</span>
  rc = qmckl_sherman_morrison_splitting(
      context, LDS, Dim, later, later_updates, later_index, breakdown,
      Slater_inv, determinant);
  <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org06e3e4f" class="outline-4">
|
|
<h4 id="org06e3e4f"><span class="section-number-4">5.1.4</span> Performance…</h4>
|
|
<div class="outline-text-4" id="text-5-1-4">
|
|
<p>
|
|
This kernel performs best when there are 2 or more rank-1 update cycles and the fail-rate is high.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org4ae3319" class="outline-2">
|
|
<h2 id="org4ae3319"><span class="section-number-2">6</span> Woodbury 3x3 and 2x2 with Sherman-Morrison and update splitting</h2>
|
|
<div class="outline-text-2" id="text-6">
|
|
</div>
|
|
<div id="outline-container-org74035f3" class="outline-3">
|
|
<h3 id="org74035f3"><span class="section-number-3">6.1</span> <code>qmckl_sherman_morrison_smw32s</code></h3>
|
|
<div class="outline-text-3" id="text-6-1">
|
|
<p>
|
|
The Woodbury 3x3 and 2x2 kernel with Sherman-Morrison and update splitting combines the low-level Woodbury 3x3 kernel,
|
|
the Woodbury 2x2 kernel and Sherman-Morrison with update splitting. It works almost the same as Woodbury 3x3 with
|
|
Sherman-Morrison and update splitting, except that when there is a remainder of two rank-1 updates, it is first tried
|
|
with Woodbury 2x2 instead of sending them all to Sherman-Morrison with update splitting. For example, in the case of
|
|
5 updates, the updates are applied in 1 block of 3 updates and 1 block of 2 updates.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
|
|
<table id="orgfe263fe" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">qmckl<sub>context</sub></td>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">N<sub>updates</sub></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater<sub>inv</sub></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Updates[N<sub>updates</sub>*Dim]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64<sub>t</sub></td>
|
|
<td class="org-left">Updates<sub>index</sub>[N<sub>updates</sub>]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Slater<sub>inv</sub>[LDS*Dim]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-orgf4a0dec" class="outline-4">
|
|
<h4 id="orgf4a0dec"><span class="section-number-4">6.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-6-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_updates \times Dim\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_updates\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown << 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org3ea8837" class="outline-4">
|
|
<h4 id="org3ea8837"><span class="section-number-4">6.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-6-1-2">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c">// <span style="color: #b22222;">Prototype of the Woodbury 3x3 and 2x2 kernel with Sherman-Morrison and update splitting.</span>
// <span style="color: #b22222;">Slater_inv and determinant are inout, all other arguments are inputs.</span>
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_smw32s</span> (
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
      <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
      <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org89b8220" class="outline-4">
|
|
<h4 id="org89b8220"><span class="section-number-4">6.1.3</span> C source</h4>
|
|
<div class="outline-text-4" id="text-6-1-3">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c">// <span style="color: #b22222;">Apply N_updates rank-1 updates to Slater_inv block by block: Woodbury 3x3 for</span>
// <span style="color: #b22222;">every block of 3 updates, Woodbury 2x2 for a remainder of 2 (and 2+2 when</span>
// <span style="color: #b22222;">N_updates == 4). A block whose Woodbury kernel does not return QMCKL_SUCCESS,</span>
// <span style="color: #b22222;">and a remainder of 1, are sent to qmckl_slagel_splitting; the updates it</span>
// <span style="color: #b22222;">queues are finished by qmckl_sherman_morrison_splitting.</span>
// <span style="color: #b22222;">Consecutive updates are read from Updates with a stride of LDS elements.</span>
// <span style="color: #b22222;">determinant is forwarded unchanged to every kernel.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or QMCKL_FAILURE when a splitting step does not succeed.</span>
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sherman_morrison_smw32s</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
                                              <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
                                              <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
                                              <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(context,
                          QMCKL_NULL_CONTEXT,
                          <span style="color: #8b2252;">"qmckl_sherman_morrison_smw32s"</span>,
                          <span style="color: #008b8b;">NULL</span>);
  }

  // <span style="color: #b22222;">Queue of updates deferred by qmckl_slagel_splitting; `later` counts the queued entries</span>
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">later_updates</span>[LDS * N_updates];
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later_index</span>[N_updates];
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later</span> = 0;

  // <span style="color: #b22222;">Special case for 4 rank-1 updates: 2+2</span>
  <span style="color: #a020f0;">if</span> (N_updates == 4) {
    <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> =
        qmckl_woodbury_2x2(context, LDS, Dim, Updates, Updates_index,
                           breakdown, Slater_inv, determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) { // <span style="color: #b22222;">Send the entire block to slagel_splitting</span>
      <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
      rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates, Updates_index,
                                  breakdown, Slater_inv,
                                  later_updates + (LDS * later),
                                  later_index + later, &l, determinant);
      // <span style="color: #b22222;">Same check as for the single remaining update below</span>
      <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
      later += l;
    }
    rc = qmckl_woodbury_2x2(context, LDS, Dim, &Updates[2 * LDS],
                            &Updates_index[2], breakdown, Slater_inv,
                            determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) { // <span style="color: #b22222;">Send the entire block to slagel_splitting</span>
      <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
      rc = qmckl_slagel_splitting(
          LDS, Dim, 2, &Updates[2 * LDS], &Updates_index[2], breakdown,
          Slater_inv, later_updates + (LDS * later), later_index + later,
          &l, determinant);
      <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
      later += l;
    }
    <span style="color: #a020f0;">if</span> (later > 0) {
      rc = qmckl_sherman_morrison_splitting(
          context, LDS, Dim, later, later_updates, later_index, breakdown,
          Slater_inv, determinant);
      // <span style="color: #b22222;">Report a failed splitting instead of returning success unconditionally</span>
      <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    }
    <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
  }

  // <span style="color: #b22222;">And for the other cases != 4</span>
  // <span style="color: #b22222;">Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates</span>
  // <span style="color: #b22222;">with Woodbury 3x3 kernel</span>
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">n_of_3blocks</span> = N_updates / 3;
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">remainder</span> = N_updates % 3;
  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">length_3block</span> = 3 * LDS;

  // <span style="color: #b22222;">The loop body is skipped when there is no complete block of 3</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < n_of_3blocks; i++) {
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates_3block</span> = &Updates[i * length_3block];
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index_3block</span> = &Updates_index[i * 3];
    <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_woodbury_3x3(
        context, LDS, Dim, Updates_3block, Updates_index_3block,
        breakdown, Slater_inv, determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) { // <span style="color: #b22222;">Send the entire block to slagel_splitting</span>
      <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
      rc = qmckl_slagel_splitting(
          LDS, Dim, 3, Updates_3block, Updates_index_3block,
          breakdown, Slater_inv, later_updates + (LDS * later),
          later_index + later, &l, determinant);
      <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
      later += l;
    }
  }

  // <span style="color: #b22222;">Apply last remaining block of 2 updates with Woodbury 2x2 kernel</span>
  <span style="color: #a020f0;">if</span> (remainder == 2) {
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates_2block</span> = &Updates[n_of_3blocks * length_3block];
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index_2block</span> = &Updates_index[3 * n_of_3blocks];
    <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_woodbury_2x2(
        context, LDS, Dim, Updates_2block, Updates_index_2block,
        breakdown, Slater_inv, determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) { // <span style="color: #b22222;">Send the entire block to slagel_splitting</span>
      <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
      rc = qmckl_slagel_splitting(
          LDS, Dim, 2, Updates_2block, Updates_index_2block, breakdown,
          Slater_inv, later_updates + (LDS * later), later_index + later,
          &l, determinant);
      <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
      later += l;
    }
  }

  // <span style="color: #b22222;">Apply last remaining update with slagel_splitting</span>
  <span style="color: #a020f0;">if</span> (remainder == 1) {
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates_1block</span> = &Updates[n_of_3blocks * length_3block];
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index_1block</span> = &Updates_index[3 * n_of_3blocks];
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
    <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_slagel_splitting(
        LDS, Dim, 1, Updates_1block, Updates_index_1block, breakdown,
        Slater_inv, later_updates + (LDS * later), later_index + later, &l,
        determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
    later += l;
  }

  // <span style="color: #b22222;">Finish the queued updates, if any</span>
  <span style="color: #a020f0;">if</span> (later > 0) {
    <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_sherman_morrison_splitting(
        context, LDS, Dim, later, later_updates, later_index, breakdown,
        Slater_inv, determinant);
    <span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgba7b003" class="outline-4">
|
|
<h4 id="orgba7b003"><span class="section-number-4">6.1.4</span> Performance…</h4>
|
|
<div class="outline-text-4" id="text-6-1-4">
|
|
<p>
|
|
This kernel performs best for update cycles with 2 or more rank-1 updates and a low fail-rate.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org1bf2a57" class="outline-2">
|
|
<h2 id="org1bf2a57"><span class="section-number-2">7</span> Helper Functions</h2>
|
|
<div class="outline-text-2" id="text-7">
|
|
<p>
|
|
Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels.
|
|
These functions can only be used internally by the kernels in this module.
|
|
</p>
|
|
</div>
|
|
|
|
<div id="outline-container-org199a6ae" class="outline-3">
|
|
<h3 id="org199a6ae"><span class="section-number-3">7.1</span> <code>qmckl_slagel_splitting</code></h3>
|
|
<div class="outline-text-3" id="text-7-1">
|
|
<p>
|
|
<code>qmckl_slagel_splitting</code> is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel.
|
|
It is used internally to apply a collection of \(N\) rank-1 updates to the inverse Slater-matrix \(S^{-1}\) and
|
|
to split an update into two equal pieces if necessary. In case of a split, it applies the first half of the update,
|
|
while putting the second half in a waiting queue to be applied at the end.
|
|
</p>
|
|
|
|
<p>
|
|
Therefore, when \(1+v_j^TS^{-1}u_j \geq \epsilon\), the update is applied as usual. Otherwise, \(u_j\) will be redefined
|
|
as \(\frac{1}{2}u_j\). One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors
|
|
\(u_{j'}=\frac{1}{2}u_j\) and \(v_{j'}^T=v_{j}^T\), which are stored in the array <code>later_updates</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
|
|
<table id="org84035e5" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">N_updates</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Updates[N_updates*Dim]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">Updates_index[N_updates]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing positions of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">Slater_inv[LDS*Dim]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">later_updates[Dim * N_updates]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">later_index[N_updates]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the positions of the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">uint64_t*</td>
|
|
<td class="org-left">later</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Number of split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">double*</td>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org43dc581" class="outline-4">
|
|
<h4 id="org43dc581"><span class="section-number-4">7.1.1</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-7-1-1">
|
|
<ul class="org-ul">
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times Dim\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
<li><code>later_updates</code> is allocated with \(later \times Dim\) elements</li>
|
|
<li><code>later_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>later >= 0</code></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgc4613cf" class="outline-4">
|
|
<h4 id="orgc4613cf"><span class="section-number-4">7.1.2</span> C header</h4>
|
|
<div class="outline-text-4" id="text-7-1-2">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">double</span> <span style="color: #0000ff;">qmckl_slagel_splitting</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">LDS</span> <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">Dim</span> <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">N_updates</span> <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">Updates</span>* <span style="color: #228b22;">double</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">Updates_index</span>* <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">breakdown</span> <span style="color: #228b22;">double</span>,
|
|
<span style="color: #228b22;">Slater_inv</span>* <span style="color: #228b22;">double</span>,
|
|
<span style="color: #228b22;">later_updates</span>* <span style="color: #228b22;">double</span>,
|
|
<span style="color: #228b22;">later_index</span>* <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #228b22;">later</span>* <span style="color: #a0522d;">uint64_t</span>,
|
|
<span style="color: #228b22;">determinant</span>* <span style="color: #228b22;">double</span>* );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org48b08bd" class="outline-4">
|
|
<h4 id="org48b08bd"><span class="section-number-4">7.1.3</span> C source</h4>
|
|
<div class="outline-text-4" id="text-7-1-3">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_hpc</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[LDS];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < LDS; i++) {
|
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x LDS</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org93bc812"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_slagel_splitting_</span>{Dim}(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D{Dim}_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D{Dim}_P; i++) {
|
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_2</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D2_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D2_P; i++) {
|
|
later_updates[*later * D2_P + i] = Updates[l * D2_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D2_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
D[j] = Slater_inv[cui * D2_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D2_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_3</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D3_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D3_P; i++) {
|
|
later_updates[*later * D3_P + i] = Updates[l * D3_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D3_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
D[j] = Slater_inv[cui * D3_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D3_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_4</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D4_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D4_P; i++) {
|
|
later_updates[*later * D4_P + i] = Updates[l * D4_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D4_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
D[j] = Slater_inv[cui * D4_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D4_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_5</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D5_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D5_P; i++) {
|
|
later_updates[*later * D5_P + i] = Updates[l * D5_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D5_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
D[j] = Slater_inv[cui * D5_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D5_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_6</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D6_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D6_P; i++) {
|
|
later_updates[*later * D6_P + i] = Updates[l * D6_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D6_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
D[j] = Slater_inv[cui * D6_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D6_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_7</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D7_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D7_P; i++) {
|
|
later_updates[*later * D7_P + i] = Updates[l * D7_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D7_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
D[j] = Slater_inv[cui * D7_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D7_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_8</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D8_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D8_P; i++) {
|
|
later_updates[*later * D8_P + i] = Updates[l * D8_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D8_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
D[j] = Slater_inv[cui * D8_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D8_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_9</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D9_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D9_P; i++) {
|
|
later_updates[*later * D9_P + i] = Updates[l * D9_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D9_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
D[j] = Slater_inv[cui * D9_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D9_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_10</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D10_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D10_P; i++) {
|
|
later_updates[*later * D10_P + i] = Updates[l * D10_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D10_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
D[j] = Slater_inv[cui * D10_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D10_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 11: outer loops cover rows 0..10 of Slater_inv, each row holding D11_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D11_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_11</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D11_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D11_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..10] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..11 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D11_P; i++) {
|
|
later_updates[*later * D11_P + i] = Updates[l * D11_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D11_P, but only C[0..10] were assigned above; if D11_P exceeds 11 the extra entries are scaled while uninitialised (the loops below only read C[0..10]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D11_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 12: outer loops cover rows 0..11 of Slater_inv, each row holding D12_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D12_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_12</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D12_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D12_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..11] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..12 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D12_P; i++) {
|
|
later_updates[*later * D12_P + i] = Updates[l * D12_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D12_P, but only C[0..11] were assigned above; if D12_P exceeds 12 the extra entries are scaled while uninitialised (the loops below only read C[0..11]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D12_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
D[j] = Slater_inv[cui * D12_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D12_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 13: outer loops cover rows 0..12 of Slater_inv, each row holding D13_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D13_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_13</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D13_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D13_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..12] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..13 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D13_P; i++) {
|
|
later_updates[*later * D13_P + i] = Updates[l * D13_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D13_P, but only C[0..12] were assigned above; if D13_P exceeds 13 the extra entries are scaled while uninitialised (the loops below only read C[0..12]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D13_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
D[j] = Slater_inv[cui * D13_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D13_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 14: outer loops cover rows 0..13 of Slater_inv, each row holding D14_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D14_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_14</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D14_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D14_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..13] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..14 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D14_P; i++) {
|
|
later_updates[*later * D14_P + i] = Updates[l * D14_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D14_P, but only C[0..13] were assigned above; if D14_P exceeds 14 the extra entries are scaled while uninitialised (the loops below only read C[0..13]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D14_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
D[j] = Slater_inv[cui * D14_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D14_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 15: outer loops cover rows 0..14 of Slater_inv, each row holding D15_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D15_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_15</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D15_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D15_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..14] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..15 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D15_P; i++) {
|
|
later_updates[*later * D15_P + i] = Updates[l * D15_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D15_P, but only C[0..14] were assigned above; if D15_P exceeds 15 the extra entries are scaled while uninitialised (the loops below only read C[0..14]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D15_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
D[j] = Slater_inv[cui * D15_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D15_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 16: outer loops cover rows 0..15 of Slater_inv, each row holding D16_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D16_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_16</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D16_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D16_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..15] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..16 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D16_P; i++) {
|
|
later_updates[*later * D16_P + i] = Updates[l * D16_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D16_P, but only C[0..15] were assigned above; if D16_P exceeds 16 the extra entries are scaled while uninitialised (the loops below only read C[0..15]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D16_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
D[j] = Slater_inv[cui * D16_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D16_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
// <span style="color: #b22222;">Splitting kernel specialised for size 17: outer loops cover rows 0..16 of Slater_inv, each row holding D17_P values.</span>
// <span style="color: #b22222;">For every update l: C = Slater_inv x Updates[l], den = 1 + C[Updates_index[l] - 1], then C x D / den is subtracted</span>
// <span style="color: #b22222;">from Slater_inv, D being a copy of row Updates_index[l] - 1. If fabs(den) is below breakdown, only half of the update</span>
// <span style="color: #b22222;">is applied; the other half is stored in later_updates / later_index and *later is incremented.</span>
// <span style="color: #b22222;">N_updates: number of updates. Updates: update l starts at Updates[l * D17_P]. Updates_index: 1-based index per update.</span>
// <span style="color: #b22222;">breakdown: threshold for fabs(den). Slater_inv: modified in place. determinant: optional, multiplied by each den.</span>
// <span style="color: #b22222;">NOTE(review): *later is used as a write position without a capacity check here - confirm the caller sizes the buffers.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS unconditionally.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_17</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
// <span style="color: #b22222;">Scratch vectors of D17_P doubles, declared with 8-byte alignment.</span>
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D17_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
// <span style="color: #b22222;">Only C[0..16] are computed. IVDEP and ALIGNED are macros defined outside this listing (presumably vectorisation hints - confirm).</span>
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">The uint64_t entry is converted to a 0-based int. NOTE(review): assumes Updates_index[l] is in 1..17 - not checked here.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D17_P; i++) {
|
|
later_updates[*later * D17_P + i] = Updates[l * D17_P + i] * 0.5f;
|
|
// <span style="color: #b22222;">NOTE(review): i runs to D17_P, but only C[0..16] were assigned above; if D17_P exceeds 17 the extra entries are scaled while uninitialised (the loops below only read C[0..16]).</span>
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">den is recomputed from the halved C; it is not compared against breakdown again.</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">determinant may be NULL; otherwise it accumulates the product of the denominators.</span>
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D17_P</span>
|
|
// <span style="color: #b22222;">Row cui is copied first because the update loop below also overwrites that row.</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
D[j] = Slater_inv[cui * D17_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D17_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_18</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D18_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
      // <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D18_P; i++) {
|
|
later_updates[*later * D18_P + i] = Updates[l * D18_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D18_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
D[j] = Slater_inv[cui * D18_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D18_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_19</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D19_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
      // <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D19_P; i++) {
|
|
later_updates[*later * D19_P + i] = Updates[l * D19_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D19_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
D[j] = Slater_inv[cui * D19_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D19_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_20</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D20_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
      // <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D20_P; i++) {
|
|
later_updates[*later * D20_P + i] = Updates[l * D20_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D20_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
D[j] = Slater_inv[cui * D20_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D20_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting_21</span>(
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D21_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
      // <span style="color: #b22222;">second half in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D21_P; i++) {
|
|
later_updates[*later * D21_P + i] = Updates[l * D21_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D21_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
D[j] = Slater_inv[cui * D21_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D21_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_slagel_splitting</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_2(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_3(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_4(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_5(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_6(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_7(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_8(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_9(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_10(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_11(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_12(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_13(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_14(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_15(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_16(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_17(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_18(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_19(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_20(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_21(
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
}
|
|
  <span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_slagel_splitting_hpc(
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org02dd595" class="outline-4">
|
|
<h4 id="org02dd595"><span class="section-number-4">7.1.4</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-7-1-4">
|
|
<p>
|
|
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
|
with Sherman-Morrison and update splitting. Please look at the performance recommendations for those two kernels.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgaac1d31" class="outline-2">
|
|
<h2 id="orgaac1d31"><span class="section-number-2">8</span> End of files</h2>
|
|
<div class="outline-text-2" id="text-8">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"> <span style="color: #0000ff;">assert</span> (<span style="color: #228b22;">qmckl_context_destroy</span>(<span style="color: #a0522d;">context</span>) == QMCKL_SUCCESS);
|
|
<span style="color: #a020f0;">return</span> 0;
|
|
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="postamble" class="status">
|
|
<p class="author">Author: TREX CoE</p>
|
|
<p class="date">Created: 2023-02-14 Tue 12:35</p>
|
|
<p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
|
|
</div>
|
|
</body>
|
|
</html>
|