mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-03 12:43:57 +01:00
11459 lines
731 KiB
HTML
11459 lines
731 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
|
<head>
|
|
<!-- 2023-09-14 Thu 09:02 -->
|
|
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
|
<title>Sherman-Morrison-Woodbury</title>
|
|
<meta name="generator" content="Org mode" />
|
|
<meta name="author" content="TREX CoE" />
|
|
<style type="text/css">
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
.title { text-align: center;
|
|
margin-bottom: .2em; }
|
|
.subtitle { text-align: center;
|
|
font-size: medium;
|
|
font-weight: bold;
|
|
margin-top:0; }
|
|
.todo { font-family: monospace; color: red; }
|
|
.done { font-family: monospace; color: green; }
|
|
.priority { font-family: monospace; color: orange; }
|
|
.tag { background-color: #eee; font-family: monospace;
|
|
padding: 2px; font-size: 80%; font-weight: normal; }
|
|
.timestamp { color: #bebebe; }
|
|
.timestamp-kwd { color: #5f9ea0; }
|
|
.org-right { margin-left: auto; margin-right: 0px; text-align: right; }
|
|
.org-left { margin-left: 0px; margin-right: auto; text-align: left; }
|
|
.org-center { margin-left: auto; margin-right: auto; text-align: center; }
|
|
.underline { text-decoration: underline; }
|
|
#postamble p, #preamble p { font-size: 90%; margin: .2em; }
|
|
p.verse { margin-left: 3%; }
|
|
pre {
|
|
border: 1px solid #ccc;
|
|
box-shadow: 3px 3px 3px #eee;
|
|
padding: 8pt;
|
|
font-family: monospace;
|
|
overflow: auto;
|
|
margin: 1.2em;
|
|
}
|
|
pre.src {
|
|
position: relative;
|
|
overflow: visible;
|
|
padding-top: 1.2em;
|
|
}
|
|
pre.src:before {
|
|
display: none;
|
|
position: absolute;
|
|
background-color: white;
|
|
top: -10px;
|
|
right: 10px;
|
|
padding: 3px;
|
|
border: 1px solid black;
|
|
}
|
|
pre.src:hover:before { display: inline;}
|
|
/* Languages per Org manual */
|
|
pre.src-asymptote:before { content: 'Asymptote'; }
|
|
pre.src-awk:before { content: 'Awk'; }
|
|
pre.src-C:before { content: 'C'; }
|
|
/* pre.src-C++ doesn't work in CSS */
|
|
pre.src-clojure:before { content: 'Clojure'; }
|
|
pre.src-css:before { content: 'CSS'; }
|
|
pre.src-D:before { content: 'D'; }
|
|
pre.src-ditaa:before { content: 'ditaa'; }
|
|
pre.src-dot:before { content: 'Graphviz'; }
|
|
pre.src-calc:before { content: 'Emacs Calc'; }
|
|
pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
|
|
pre.src-fortran:before { content: 'Fortran'; }
|
|
pre.src-gnuplot:before { content: 'gnuplot'; }
|
|
pre.src-haskell:before { content: 'Haskell'; }
|
|
pre.src-hledger:before { content: 'hledger'; }
|
|
pre.src-java:before { content: 'Java'; }
|
|
pre.src-js:before { content: 'Javascript'; }
|
|
pre.src-latex:before { content: 'LaTeX'; }
|
|
pre.src-ledger:before { content: 'Ledger'; }
|
|
pre.src-lisp:before { content: 'Lisp'; }
|
|
pre.src-lilypond:before { content: 'Lilypond'; }
|
|
pre.src-lua:before { content: 'Lua'; }
|
|
pre.src-matlab:before { content: 'MATLAB'; }
|
|
pre.src-mscgen:before { content: 'Mscgen'; }
|
|
pre.src-ocaml:before { content: 'Objective Caml'; }
|
|
pre.src-octave:before { content: 'Octave'; }
|
|
pre.src-org:before { content: 'Org mode'; }
|
|
pre.src-oz:before { content: 'OZ'; }
|
|
pre.src-plantuml:before { content: 'Plantuml'; }
|
|
pre.src-processing:before { content: 'Processing.js'; }
|
|
pre.src-python:before { content: 'Python'; }
|
|
pre.src-R:before { content: 'R'; }
|
|
pre.src-ruby:before { content: 'Ruby'; }
|
|
pre.src-sass:before { content: 'Sass'; }
|
|
pre.src-scheme:before { content: 'Scheme'; }
|
|
pre.src-screen:before { content: 'Gnu Screen'; }
|
|
pre.src-sed:before { content: 'Sed'; }
|
|
pre.src-sh:before { content: 'shell'; }
|
|
pre.src-sql:before { content: 'SQL'; }
|
|
pre.src-sqlite:before { content: 'SQLite'; }
|
|
/* additional languages in org.el's org-babel-load-languages alist */
|
|
pre.src-forth:before { content: 'Forth'; }
|
|
pre.src-io:before { content: 'IO'; }
|
|
pre.src-J:before { content: 'J'; }
|
|
pre.src-makefile:before { content: 'Makefile'; }
|
|
pre.src-maxima:before { content: 'Maxima'; }
|
|
pre.src-perl:before { content: 'Perl'; }
|
|
pre.src-picolisp:before { content: 'Pico Lisp'; }
|
|
pre.src-scala:before { content: 'Scala'; }
|
|
pre.src-shell:before { content: 'Shell Script'; }
|
|
pre.src-ebnf2ps:before { content: 'ebfn2ps'; }
|
|
/* additional language identifiers per "defun org-babel-execute"
|
|
in ob-*.el */
|
|
pre.src-cpp:before { content: 'C++'; }
|
|
pre.src-abc:before { content: 'ABC'; }
|
|
pre.src-coq:before { content: 'Coq'; }
|
|
pre.src-groovy:before { content: 'Groovy'; }
|
|
/* additional language identifiers from org-babel-shell-names in
|
|
ob-shell.el: ob-shell is the only babel language using a lambda to put
|
|
the execution function name together. */
|
|
pre.src-bash:before { content: 'bash'; }
|
|
pre.src-csh:before { content: 'csh'; }
|
|
pre.src-ash:before { content: 'ash'; }
|
|
pre.src-dash:before { content: 'dash'; }
|
|
pre.src-ksh:before { content: 'ksh'; }
|
|
pre.src-mksh:before { content: 'mksh'; }
|
|
pre.src-posh:before { content: 'posh'; }
|
|
/* Additional Emacs modes also supported by the LaTeX listings package */
|
|
pre.src-ada:before { content: 'Ada'; }
|
|
pre.src-asm:before { content: 'Assembler'; }
|
|
pre.src-caml:before { content: 'Caml'; }
|
|
pre.src-delphi:before { content: 'Delphi'; }
|
|
pre.src-html:before { content: 'HTML'; }
|
|
pre.src-idl:before { content: 'IDL'; }
|
|
pre.src-mercury:before { content: 'Mercury'; }
|
|
pre.src-metapost:before { content: 'MetaPost'; }
|
|
pre.src-modula-2:before { content: 'Modula-2'; }
|
|
pre.src-pascal:before { content: 'Pascal'; }
|
|
pre.src-ps:before { content: 'PostScript'; }
|
|
pre.src-prolog:before { content: 'Prolog'; }
|
|
pre.src-simula:before { content: 'Simula'; }
|
|
pre.src-tcl:before { content: 'tcl'; }
|
|
pre.src-tex:before { content: 'TeX'; }
|
|
pre.src-plain-tex:before { content: 'Plain TeX'; }
|
|
pre.src-verilog:before { content: 'Verilog'; }
|
|
pre.src-vhdl:before { content: 'VHDL'; }
|
|
pre.src-xml:before { content: 'XML'; }
|
|
pre.src-nxml:before { content: 'XML'; }
|
|
/* add a generic configuration mode; LaTeX export needs an additional
|
|
(add-to-list 'org-latex-listings-langs '(conf " ")) in .emacs */
|
|
pre.src-conf:before { content: 'Configuration File'; }
|
|
|
|
table { border-collapse:collapse; }
|
|
caption.t-above { caption-side: top; }
|
|
caption.t-bottom { caption-side: bottom; }
|
|
td, th { vertical-align:top; }
|
|
th.org-right { text-align: center; }
|
|
th.org-left { text-align: center; }
|
|
th.org-center { text-align: center; }
|
|
td.org-right { text-align: right; }
|
|
td.org-left { text-align: left; }
|
|
td.org-center { text-align: center; }
|
|
dt { font-weight: bold; }
|
|
.footpara { display: inline; }
|
|
.footdef { margin-bottom: 1em; }
|
|
.figure { padding: 1em; }
|
|
.figure p { text-align: center; }
|
|
.equation-container {
|
|
display: table;
|
|
text-align: center;
|
|
width: 100%;
|
|
}
|
|
.equation {
|
|
vertical-align: middle;
|
|
}
|
|
.equation-label {
|
|
display: table-cell;
|
|
text-align: right;
|
|
vertical-align: middle;
|
|
}
|
|
.inlinetask {
|
|
padding: 10px;
|
|
border: 2px solid gray;
|
|
margin: 10px;
|
|
background: #ffffcc;
|
|
}
|
|
#org-div-home-and-up
|
|
{ text-align: right; font-size: 70%; white-space: nowrap; }
|
|
textarea { overflow-x: auto; }
|
|
.linenr { font-size: smaller }
|
|
.code-highlighted { background-color: #ffff00; }
|
|
.org-info-js_info-navigation { border-style: none; }
|
|
#org-info-js_console-label
|
|
{ font-size: 10px; font-weight: bold; white-space: nowrap; }
|
|
.org-info-js_search-highlight
|
|
{ background-color: #ffff00; color: #000000; font-weight: bold; }
|
|
.org-svg { width: 90%; }
|
|
/*]]>*/-->
|
|
</style>
|
|
<link rel="stylesheet" title="Standard" href="qmckl.css" type="text/css" />
|
|
|
|
<script type="text/javascript" src="org-info.js">
|
|
/**
|
|
*
|
|
* @source: org-info.js
|
|
*
|
|
* @licstart The following is the entire license notice for the
|
|
* JavaScript code in org-info.js.
|
|
*
|
|
* Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
*
|
|
*
|
|
* The JavaScript code in this tag is free software: you can
|
|
* redistribute it and/or modify it under the terms of the GNU
|
|
* General Public License (GNU GPL) as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option)
|
|
* any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
* without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
*
|
|
* As additional permission under GNU GPL version 3 section 7, you
|
|
* may distribute non-source (e.g., minimized or compacted) forms of
|
|
* that code without the copy of the GNU GPL normally required by
|
|
* section 4, provided you include this license notice and a URL
|
|
* through which recipients can access the Corresponding Source.
|
|
*
|
|
* @licend The above is the entire license notice
|
|
* for the JavaScript code in org-info.js.
|
|
*
|
|
*/
|
|
</script>
|
|
|
|
<script type="text/javascript">
|
|
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
// Configure the org-info.js viewer. Each [option, value] pair is forwarded
// to org_html_manager.set() in the listed order; setup() must only be called
// once every option has been registered.
(function () {
  var viewerOptions = [
    ["TOC_DEPTH", "4"],
    ["LINK_HOME", "index.html"],
    ["LINK_UP", ""],
    ["LOCAL_TOC", "1"],
    ["VIEW_BUTTONS", "0"],
    ["MOUSE_HINT", "underline"],
    ["FIXED_TOC", "0"],
    ["TOC", "1"],
    ["VIEW", "info"]
  ];
  for (var k = 0; k < viewerOptions.length; k++) {
    org_html_manager.set(viewerOptions[k][0], viewerOptions[k][1]);
  }
  org_html_manager.setup(); // activate after the parameters are set
})();
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/javascript">
|
|
/*
|
|
@licstart The following is the entire license notice for the
|
|
JavaScript code in this tag.
|
|
|
|
Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
|
|
|
The JavaScript code in this tag is free software: you can
|
|
redistribute it and/or modify it under the terms of the GNU
|
|
General Public License (GNU GPL) as published by the Free Software
|
|
Foundation, either version 3 of the License, or (at your option)
|
|
any later version. The code is distributed WITHOUT ANY WARRANTY;
|
|
without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
|
|
|
|
As additional permission under GNU GPL version 3 section 7, you
|
|
may distribute non-source (e.g., minimized or compacted) forms of
|
|
that code without the copy of the GNU GPL normally required by
|
|
section 4, provided you include this license notice and a URL
|
|
through which recipients can access the Corresponding Source.
|
|
|
|
|
|
@licend The above is the entire license notice
|
|
for the JavaScript code in this tag.
|
|
*/
|
|
<!--/*--><![CDATA[/*><!--*/
|
|
/**
 * Highlight a code reference and the element it refers to.
 *
 * The current class names of both elements are cached on `elem`
 * (`cacheClassElem`, `cacheClassTarget`) so that CodeHighlightOff can
 * restore them later.
 *
 * @param {Element} elem - the element that triggered the highlight.
 * @param {string}  id   - id of the element to highlight together with `elem`.
 */
function CodeHighlightOn(elem, id)
{
  var target = document.getElementById(id);
  if (null == target) {
    return; // no such element: leave `elem` untouched
  }
  // Only cache a class name that is not already the highlight class.
  // Otherwise a second call without an intervening CodeHighlightOff would
  // overwrite the cached original with "code-highlighted" and lose it.
  if (elem.className !== "code-highlighted") {
    elem.cacheClassElem = elem.className;
  }
  if (target.className !== "code-highlighted") {
    elem.cacheClassTarget = target.className;
  }
  target.className = "code-highlighted";
  elem.className = "code-highlighted";
}
|
|
/**
 * Undo CodeHighlightOn: restore the class names cached on `elem`.
 *
 * @param {Element} elem - the element that triggered the highlight.
 * @param {string}  id   - id of the element highlighted together with `elem`.
 */
function CodeHighlightOff(elem, id)
{
  var target = document.getElementById(id);
  // Test for "a value was cached" rather than truthiness: an element whose
  // original class was the empty string must be restored too, otherwise it
  // would stay highlighted forever.
  if (typeof elem.cacheClassElem === "string")
    elem.className = elem.cacheClassElem;
  // The target may no longer exist in the document; do not dereference null.
  if (null != target && typeof elem.cacheClassTarget === "string")
    target.className = elem.cacheClassTarget;
}
|
|
/*]]>*///-->
|
|
</script>
|
|
<script type="text/x-mathjax-config">
|
|
// MathJax (v2) configuration used to render the LaTeX fragments of this page.
MathJax.Hub.Config({
    displayAlign: "center",
    displayIndent: "0em",

    "HTML-CSS": { scale: 100,
                    // Must be the boolean false: the string "false" is
                    // truthy in JavaScript and would not disable the option.
                    linebreaks: { automatic: false },
                    webFont: "TeX"
                   },
    SVG: {scale: 100,
          linebreaks: { automatic: false },
          font: "TeX"},
    NativeMML: {scale: 100},
    // Number equations AMS-style and place the tags on the right-hand side.
    TeX: { equationNumbers: {autoNumber: "AMS"},
           MultLineWidth: "85%",
           TagSide: "right",
           TagIndent: ".8em"
         }
});
|
|
</script>
|
|
<script type="text/javascript"
|
|
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS_HTML"></script>
|
|
</head>
|
|
<body>
|
|
<div id="org-div-home-and-up">
|
|
<a accesskey="h" href=""> UP </a>
|
|
|
|
|
<a accesskey="H" href="index.html"> HOME </a>
|
|
</div><div id="content">
|
|
<h1 class="title">Sherman-Morrison-Woodbury</h1>
|
|
<div id="table-of-contents">
|
|
<h2>Table of Contents</h2>
|
|
<div id="text-table-of-contents">
|
|
<ul>
|
|
<li><a href="#orgf091f25">1. Headers</a></li>
|
|
<li><a href="#org2ec9fb2">2. Naïve Sherman-Morrison</a>
|
|
<ul>
|
|
<li><a href="#org2b86a59">2.1. <code>qmckl_sm_naive</code></a>
|
|
<ul>
|
|
<li><a href="#orgb061612">2.1.1. Introduction</a></li>
|
|
<li><a href="#org192e71d">2.1.2. API</a></li>
|
|
<li><a href="#org6d49eab">2.1.3. Requirements</a></li>
|
|
<li><a href="#orgd7b1b6c">2.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org452d92a">2.1.4.1. C interface (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org6c7a0bc">2.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#orgdc84b97">2.1.6. C sources</a></li>
|
|
<li><a href="#org96472a7">2.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
|
|
<li><a href="#org63a8859">2.1.8. Performance</a></li>
|
|
<li><a href="#org83e472a">2.1.9. Tests</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org5752ee9">3. Sherman-Morrison with Slagel Splitting (core)</a>
|
|
<ul>
|
|
<li><a href="#orgbf02ec7">3.1. <code>qmckl_sm_splitting_core</code></a>
|
|
<ul>
|
|
<li><a href="#org5b90e69">3.1.1. Introduction</a></li>
|
|
<li><a href="#orge4a7d2d">3.1.2. API</a></li>
|
|
<li><a href="#org550efff">3.1.3. Requirements</a></li>
|
|
<li><a href="#orgc6fc75d">3.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org3dde3a8">3.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org9320d92">3.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#org15b46f4">3.1.6. C sources</a></li>
|
|
<li><a href="#org0a8fd81">3.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
|
|
<li><a href="#org167700e">3.1.8. Performance</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orga78d0d0">4. Woodbury 2x2</a>
|
|
<ul>
|
|
<li><a href="#orgaba90e6">4.1. <code>qmckl_woodbury_2x2</code></a>
|
|
<ul>
|
|
<li><a href="#orgd51bf81">4.1.1. Introduction</a></li>
|
|
<li><a href="#org25c96ce">4.1.2. API</a></li>
|
|
<li><a href="#org626dcea">4.1.3. Requirements</a></li>
|
|
<li><a href="#orgb29d07c">4.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#orgcd75d4d">4.1.4.1. C interface (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org6eae02e">4.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#orgee7be50">4.1.6. C sources</a></li>
|
|
<li><a href="#orgdabb694">4.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
|
|
<li><a href="#org76f6b4d">4.1.8. Performance</a></li>
|
|
<li><a href="#org67afaf9">4.1.9. Tests</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orga3ef988">5. Woodbury 3x3</a>
|
|
<ul>
|
|
<li><a href="#org135e1ce">5.1. <code>qmckl_woodbury_3x3</code></a>
|
|
<ul>
|
|
<li><a href="#orgb8c1005">5.1.1. Introduction</a></li>
|
|
<li><a href="#orgeca9968">5.1.2. API</a></li>
|
|
<li><a href="#orgc852257">5.1.3. Requirements</a></li>
|
|
<li><a href="#org42e9f4d">5.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#org8d00f93">5.1.4.1. C interface (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org78cdb80">5.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#orgd20328f">5.1.6. C sources</a></li>
|
|
<li><a href="#orgb36c05c">5.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
|
|
<li><a href="#org508cca3">5.1.8. Performance</a></li>
|
|
<li><a href="#orgb80d61f">5.1.9. Tests</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org9d94d73">6. Sherman-Morrison with Slagel Splitting</a>
|
|
<ul>
|
|
<li><a href="#org36e8ada">6.1. <code>qmckl_sm_splitting</code></a>
|
|
<ul>
|
|
<li><a href="#orgd3daa84">6.1.1. Introduction</a></li>
|
|
<li><a href="#orgeb0d501">6.1.2. API</a></li>
|
|
<li><a href="#orgd4ce03c">6.1.3. Requirements</a></li>
|
|
<li><a href="#orge30d097">6.1.4. Pedagogical kernel source (in Fortran)</a>
|
|
<ul>
|
|
<li><a href="#orgebcf1dd">6.1.4.1. C interface to the pedagogical kernel (not directly exposed)</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#org7d3b8da">6.1.5. C headers (exposed in qmckl.h)</a></li>
|
|
<li><a href="#orgb34b665">6.1.6. C source</a></li>
|
|
<li><a href="#org16eff54">6.1.7. Fortran interfaces (exposed in qmckl_f.F90)</a></li>
|
|
<li><a href="#org2014d20">6.1.8. Performance…</a></li>
|
|
<li><a href="#orgd086d3f">6.1.9. Test</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#orgbd320f6">7. End of files</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgf091f25" class="outline-2">
|
|
<h2 id="orgf091f25"><span class="section-number-2">1</span> Headers</h2>
|
|
<div class="outline-text-2" id="text-1">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
|
|
<span style="color: #483d8b;">#ifdef</span> HAVE_CONFIG_H
|
|
<span style="color: #483d8b;"> #include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
<span style="color: #483d8b;">#endif</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><math.h></span>
|
|
|
|
<span style="color: #228b22;">int</span> <span style="color: #0000ff;">main</span>() {
|
|
<span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>;
|
|
context = qmckl_context_create();
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
This is the range that determines how many high-performance kernel instances will be generated, using the C-function templates defined in the sections below. If the C-function template is called <code>qmckl_kernel_{Dim}</code>, then <code>range(K, L+1)</code> will result in kernel instances from <code>qmckl_kernel_K</code> to <code>qmckl_kernel_L</code>.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="outline-container-org2ec9fb2" class="outline-2">
|
|
<h2 id="org2ec9fb2"><span class="section-number-2">2</span> Naïve Sherman-Morrison</h2>
|
|
<div class="outline-text-2" id="text-2">
|
|
</div>
|
|
<div id="outline-container-org2b86a59" class="outline-3">
|
|
<h3 id="org2b86a59"><span class="section-number-3">2.1</span> <code>qmckl_sm_naive</code></h3>
|
|
<div class="outline-text-3" id="text-2-1">
|
|
</div>
|
|
|
|
<div id="outline-container-orgb061612" class="outline-4">
|
|
<h4 id="orgb061612"><span class="section-number-4">2.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-2-1-1">
|
|
<p>
|
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
|
zero when an update is evaluated. It will exit with an error code if the denominator is too close to zero.
|
|
</p>
|
|
|
|
<p>
|
|
#+TODO
|
|
Change the math notation so that the update vectors appear as rows in the math
|
|
so that it is consistent with the representation in C (memory)
|
|
</p>
|
|
|
|
<p>
|
|
The formula for any update \(u_j\) (index \(j\) is suppressed for clarity) that is applied is
|
|
\[
|
|
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
|
\]
|
|
</p>
|
|
|
|
<p>
|
|
where
|
|
\(S\) is the Slater-matrix,
|
|
\(u\) and \(v^T\) are the column and row vectors containing the updates,
|
|
\(S^{-1}\) is the inverse of the Slater-matrix.
|
|
</p>
|
|
|
|
<p>
|
|
Even though the Slater-matrix \(S\) with all updates applied at once is invertible, during the course of applying
|
|
updates to the inverse Slater-matrix \(S^{-1}\) one-by-one it can happen that one of the intermediate inverse
|
|
matrices \(S^{-1}\) becomes singular. Therefore a global threshold value \(\epsilon\) is defined that is used to
|
|
evaluate each individual update \(u_j\) when it is applied.
|
|
</p>
|
|
|
|
<p>
|
|
This value sets the lower bound for which the
|
|
denominator \(1+v_j^TS^{-1}u_j\) is considered to be too small and will most probably result in a singular matrix
|
|
\(S\), or at least in an inverse of \(S\) of very poor numerical quality. Therefore, when \(|1+v_j^TS^{-1}u_j| \geq \epsilon\),
|
|
the update is applied as usual and the kernel exits with return code <code>QMCKL_SUCCESS</code>.
|
|
If \(|1+v_j^TS^{-1}u_j| \lt \epsilon\) the update is rejected and the kernel exits with return code <code>QMCKL_FAILURE</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org192e71d" class="outline-4">
|
|
<h4 id="org192e71d"><span class="section-number-4">2.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-2-1-2">
|
|
<table id="org9354b71" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>N_updates</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[N_updates*LDS]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[Dim*LDS]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org6d49eab" class="outline-4">
|
|
<h4 id="org6d49eab"><span class="section-number-4">2.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-2-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
<li><code>determinant > 0</code></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgd7b1b6c" class="outline-4">
|
|
<h4 id="orgd7b1b6c"><span class="section-number-4">2.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-2-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_naive_doc_f(context, </span><span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(nupdates * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, nupdates) ::<span style="color: #a0522d;"> Updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, dim) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> D</span>
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
l = 1;
|
|
! <span style="color: #b22222;">For each update do...</span>
|
|
<span style="color: #a020f0;">do while</span> (l < nupdates + 1)
|
|
|
|
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
C(i) = 0
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! <span style="color: #b22222;">Return early if denominator is too small</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) < breakdown) <span style="color: #a020f0;">return</span>
|
|
idenominator = 1 / denominator
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * denominator
|
|
|
|
! <span style="color: #b22222;">selecting row: v_l^T * S_inv</span>
|
|
D = Inverse(row, :)
|
|
|
|
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
l = l + 1
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy updated inverse back to s_inv</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_naive_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org452d92a" class="outline-5">
|
|
<h5 id="org452d92a"><span class="section-number-5">2.1.4.1</span> C interface (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-2-1-4-1">
|
|
<p>
|
|
The following Fortran function <code>qmckl_sm_naive_doc</code> makes sure
|
|
that the pedagogical kernel <code>qmckl_sm_naive_doc_f</code>, written in
|
|
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function <code>qmckl_sm_naive_doc</code> will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org6c7a0bc" class="outline-4">
|
|
<h4 id="org6c7a0bc"><span class="section-number-4">2.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-2-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgdc84b97" class="outline-4">
|
|
<h4 id="orgdc84b97"><span class="section-number-4">2.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-2-1-6">
|
|
<p>
|
|
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><stdbool.h></span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;"><math.h></span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"qmckl.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"config.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"assert.h"</span>
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"stdio.h"</span>
|
|
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<code>qmckl_sm_naive_hpc</code> is a high performance variation of
|
|
<code>qmckl_sm_naive</code> written in C. It is used in cases when <code>Dim</code> is
|
|
smaller than the leading dimension <code>LDS</code>, irrespective of whether <code>LDS</code>
|
|
includes zero padding to benefit from SIMD instructions or not. Cases like this
|
|
include situations where one wants to apply updates to a square submatrix of the
|
|
full matrix.
|
|
It takes advantage of memory aligned data and assumes no data dependencies
|
|
inside the loops. The loops are fully vectorised whenever <code>Dim</code> is an integer
|
|
multiple of <code>SIMD_LENGTH</code>.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[Dim];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x u_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator: v_l^T * C</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown)
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
<code>qmckl_exit_code qmckl_sm_naive_{Dim}</code> is a C function template that is used to generate instances of C functions based on the range given above. The advantage of this method is that, for each of these instances, all the dimensions and loop bounds are known at compile time, allowing the compiler to optimize more aggressively.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org82b84ae"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_naive_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D</span>{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[{Dim}];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="orgfb9f79e"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_naive_{Dim}(</span>
|
|
<span style="color: #8b2252;"> const qmckl_context context,</span>
|
|
<span style="color: #8b2252;"> const uint64_t N_updates,</span>
|
|
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
|
|
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
|
|
<span style="color: #8b2252;"> const double breakdown,</span>
|
|
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
|
|
<span style="color: #8b2252;"> double* __restrict determinant) {</span>
|
|
|
|
<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
|
|
<span style="color: #8b2252;"> return qmckl_failwith(context,</span>
|
|
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
|
|
<span style="color: #8b2252;"> "qmckl_sm_naive_{Dim}",</span>
|
|
<span style="color: #8b2252;"> NULL);</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> #define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)</span>
|
|
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[{Dim}];</span>
|
|
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>
|
|
|
|
<span style="color: #8b2252;"> uint64_t l = 0;</span>
|
|
<span style="color: #8b2252;"> // For each update</span>
|
|
<span style="color: #8b2252;"> while (l < N_updates) {</span>
|
|
<span style="color: #8b2252;"> // C = A^{-1} x U_l</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> C[i] = 0;</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // Denominator</span>
|
|
<span style="color: #8b2252;"> const int cui = Updates_index[l] - 1;</span>
|
|
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
|
|
|
|
<span style="color: #8b2252;"> if (fabs(den) < breakdown) {</span>
|
|
<span style="color: #8b2252;"> return QMCKL_FAILURE;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>
|
|
|
|
<span style="color: #8b2252;"> // Update det(A)</span>
|
|
<span style="color: #8b2252;"> if (determinant)</span>
|
|
<span style="color: #8b2252;"> *determinant *= den;</span>
|
|
|
|
<span style="color: #8b2252;"> // selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> // A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #8b2252;"> for (uint64_t i = 0; i < {Dim}; i++) {</span>
|
|
<span style="color: #8b2252;"> IVDEP</span>
|
|
<span style="color: #8b2252;"> ALIGNED</span>
|
|
<span style="color: #8b2252;"> for (uint64_t j = 0; j < D{Dim}_P; j++) {</span>
|
|
<span style="color: #8b2252;"> double update = C[i] * D[j] * iden;</span>
|
|
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> l += 1;</span>
|
|
<span style="color: #8b2252;"> }</span>
|
|
|
|
<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
|
|
<span style="color: #8b2252;">}</span>
|
|
<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
Python script that generates the C switch cases that call the individual kernel instances.
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="org89da1cf"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">case {Dim}: </span>
|
|
<span style="color: #8b2252;"> return qmckl_sm_naive_{Dim}(context,</span>
|
|
<span style="color: #8b2252;"> N_updates,</span>
|
|
<span style="color: #8b2252;"> Updates,</span>
|
|
<span style="color: #8b2252;"> Updates_index,</span>
|
|
<span style="color: #8b2252;"> breakdown,</span>
|
|
<span style="color: #8b2252;"> Slater_inv,</span>
|
|
<span style="color: #8b2252;"> determinant);"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim))
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D2_P</span> ((1+(2-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
D[j] = Slater_inv[cui * D2_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D2_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D3_P</span> ((1+(3-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
D[j] = Slater_inv[cui * D3_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D3_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D4_P</span> ((1+(4-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[4];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
D[j] = Slater_inv[cui * D4_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D4_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D5_P</span> ((1+(5-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[5];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
D[j] = Slater_inv[cui * D5_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D5_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D6_P</span> ((1+(6-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[6];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
D[j] = Slater_inv[cui * D6_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D6_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D7_P</span> ((1+(7-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[7];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
D[j] = Slater_inv[cui * D7_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D7_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D8_P</span> ((1+(8-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[8];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
D[j] = Slater_inv[cui * D8_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D8_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D9_P</span> ((1+(9-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[9];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
D[j] = Slater_inv[cui * D9_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D9_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D10_P</span> ((1+(10-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[10];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
D[j] = Slater_inv[cui * D10_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D10_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D11_P</span> ((1+(11-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[11];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D12_P</span> ((1+(12-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[12];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
D[j] = Slater_inv[cui * D12_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D12_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D13_P</span> ((1+(13-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[13];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
D[j] = Slater_inv[cui * D13_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D13_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D14_P</span> ((1+(14-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[14];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
D[j] = Slater_inv[cui * D14_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D14_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D15_P</span> ((1+(15-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[15];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
D[j] = Slater_inv[cui * D15_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D15_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D16_P</span> ((1+(16-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[16];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
D[j] = Slater_inv[cui * D16_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D16_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D17_P</span> ((1+(17-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[17];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
D[j] = Slater_inv[cui * D17_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D17_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D18_P</span> ((1+(18-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[18];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
D[j] = Slater_inv[cui * D18_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D18_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D19_P</span> ((1+(19-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[19];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
D[j] = Slater_inv[cui * D19_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D19_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D20_P</span> ((1+(20-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[20];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
D[j] = Slater_inv[cui * D20_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D20_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #define</span> <span style="color: #a0522d;">D21_P</span> ((1+(21-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[21];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = A^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
// <span style="color: #b22222;">Update det(A)</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">selecting column: D = v_l^T * S_inv</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
D[j] = Slater_inv[cui * D21_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D21_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<p>
|
|
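<code>qmckl_sm_naive</code> is the generic entry point: it contains the decision-making logic that calls the proper kernel, based on the library configuration (<code>--enable-doc</code> and <code>--enable-hpc</code>) and on the array dimensions <code>LDS</code> and <code>Dim</code> that are passed to it. In an HPC build, a kernel specialised for the given <code>Dim</code> is called when <code>LDS</code> equals <code>Dim</code> rounded up to a multiple of <code>SIMD_LENGTH</code>, and <code>qmckl_sm_naive_hpc</code> is called otherwise; in a non-HPC build, <code>qmckl_sm_naive_doc</code> is always called.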
|
|
</p>
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_naive</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_naive"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_2(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_3(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_4(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_5(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_6(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_7(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_8(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_9(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_10(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_11(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_12(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_13(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_14(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_15(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_16(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_17(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_18(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_19(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_20(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_21(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_naive_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org96472a7" class="outline-4">
|
|
<h4 id="org96472a7"><span class="section-number-4">2.1.7</span> Fortran interfaces (exposed in <code>qmckl_f.F90</code>)</h4>
|
|
<div class="outline-text-4" id="text-2-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org63a8859" class="outline-4">
|
|
<h4 id="org63a8859"><span class="section-number-4">2.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-2-1-8">
|
|
<p>
|
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
|
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
|
can never resolve a situation where applying the update causes singular behaviour.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div id="outline-container-org83e472a" class="outline-4">
|
|
<h4 id="org83e472a"><span class="section-number-4">2.1.9</span> Tests</h4>
|
|
<div class="outline-text-4" id="text-2-1-9">
|
|
<p>
|
|
The tests for the kernels are executed on datasets that are extracted from a run of
|
|
QMC=Chem on Benzene (21 spin-up/21 spin-down electrons) using 329 unique alpha determinants.
|
|
The tests are run such that the kernels reject the computed inverse whenever the computed
|
|
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
|
QMC=Chem. The tests will return <code>QMCKL_SUCCESS</code> whenever all the elements of the final matrix
|
|
\(R = S \cdot S^{-1} - 1\) are smaller in absolute value than the given tolerance value of 1e-3, and will return
|
|
<code>QMCKL_FAILURE</code> if the values are larger than this tolerance value.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span> = 21;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span> = (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span> = 1e-3;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">tolerance</span> = 1e-3;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">res</span>[441];
|
|
|
|
<span style="color: #483d8b;">#include</span> <span style="color: #8b2252;">"sm_test.h"</span>
|
|
|
|
<span style="color: #0000ff;">assert</span>(Updates1 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index1 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv1 != <span style="color: #008b8b;">NULL</span>);
|
|
|
|
// <span style="color: #b22222;">original determinant of Slater1 (before applying updates)</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = 3.407025646103221e-10;
|
|
rc = qmckl_sm_naive(context,
|
|
LDS,
|
|
Dim,
|
|
N_updates1,
|
|
Updates1,
|
|
Updates_index1,
|
|
breakdown,
|
|
Slater_inv1,
|
|
&det);
|
|
|
|
// <span style="color: #b22222;">Check that the determinant is updated properly</span>
|
|
<span style="color: #0000ff;">assert</span>(fabs(det + 4.120398385068217e-10) < 1e-15);
|
|
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org5752ee9" class="outline-2">
|
|
<h2 id="org5752ee9"><span class="section-number-2">3</span> Sherman-Morrison with Slagel Splitting (core)</h2>
|
|
<div class="outline-text-2" id="text-3">
|
|
</div>
|
|
<div id="outline-container-orgbf02ec7" class="outline-3">
|
|
<h3 id="orgbf02ec7"><span class="section-number-3">3.1</span> <code>qmckl_sm_splitting_core</code></h3>
|
|
<div class="outline-text-3" id="text-3-1">
|
|
</div>
|
|
|
|
<div id="outline-container-org5b90e69" class="outline-4">
|
|
<h4 id="org5b90e69"><span class="section-number-4">3.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-3-1-1">
|
|
<p>
|
|
<code>qmckl_sm_splitting_core</code> is the inner core part of 'Sherman-Morrison with update splitting' in the next section.
|
|
It is not normally used by itself but it is possible to use it nonetheless.
|
|
</p>
|
|
|
|
<p>
|
|
It has three extra parameters in its API:
|
|
</p>
|
|
<ul class="org-ul">
|
|
<li><code>later_updates</code> initially empty array that will contain the second halves of updates that were split during kernel execution</li>
|
|
<li><code>later_index</code> initially empty array that will contain the row/column numbers of the updates that were split during execution</li>
|
|
<li><code>later</code> initially zero integer that records the number of updates that were split during execution.</li>
|
|
</ul>
|
|
|
|
<p>
|
|
It is up to the user to decide what to do with these updates once the kernel returns. Normally <code>qmckl_sm_splitting_core</code> is
|
|
used as the core part of a recursive function, as is done in <code>qmckl_sm_splitting</code> or as part of a more complex
|
|
kernel like <code>qmckl_sherman_morrison_smw32s</code>.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant is passed it will only be partially updated if there were any update splits.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orge4a7d2d" class="outline-4">
|
|
<h4 id="orge4a7d2d"><span class="section-number-4">3.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-3-1-2">
|
|
<table id="org0e641ca" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>N_updates</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[LDS*N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing positions of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[Dim*LDS]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later_updates</code></td>
|
|
<td class="org-left"><code>double[LDS*N_updates]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later_index</code></td>
|
|
<td class="org-left"><code>uint64_t[N_updates]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the positions of the split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>later</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Number of split updates for later</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org550efff" class="outline-4">
|
|
<h4 id="org550efff"><span class="section-number-4">3.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-3-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times Dim\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times Dim\) elements</li>
|
|
<li><code>later_updates</code> is allocated with \(later \times Dim\) elements</li>
|
|
<li><code>later_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>later >= 0</code></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgc6fc75d" class="outline-4">
|
|
<h4 id="orgc6fc75d"><span class="section-number-4">3.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-3-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_sm_splitting_core_doc_f( </span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
later_upds, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> Later_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> later_upds(lds * nupdates)</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, nupdates) ::<span style="color: #a0522d;"> Updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, dim) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim) ::<span style="color: #a0522d;"> D</span>
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> denominator, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, l, row</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
l = 1;
|
|
! <span style="color: #b22222;">For each update do...</span>
|
|
<span style="color: #a020f0;">do while</span> (l < nupdates + 1)
|
|
|
|
! <span style="color: #b22222;">Compute C = S^{-1}U(l)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
C(i) = 0
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute denominator = 1 + V(l)^TC</span>
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! <span style="color: #b22222;">If denominator is too close to zero:</span>
|
|
! <span style="color: #b22222;">- Split update in 2 before storing in Later_updates</span>
|
|
! <span style="color: #b22222;">- Split previously computed vector C in 2</span>
|
|
! <span style="color: #b22222;">- Recompute the denominator</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(denominator) < breakdown) <span style="color: #a020f0;">then</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
Later_updates(i, l) = Updates(i, l) / 2
|
|
C(i) = C(i) / 2
|
|
<span style="color: #a020f0;">end do</span>
|
|
Later_index(Later + 1) = updates_index(l)
|
|
Later = Later + 1
|
|
denominator = 1 + C(row)
|
|
<span style="color: #a020f0;">end if</span>
|
|
|
|
idenominator = 1 / denominator
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * denominator
|
|
|
|
! <span style="color: #b22222;">selecting column: v_l^T * S_inv</span>
|
|
D = Inverse(row, :)
|
|
|
|
! <span style="color: #b22222;">A^{-1} = A^{-1} - C x D / denominator</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
l = l + 1
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy updated inverse and later updates</span>
|
|
! <span style="color: #b22222;">back to s_inv and later_upds</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_lu</span>(Later_Updates, later_upds, lds, dim, nupdates)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org3dde3a8" class="outline-5">
|
|
<h5 id="org3dde3a8"><span class="section-number-5">3.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-3-1-4-1">
|
|
<p>
|
|
The function <code>qmckl_sm_splitting_core_doc</code> makes sure that
|
|
<code>qmckl_sm_splitting_core_doc_f</code> can be called from C using the
|
|
<code>ISO_C_BINDING</code>. Function <code>qmckl_sm_splitting_core_doc</code> will be
|
|
exposed in <code>qmckl.h</code> and <code>qmckl_f.F90</code>, but
|
|
<code>qmckl_sm_splitting_core_doc_f</code> will not.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org9320d92" class="outline-4">
|
|
<h4 id="org9320d92"><span class="section-number-4">3.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-3-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org15b46f4" class="outline-4">
|
|
<h4 id="org15b46f4"><span class="section-number-4">3.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-3-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[LDS];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[LDS];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < LDS; i++) {
|
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x LDS</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="org80dc9b1"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_sm_splitting_core_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D{Dim}_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D{Dim}_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D{Dim}_P; i++) {
|
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
<pre class="src src-python" id="org2e863ad"><span style="color: #b22222;"># Generate the C source of qmckl_sm_splitting_core_{Dim} for Dim = 2..21 by</span>
<span style="color: #b22222;"># substituting the literal "{Dim}" in the template below. The snippet body ends</span>
<span style="color: #b22222;"># with a bare `return`, so it is meant to be evaluated as a function body.</span>
<span style="color: #b22222;">#</span>
<span style="color: #b22222;"># Generated kernel, per update l: C = Slater_inv x U_l over {Dim} rows and</span>
<span style="color: #b22222;"># den = 1 + C[Updates_index[l] - 1]. When |den| &lt; breakdown, half of U_l is</span>
<span style="color: #b22222;"># appended to later_updates/later_index (*later is incremented) and only the</span>
<span style="color: #b22222;"># other half is applied. A non-NULL determinant is multiplied by each den.</span>
<span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">static inline qmckl_exit_code qmckl_sm_splitting_core_{Dim}(</span>
<span style="color: #8b2252;"> const qmckl_context context,</span>
<span style="color: #8b2252;"> uint64_t N_updates,</span>
<span style="color: #8b2252;"> const double* __restrict Updates,</span>
<span style="color: #8b2252;"> const uint64_t* __restrict Updates_index,</span>
<span style="color: #8b2252;"> const double breakdown,</span>
<span style="color: #8b2252;"> double* __restrict Slater_inv,</span>
<span style="color: #8b2252;"> double* __restrict later_updates,</span>
<span style="color: #8b2252;"> uint64_t* __restrict later_index,</span>
<span style="color: #8b2252;"> uint64_t* __restrict later,</span>
<span style="color: #8b2252;"> double* __restrict determinant) {</span>

<span style="color: #8b2252;"> if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {</span>
<span style="color: #8b2252;"> return qmckl_failwith(</span>
<span style="color: #8b2252;"> context,</span>
<span style="color: #8b2252;"> QMCKL_NULL_CONTEXT,</span>
<span style="color: #8b2252;"> "qmckl_sm_splitting_core_{Dim}",</span>
<span style="color: #8b2252;"> NULL);</span>
<span style="color: #8b2252;"> }</span>

<span style="color: #8b2252;"> double __attribute__((aligned(8))) C[D{Dim}_P];</span>
<span style="color: #8b2252;"> double __attribute__((aligned(8))) D[D{Dim}_P];</span>

<span style="color: #8b2252;"> uint64_t l = 0;</span>
<span style="color: #8b2252;"> // For each update</span>
<span style="color: #8b2252;"> while (l &lt; N_updates) {</span>
<span style="color: #8b2252;"> // C = S^{-1} x U_l</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> C[i] = 0.0f;</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>

<span style="color: #8b2252;"> // Denominator</span>
<span style="color: #8b2252;"> const uint64_t cui = Updates_index[l] - 1;</span>
<span style="color: #8b2252;"> double den = 1.0f + C[cui];</span>
<span style="color: #8b2252;"> if (fabs(den) &lt; breakdown) {</span>
<span style="color: #8b2252;"> // U_l = U_l / 2: split the update in 2 equal halves and save the</span>
<span style="color: #8b2252;"> // second halve in later_updates</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; D{Dim}_P; i++) {</span>
<span style="color: #8b2252;"> later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> // Only C[0] .. C[{Dim} - 1] were computed above; any further entries up to</span>
<span style="color: #8b2252;"> // D{Dim}_P are uninitialized and must not be read.</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> C[i] *= 0.5f;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> later_index[*later] = Updates_index[l];</span>
<span style="color: #8b2252;"> (*later)++;</span>

<span style="color: #8b2252;"> den = 1.0f + C[cui];</span>
<span style="color: #8b2252;"> } // From here onwards we continue with applying the first halve of the</span>
<span style="color: #8b2252;"> // update to Slater_inv</span>
<span style="color: #8b2252;"> double iden = 1.0f / den;</span>

<span style="color: #8b2252;"> if (determinant)</span>
<span style="color: #8b2252;"> *determinant *= den;</span>

<span style="color: #8b2252;"> // D = v^T x S^{-1} : 1 x D{Dim}_P</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> D[j] = Slater_inv[cui * D{Dim}_P + j];</span>
<span style="color: #8b2252;"> }</span>

<span style="color: #8b2252;"> // S^{-1} = S^{-1} - C x D / den</span>
<span style="color: #8b2252;"> for (uint64_t i = 0; i &lt; {Dim}; i++) {</span>
<span style="color: #8b2252;"> IVDEP</span>
<span style="color: #8b2252;"> ALIGNED</span>
<span style="color: #8b2252;"> for (uint64_t j = 0; j &lt; D{Dim}_P; j++) {</span>
<span style="color: #8b2252;"> const double update = C[i] * D[j] * iden;</span>
<span style="color: #8b2252;"> Slater_inv[i * D{Dim}_P + j] -= update;</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> }</span>
<span style="color: #8b2252;"> l += 1;</span>
<span style="color: #8b2252;"> }</span>

<span style="color: #8b2252;"> return QMCKL_SUCCESS;</span>
<span style="color: #8b2252;">}</span>
<span style="color: #8b2252;">"""</span>
<span style="color: #a0522d;">result</span> = []
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
    <span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
    result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )

<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(result)
</pre>
</div>
|
|
|
|
<div class="org-src-container">
<pre class="src src-python" id="org76f13fb"><span style="color: #b22222;"># Build one `case N: { return qmckl_sm_splitting_core_N(...); }` arm for every</span>
<span style="color: #b22222;"># N in 2..21 and return the arms concatenated in ascending order.</span>
<span style="color: #a0522d;">arm</span>=<span style="color: #8b2252;">"""</span>
<span style="color: #8b2252;">case {Dim}: {</span>
<span style="color: #8b2252;"> return qmckl_sm_splitting_core_{Dim}(</span>
<span style="color: #8b2252;"> context,</span>
<span style="color: #8b2252;"> N_updates,</span>
<span style="color: #8b2252;"> Updates,</span>
<span style="color: #8b2252;"> Updates_index,</span>
<span style="color: #8b2252;"> breakdown,</span>
<span style="color: #8b2252;"> Slater_inv,</span>
<span style="color: #8b2252;"> later_updates,</span>
<span style="color: #8b2252;"> later_index,</span>
<span style="color: #8b2252;"> later,</span>
<span style="color: #8b2252;"> determinant);</span>
<span style="color: #8b2252;">}"""</span>

<span style="color: #b22222;"># Substitute the size into the template once per specialised kernel.</span>
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">''</span>.join(
    arm.replace(<span style="color: #8b2252;">"{Dim}"</span>, <span style="color: #483d8b;">str</span>(size))
    <span style="color: #a020f0;">for</span> size <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22)
)
</pre>
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c">// <span style="color: #b22222;">qmckl_sm_splitting_core_2: apply N_updates rank-1 (Sherman-Morrison) updates</span>
// <span style="color: #b22222;">to Slater_inv, addressed as 2 rows of D2_P doubles, splitting any update</span>
// <span style="color: #b22222;">whose denominator is too small.</span>
//
// <span style="color: #b22222;">For update l: C = Slater_inv x U_l and den = 1 + C[Updates_index[l] - 1]</span>
// <span style="color: #b22222;">(Updates_index holds 1-based indices). If |den| &lt; breakdown, half of U_l is</span>
// <span style="color: #b22222;">appended to later_updates, its index to later_index, *later is incremented,</span>
// <span style="color: #b22222;">and only the other half is applied now. If determinant is non-NULL it is</span>
// <span style="color: #b22222;">multiplied by every den that is applied.</span>
//
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith() when the context</span>
// <span style="color: #b22222;">check reports QMCKL_NULL_CONTEXT.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_2</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_2"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D2_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D2_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
      C[i] = 0.0f;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
        C[i] += Slater_inv[i * D2_P + j] * Updates[l * D2_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator (cui is the 0-based form of the 1-based update index)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D2_P; i++) {
        later_updates[*later * D2_P + i] = Updates[l * D2_P + i] * 0.5f;
      }
      // <span style="color: #b22222;">Only C[0] .. C[1] were computed above; any further entries up to</span>
      // <span style="color: #b22222;">D2_P are uninitialized and must not be read.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
        C[i] *= 0.5f;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0f + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D2_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
      D[j] = Slater_inv[cui * D2_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 2; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D2_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D2_P + j] -= update;
      }
    }
    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_3: apply N_updates rank-1 (Sherman-Morrison) updates</span>
// <span style="color: #b22222;">to Slater_inv, addressed as 3 rows of D3_P doubles, splitting any update</span>
// <span style="color: #b22222;">whose denominator is too small.</span>
//
// <span style="color: #b22222;">For update l: C = Slater_inv x U_l and den = 1 + C[Updates_index[l] - 1]</span>
// <span style="color: #b22222;">(Updates_index holds 1-based indices). If |den| &lt; breakdown, half of U_l is</span>
// <span style="color: #b22222;">appended to later_updates, its index to later_index, *later is incremented,</span>
// <span style="color: #b22222;">and only the other half is applied now. If determinant is non-NULL it is</span>
// <span style="color: #b22222;">multiplied by every den that is applied.</span>
//
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith() when the context</span>
// <span style="color: #b22222;">check reports QMCKL_NULL_CONTEXT.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_3</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_3"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D3_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D3_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
      C[i] = 0.0f;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
        C[i] += Slater_inv[i * D3_P + j] * Updates[l * D3_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator (cui is the 0-based form of the 1-based update index)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D3_P; i++) {
        later_updates[*later * D3_P + i] = Updates[l * D3_P + i] * 0.5f;
      }
      // <span style="color: #b22222;">Only C[0] .. C[2] were computed above; any further entries up to</span>
      // <span style="color: #b22222;">D3_P are uninitialized and must not be read.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
        C[i] *= 0.5f;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0f + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D3_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
      D[j] = Slater_inv[cui * D3_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 3; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D3_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D3_P + j] -= update;
      }
    }
    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_4: apply N_updates rank-1 (Sherman-Morrison) updates</span>
// <span style="color: #b22222;">to Slater_inv, addressed as 4 rows of D4_P doubles, splitting any update</span>
// <span style="color: #b22222;">whose denominator is too small.</span>
//
// <span style="color: #b22222;">For update l: C = Slater_inv x U_l and den = 1 + C[Updates_index[l] - 1]</span>
// <span style="color: #b22222;">(Updates_index holds 1-based indices). If |den| &lt; breakdown, half of U_l is</span>
// <span style="color: #b22222;">appended to later_updates, its index to later_index, *later is incremented,</span>
// <span style="color: #b22222;">and only the other half is applied now. If determinant is non-NULL it is</span>
// <span style="color: #b22222;">multiplied by every den that is applied.</span>
//
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith() when the context</span>
// <span style="color: #b22222;">check reports QMCKL_NULL_CONTEXT.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_4</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_4"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D4_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D4_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      C[i] = 0.0f;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        C[i] += Slater_inv[i * D4_P + j] * Updates[l * D4_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator (cui is the 0-based form of the 1-based update index)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D4_P; i++) {
        later_updates[*later * D4_P + i] = Updates[l * D4_P + i] * 0.5f;
      }
      // <span style="color: #b22222;">Only C[0] .. C[3] were computed above; any further entries up to</span>
      // <span style="color: #b22222;">D4_P are uninitialized and must not be read.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
        C[i] *= 0.5f;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0f + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D4_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
      D[j] = Slater_inv[cui * D4_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 4; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D4_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D4_P + j] -= update;
      }
    }
    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">qmckl_sm_splitting_core_5: apply N_updates rank-1 (Sherman-Morrison) updates</span>
// <span style="color: #b22222;">to Slater_inv, addressed as 5 rows of D5_P doubles, splitting any update</span>
// <span style="color: #b22222;">whose denominator is too small.</span>
//
// <span style="color: #b22222;">For update l: C = Slater_inv x U_l and den = 1 + C[Updates_index[l] - 1]</span>
// <span style="color: #b22222;">(Updates_index holds 1-based indices). If |den| &lt; breakdown, half of U_l is</span>
// <span style="color: #b22222;">appended to later_updates, its index to later_index, *later is incremented,</span>
// <span style="color: #b22222;">and only the other half is applied now. If determinant is non-NULL it is</span>
// <span style="color: #b22222;">multiplied by every den that is applied.</span>
//
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith() when the context</span>
// <span style="color: #b22222;">check reports QMCKL_NULL_CONTEXT.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_5</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_5"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D5_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D5_P];

  <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">while</span> (l &lt; N_updates) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      C[i] = 0.0f;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        C[i] += Slater_inv[i * D5_P + j] * Updates[l * D5_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator (cui is the 0-based form of the 1-based update index)</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D5_P; i++) {
        later_updates[*later * D5_P + i] = Updates[l * D5_P + i] * 0.5f;
      }
      // <span style="color: #b22222;">Only C[0] .. C[4] were computed above; any further entries up to</span>
      // <span style="color: #b22222;">D5_P are uninitialized and must not be read.</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
        C[i] *= 0.5f;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0f + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
    // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D5_P</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
      D[j] = Slater_inv[cui * D5_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 5; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D5_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D5_P + j] -= update;
      }
    }
    l += 1;
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 6 rows stored with row stride D6_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D6_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D6_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
C[i] += Slater_inv[i * D6_P + j] * Updates[l * D6_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 6 entries above; if D6_P exceeds 6 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 6).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D6_P; i++) {
|
|
later_updates[*later * D6_P + i] = Updates[l * D6_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D6_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
D[j] = Slater_inv[cui * D6_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D6_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 7 rows stored with row stride D7_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D7_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D7_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
C[i] += Slater_inv[i * D7_P + j] * Updates[l * D7_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 7 entries above; if D7_P exceeds 7 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 7).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D7_P; i++) {
|
|
later_updates[*later * D7_P + i] = Updates[l * D7_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D7_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
D[j] = Slater_inv[cui * D7_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D7_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 8 rows stored with row stride D8_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D8_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D8_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
C[i] += Slater_inv[i * D8_P + j] * Updates[l * D8_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 8 entries above; if D8_P exceeds 8 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 8).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D8_P; i++) {
|
|
later_updates[*later * D8_P + i] = Updates[l * D8_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D8_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
D[j] = Slater_inv[cui * D8_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D8_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 9 rows stored with row stride D9_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D9_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D9_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
C[i] += Slater_inv[i * D9_P + j] * Updates[l * D9_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 9 entries above; if D9_P exceeds 9 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 9).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D9_P; i++) {
|
|
later_updates[*later * D9_P + i] = Updates[l * D9_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D9_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
D[j] = Slater_inv[cui * D9_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D9_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 10 rows stored with row stride D10_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D10_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D10_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
C[i] += Slater_inv[i * D10_P + j] * Updates[l * D10_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 10 entries above; if D10_P exceeds 10 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 10).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D10_P; i++) {
|
|
later_updates[*later * D10_P + i] = Updates[l * D10_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D10_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
D[j] = Slater_inv[cui * D10_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D10_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 11 rows stored with row stride D11_P.</span>
// <span style="color: #b22222;">For each of the N_updates updates: C = S^{-1} x U_l and den = 1 + C[cui], cui = Updates_index[l] - 1.</span>
// <span style="color: #b22222;">When fabs(den) is below breakdown, half of U_l is copied to later_updates at slot *later, its index</span>
// <span style="color: #b22222;">is stored in later_index, *later is incremented, and only the other half is applied in this pass.</span>
// <span style="color: #b22222;">Slater_inv is modified in place; *determinant (if the pointer is non-NULL) is multiplied by each den.</span>
// <span style="color: #b22222;">Returns qmckl_failwith(...) when qmckl_context_check yields QMCKL_NULL_CONTEXT, else QMCKL_SUCCESS.</span>
// <span style="color: #b22222;">NOTE(review): *later is never compared against a capacity here; presumably the caller sizes</span>
// <span style="color: #b22222;">later_updates and later_index for the worst case - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D11_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D11_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
C[i] += Slater_inv[i * D11_P + j] * Updates[l * D11_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
// <span style="color: #b22222;">NOTE(review): the uint64_t index is narrowed to int and decremented by one without a range check.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second half in later_updates</span>
|
|
// <span style="color: #b22222;">NOTE(review): C was only filled for the first 11 entries above; if D11_P exceeds 11 the remaining</span>
// <span style="color: #b22222;">entries are scaled while uninitialized (the final update loop reads only the first 11).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D11_P; i++) {
|
|
later_updates[*later * D11_P + i] = Updates[l * D11_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
// <span style="color: #b22222;">Recompute the denominator for the halved update</span>
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D11_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
D[j] = Slater_inv[cui * D11_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D11_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 12 rows of</span>
// <span style="color: #b22222;">D12_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..12 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_12</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_12"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D12_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D12_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
        C[i] += Slater_inv[i * D12_P + j] * Updates[l * D12_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D12_P; i++) {
        later_updates[*later * D12_P + i] = Updates[l * D12_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 12 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D12_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
      D[j] = Slater_inv[cui * D12_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 12; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D12_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D12_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 13 rows of</span>
// <span style="color: #b22222;">D13_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..13 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_13</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_13"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D13_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D13_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
        C[i] += Slater_inv[i * D13_P + j] * Updates[l * D13_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D13_P; i++) {
        later_updates[*later * D13_P + i] = Updates[l * D13_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 13 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D13_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
      D[j] = Slater_inv[cui * D13_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 13; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D13_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D13_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 14 rows of</span>
// <span style="color: #b22222;">D14_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..14 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_14</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_14"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D14_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D14_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
        C[i] += Slater_inv[i * D14_P + j] * Updates[l * D14_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D14_P; i++) {
        later_updates[*later * D14_P + i] = Updates[l * D14_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 14 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D14_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
      D[j] = Slater_inv[cui * D14_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 14; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D14_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D14_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 15 rows of</span>
// <span style="color: #b22222;">D15_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..15 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_15</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_15"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D15_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D15_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        C[i] += Slater_inv[i * D15_P + j] * Updates[l * D15_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D15_P; i++) {
        later_updates[*later * D15_P + i] = Updates[l * D15_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 15 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D15_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
      D[j] = Slater_inv[cui * D15_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 15; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D15_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D15_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 16 rows of</span>
// <span style="color: #b22222;">D16_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..16 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_16</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_16"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D16_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D16_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        C[i] += Slater_inv[i * D16_P + j] * Updates[l * D16_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D16_P; i++) {
        later_updates[*later * D16_P + i] = Updates[l * D16_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 16 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D16_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
      D[j] = Slater_inv[cui * D16_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 16; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D16_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D16_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
// <span style="color: #b22222;">Sherman-Morrison update with splitting, specialised for 17 rows of</span>
// <span style="color: #b22222;">D17_P doubles each. For every update row U_l in Updates the routine</span>
// <span style="color: #b22222;">computes C = S^{-1} x U_l and den = 1 + C[Updates_index[l] - 1], then</span>
// <span style="color: #b22222;">applies S^{-1} -= C x D / den in place on Slater_inv.</span>
// <span style="color: #b22222;">If |den| &lt; breakdown, only half of the update is applied now; the other</span>
// <span style="color: #b22222;">half is appended to later_updates/later_index and *later is incremented.</span>
// <span style="color: #b22222;">When determinant is not NULL, *determinant is multiplied by each den.</span>
// <span style="color: #b22222;">Returns QMCKL_SUCCESS, or the result of qmckl_failwith when the context</span>
// <span style="color: #b22222;">check yields QMCKL_NULL_CONTEXT.</span>
// <span style="color: #b22222;">NOTE(review): Updates_index entries are used as 1-based row numbers and</span>
// <span style="color: #b22222;">are not range-checked; presumably callers guarantee 1..17 - confirm.</span>
// <span style="color: #b22222;">NOTE(review): the capacity of later_updates/later_index is not checked</span>
// <span style="color: #b22222;">here; presumably sized by the caller - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_17</span>(
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
    <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
    <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
    <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {

  <span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
    <span style="color: #a020f0;">return</span> qmckl_failwith(
        context,
        QMCKL_NULL_CONTEXT,
        <span style="color: #8b2252;">"qmckl_sm_splitting_core_17"</span>,
        <span style="color: #008b8b;">NULL</span>);
  }

  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D17_P];
  <span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D17_P];

  // <span style="color: #b22222;">For each update</span>
  <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0; l &lt; N_updates; l++) {
    // <span style="color: #b22222;">C = S^{-1} x U_l</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      C[i] = 0.0;
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        C[i] += Slater_inv[i * D17_P + j] * Updates[l * D17_P + j];
      }
    }

    // <span style="color: #b22222;">Denominator; cui is the 0-based row taken from the 1-based index</span>
    <span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0 + C[cui];
    <span style="color: #a020f0;">if</span> (fabs(den) &lt; breakdown) {
      // <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
      // <span style="color: #b22222;">second half in later_updates</span>
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; D17_P; i++) {
        later_updates[*later * D17_P + i] = Updates[l * D17_P + i] * 0.5;
      }
      // <span style="color: #b22222;">Only the 17 entries of C written above hold values; the remaining</span>
      // <span style="color: #b22222;">padding entries are uninitialised and must not be read</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
        C[i] *= 0.5;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // <span style="color: #b22222;">From here onwards we continue with applying the first half of the</span>
      // <span style="color: #b22222;">update to Slater_inv</span>
    <span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0 / den;

    <span style="color: #a020f0;">if</span> (determinant)
      *determinant *= den;

    // <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D17_P (copy of row cui, taken before the row is overwritten)</span>
    <span style="color: #228b22;">IVDEP</span>
    <span style="color: #0000ff;">ALIGNED</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
      D[j] = Slater_inv[cui * D17_P + j];
    }

    // <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
    <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i &lt; 17; i++) {
      <span style="color: #228b22;">IVDEP</span>
      <span style="color: #0000ff;">ALIGNED</span>
      <span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j &lt; D17_P; j++) {
        <span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
        Slater_inv[i * D17_P + j] -= update;
      }
    }
  }

  <span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D18_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D18_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
C[i] += Slater_inv[i * D18_P + j] * Updates[l * D18_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D18_P; i++) {
|
|
later_updates[*later * D18_P + i] = Updates[l * D18_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D18_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
D[j] = Slater_inv[cui * D18_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D18_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D19_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D19_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
C[i] += Slater_inv[i * D19_P + j] * Updates[l * D19_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D19_P; i++) {
|
|
later_updates[*later * D19_P + i] = Updates[l * D19_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D19_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
D[j] = Slater_inv[cui * D19_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D19_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D20_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D20_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
C[i] += Slater_inv[i * D20_P + j] * Updates[l * D20_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D20_P; i++) {
|
|
later_updates[*later * D20_P + i] = Updates[l * D20_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D20_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
D[j] = Slater_inv[cui * D20_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D20_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> later_updates,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later_index,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> later,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_core_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[D21_P];
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">D</span>[D21_P];
|
|
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">l</span> = 0;
|
|
// <span style="color: #b22222;">For each update</span>
|
|
<span style="color: #a020f0;">while</span> (l < N_updates) {
|
|
// <span style="color: #b22222;">C = S^{-1} x U_l</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i] = 0.0f;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
C[i] += Slater_inv[i * D21_P + j] * Updates[l * D21_P + j];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Denominator</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">cui</span> = Updates_index[l] - 1;
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">den</span> = 1.0f + C[cui];
|
|
<span style="color: #a020f0;">if</span> (fabs(den) < breakdown) {
|
|
// <span style="color: #b22222;">U_l = U_l / 2: split the update in 2 equal halves and save the</span>
|
|
// <span style="color: #b22222;">second halve in later_updates</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < D21_P; i++) {
|
|
later_updates[*later * D21_P + i] = Updates[l * D21_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // <span style="color: #b22222;">From here onwards we continue with applying the first halve of the</span>
|
|
// <span style="color: #b22222;">update to Slater_inv</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">iden</span> = 1.0f / den;
|
|
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= den;
|
|
|
|
// <span style="color: #b22222;">D = v^T x S^{-1} : 1 x D21_P</span>
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
D[j] = Slater_inv[cui * D21_P + j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">S^{-1} = S^{-1} - C x D / den</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">update</span> = C[i] * D[j] * iden;
|
|
Slater_inv[i * D21_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_core</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">later_updates</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later_index</span>,
|
|
<span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">later</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_2(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 3: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_3(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 4: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_4(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 5: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_5(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 6: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_6(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 7: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_7(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 8: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_8(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 9: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_9(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 10: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_10(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 11: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_11(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 12: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_12(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 13: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_13(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 14: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_14(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 15: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_15(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 16: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_16(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 17: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_17(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 18: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_18(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 19: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_19(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 20: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_20(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">case</span> 21: {
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_21(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #a020f0;">default</span>: {
|
|
assert(0 == 1 && <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_core_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org0a8fd81" class="outline-4">
|
|
<h4 id="org0a8fd81"><span class="section-number-4">3.1.7</span> Fortran interfaces (exposed in <code>qmckl_f.F90</code>)</h4>
|
|
<div class="outline-text-4" id="text-3-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org167700e" class="outline-4">
|
|
<h4 id="org167700e"><span class="section-number-4">3.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-3-1-8">
|
|
<p>
|
|
This function is not meant to be called on its own: it is the core routine used by Sherman-Morrison with update splitting and by Woodbury 3x3 and 2x2
|
|
with Sherman-Morrison and update splitting. Please refer to the performance recommendations for those two kernels.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-orga78d0d0" class="outline-2">
|
|
<h2 id="orga78d0d0"><span class="section-number-2">4</span> Woodbury 2x2</h2>
|
|
<div class="outline-text-2" id="text-4">
|
|
</div>
|
|
<div id="outline-container-orgaba90e6" class="outline-3">
|
|
<h3 id="orgaba90e6"><span class="section-number-3">4.1</span> <code>qmckl_woodbury_2x2</code></h3>
|
|
<div class="outline-text-3" id="text-4-1">
|
|
</div>
|
|
|
|
<div id="outline-container-orgd51bf81" class="outline-4">
|
|
<h4 id="orgd51bf81"><span class="section-number-4">4.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-4-1-1">
|
|
<p>
|
|
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
|
this algorithm is called the Woodbury Matrix Identity:
|
|
\[
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
\]
|
|
where
|
|
\(S\) is the Slater-matrix
|
|
\(U\) and \(V\) are, respectively, the matrix containing the updates and the canonical basis matrix
|
|
\(S^{-1}\) is the inverse of the Slater-matrix
|
|
\(C:= S^{-1}U\), a Dim \(\times 2\) matrix
|
|
\(B := 1 + VC\), the \(2 \times 2\) matrix that is going to be inverted
|
|
\(D := VS^{-1}\), a \(2 \times Dim\) matrix
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org25c96ce" class="outline-4">
|
|
<h4 id="org25c96ce"><span class="section-number-4">4.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-4-1-2">
|
|
<table id="org4667e14" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[2*LDS]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[2]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the (1-based) indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[LDS*Dim]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org626dcea" class="outline-4">
|
|
<h4 id="org626dcea"><span class="section-number-4">4.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-4-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>qmckl_null_context</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>Updates</code> is allocated with \(2 \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(2\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown << 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgb29d07c" class="outline-4">
|
|
<h4 id="orgb29d07c"><span class="section-number-4">4.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-4-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_woodbury_2x2_doc_f(</span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(2)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(2 * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(2, dim) ::<span style="color: #a0522d;"> V</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(2, 2) ::<span style="color: #a0522d;"> Id</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, dim) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, 2) ::<span style="color: #a0522d;"> Updates, C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(2, 2) ::<span style="color: #a0522d;"> D, invD</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(2, dim) ::<span style="color: #a0522d;"> E, F</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> detD, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, k, l</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Construct V(2, dim) matrix</span>
|
|
V = 0
|
|
V(1, updates_index(1)) = 1
|
|
V(2, updates_index(2)) = 1
|
|
|
|
! <span style="color: #b22222;">Construct Id(2, 2) matrix</span>
|
|
Id = 0
|
|
Id(1, 1) = 1
|
|
Id(2, 2) = 1
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, <span style="color: #a020f0;">int</span>(2,8), lds, dim)
|
|
|
|
! <span style="color: #b22222;">Compute C(dim, 2) = Inverse(dim, dim) x Updates(dim, 2)</span>
|
|
C = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, 2
|
|
<span style="color: #a020f0;">do</span> k = 1, dim
|
|
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Construct matrix D(2, 2) := I(2, 2) + V(2, dim) x C(dim, 2)</span>
|
|
D = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 2
|
|
<span style="color: #a020f0;">do</span> j = 1, 2
|
|
<span style="color: #a020f0;">do</span> k = 1, dim
|
|
D(i, j) = D(i, j) + V(i, k) * C(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
D = Id + D
|
|
|
|
! <span style="color: #b22222;">Compute determinant := det(D) explicitly</span>
|
|
detD = D(1,1) * D(2,2) - D(1,2) * D(2,1)
|
|
|
|
! <span style="color: #b22222;">Return early if det(D) is too small</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(detD) < breakdown) <span style="color: #a020f0;">return</span>
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * detD
|
|
|
|
! <span style="color: #b22222;">Compute inv(D) explicitly</span>
|
|
invD(1,1) = D(2,2)
|
|
invD(1,2) = - D(1,2)
|
|
invD(2,1) = - D(2,1)
|
|
invD(2,2) = D(1,1)
|
|
invD = invD / detD
|
|
|
|
! <span style="color: #b22222;">Compute E(2, dim) := V(2, dim) x Inverse(dim, dim)</span>
|
|
E = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 2
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, dim
|
|
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute F(2, dim) := invD(2, 2) x E(2, dim)</span>
|
|
F = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 2
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, 2
|
|
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 2) x F(2, dim)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, 2
|
|
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy the updated inverse</span>
|
|
! <span style="color: #b22222;">back to s_inv</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgcd75d4d" class="outline-5">
|
|
<h5 id="orgcd75d4d"><span class="section-number-5">4.1.4.1</span> C interface (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-4-1-4-1">
|
|
<p>
|
|
The function <code>qmckl_woodbury_2x2_doc</code> makes sure that
|
|
<code>qmckl_woodbury_2x2_doc_f</code> can be called from C using the
|
|
<code>ISO_C_BINDING</code>. Function <code>qmckl_woodbury_2x2_doc</code> will be
|
|
exposed in <code>qmckl.h</code> and <code>qmckl_f.F90</code>, but
|
|
<code>qmckl_woodbury_2x2_doc_f</code> will not.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org6eae02e" class="outline-4">
|
|
<h4 id="org6eae02e"><span class="section-number-4">4.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-4-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgee7be50" class="outline-4">
|
|
<h4 id="orgee7be50"><span class="section-number-4">4.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-4-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_hpc</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : Dim x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * Dim];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < LDS; k++) {
|
|
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x LDS</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * LDS];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * LDS]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : Dim x LDS</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="orgf72e784"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_woodbury_2x2_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : {Dim} x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * {Dim}];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D{Dim}_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D{Dim}_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D{Dim}_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D{Dim}_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 2 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 2];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D2_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D2_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D2_P + k] * Updates[D2_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D2_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D2_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D2_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D2_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D2_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 2 x D2_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
Slater_inv[i * D2_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D2_P + j] -= C[i * 2 + 1] * tmp[D2_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 3 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 3];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D3_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D3_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D3_P + k] * Updates[D3_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D3_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D3_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D3_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D3_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D3_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 3 x D3_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
Slater_inv[i * D3_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D3_P + j] -= C[i * 2 + 1] * tmp[D3_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 4 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 4];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D4_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D4_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D4_P + k] * Updates[D4_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D4_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D4_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D4_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D4_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 4 x D4_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
Slater_inv[i * D4_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 2 + 1] * tmp[D4_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 5 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 5];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D5_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D5_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D5_P + k] * Updates[D5_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D5_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D5_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D5_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D5_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 5 x D5_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
Slater_inv[i * D5_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 2 + 1] * tmp[D5_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 6 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 6];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D6_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D6_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D6_P + k] * Updates[D6_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D6_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D6_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D6_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D6_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 6 x D6_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
Slater_inv[i * D6_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 2 + 1] * tmp[D6_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 7 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 7];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D7_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D7_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D7_P + k] * Updates[D7_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D7_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D7_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D7_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D7_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 7 x D7_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
Slater_inv[i * D7_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 2 + 1] * tmp[D7_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 8 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 8];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D8_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D8_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D8_P + k] * Updates[D8_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D8_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D8_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D8_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D8_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 8 x D8_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
Slater_inv[i * D8_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 2 + 1] * tmp[D8_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 9 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 9];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D9_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D9_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D9_P + k] * Updates[D9_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D9_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D9_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D9_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D9_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 9 x D9_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
Slater_inv[i * D9_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 2 + 1] * tmp[D9_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 10 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 10];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D10_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D10_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D10_P + k] * Updates[D10_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D10_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D10_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D10_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D10_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 10 x D10_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
Slater_inv[i * D10_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 2 + 1] * tmp[D10_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 11 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 11];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D11_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D11_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D11_P + k] * Updates[D11_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D11_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D11_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D11_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D11_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 11 x D11_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
Slater_inv[i * D11_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 2 + 1] * tmp[D11_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 12 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 12];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D12_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D12_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D12_P + k] * Updates[D12_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D12_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D12_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D12_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D12_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 12 x D12_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
Slater_inv[i * D12_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 2 + 1] * tmp[D12_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 13 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 13];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D13_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D13_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D13_P + k] * Updates[D13_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D13_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D13_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D13_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D13_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 13 x D13_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
Slater_inv[i * D13_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 2 + 1] * tmp[D13_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 14 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 14];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D14_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D14_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D14_P + k] * Updates[D14_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D14_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D14_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D14_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D14_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 14 x D14_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
Slater_inv[i * D14_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 2 + 1] * tmp[D14_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 15 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 15];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D15_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D15_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D15_P + k] * Updates[D15_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D15_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D15_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D15_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D15_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 15 x D15_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
Slater_inv[i * D15_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 2 + 1] * tmp[D15_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 16 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 16];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D16_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D16_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D16_P + k] * Updates[D16_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D16_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D16_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D16_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D16_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 16 x D16_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
Slater_inv[i * D16_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 2 + 1] * tmp[D16_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 17 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 17];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D17_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D17_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D17_P + k] * Updates[D17_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D17_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D17_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D17_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D17_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 17 x D17_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
Slater_inv[i * D17_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 2 + 1] * tmp[D17_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 18 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 18];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D18_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D18_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D18_P + k] * Updates[D18_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D18_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D18_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D18_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D18_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 18 x D18_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
Slater_inv[i * D18_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 2 + 1] * tmp[D18_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 19 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 19];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D19_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D19_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D19_P + k] * Updates[D19_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D19_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D19_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D19_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D19_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 19 x D19_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
Slater_inv[i * D19_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 2 + 1] * tmp[D19_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 20 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 20];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D20_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D20_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D20_P + k] * Updates[D20_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D20_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D20_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D20_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D20_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 20 x D20_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
Slater_inv[i * D20_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 2 + 1] * tmp[D20_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 2</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 2 x 2</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 2 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 21 x 2</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[2 * 21];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D21_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D21_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D21_P + k] * Updates[D21_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 2 x 2</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 2] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 2 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row2 * 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 2 + 1] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span> = B0 * B3 - B1 * B2;
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 2 x 2 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[4], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 2 x D21_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[2 * D21_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D21_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D21_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 21 x D21_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
Slater_inv[i * D21_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 2 + 1] * tmp[D21_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_2x2</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_2x2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_2(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_3(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_4(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_5(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_6(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_7(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_8(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_9(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_10(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_11(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_12(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_13(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_14(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_15(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_16(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_17(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_18(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_19(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_20(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_21(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_2x2_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
// <span style="color: #b22222;">return qmckl_woodbury_2x2_hpc(</span>
|
|
// <span style="color: #b22222;">context,</span>
|
|
// <span style="color: #b22222;">LDS,</span>
|
|
// <span style="color: #b22222;">Dim,</span>
|
|
// <span style="color: #b22222;">Updates,</span>
|
|
// <span style="color: #b22222;">Updates_index,</span>
|
|
// <span style="color: #b22222;">breakdown,</span>
|
|
// <span style="color: #b22222;">Slater_inv,</span>
|
|
// <span style="color: #b22222;">determinant);</span>
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgdabb694" class="outline-4">
|
|
<h4 id="orgdabb694"><span class="section-number-4">4.1.7</span> Fortran interfaces (exposed in <code>qmckl_f.F90</code>)</h4>
|
|
<div class="outline-text-4" id="text-4-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org76f6b4d" class="outline-4">
|
|
<h4 id="org76f6b4d"><span class="section-number-4">4.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-4-1-8">
|
|
<p>
|
|
This function is most efficient when used in cases where there are only 2 rank-1 updates and
|
|
it is certain that they will not result in a singular matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org67afaf9" class="outline-4">
|
|
<h4 id="org67afaf9"><span class="section-number-4">4.1.9</span> Tests</h4>
|
|
<div class="outline-text-4" id="text-4-1-9">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span>(Updates2 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index2 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv2 != <span style="color: #008b8b;">NULL</span>);
|
|
det = -1.4432116661319376e-11;
|
|
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
|
<span style="color: #0000ff;">assert</span>(fabs(det-2.367058141251457e-10) < 1e-15);
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-orga3ef988" class="outline-2">
|
|
<h2 id="orga3ef988"><span class="section-number-2">5</span> Woodbury 3x3</h2>
|
|
<div class="outline-text-2" id="text-5">
|
|
</div>
|
|
<div id="outline-container-org135e1ce" class="outline-3">
|
|
<h3 id="org135e1ce"><span class="section-number-3">5.1</span> <code>qmckl_woodbury_3x3</code></h3>
|
|
<div class="outline-text-3" id="text-5-1">
|
|
</div>
|
|
|
|
<div id="outline-container-orgb8c1005" class="outline-4">
|
|
<h4 id="orgb8c1005"><span class="section-number-4">5.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-5-1-1">
|
|
<p>
|
|
The Woodbury 3x3 kernel. It is used to apply three rank-1 updates at once. The formula used in
|
|
this algorithm is called the Woodbury Matrix Identity
|
|
\[
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
\]
|
|
where
|
|
\(S\) is the Slater-matrix
|
|
\(U\) and \(V\) are the matrices containing the updates and the canonical basis matrix
|
|
\(S^{-1}\) is the inverse of the Slater-matrix
|
|
\(C:= S^{-1}U\), a Dim \(\times 3\) matrix
|
|
\(B := 1 + VC\), the \(3 \times 3\) matrix that is going to be inverted
|
|
\(D := VS^{-1}\), a \(3 \times Dim\) matrix
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgeca9968" class="outline-4">
|
|
<h4 id="orgeca9968"><span class="section-number-4">5.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-5-1-2">
|
|
<table id="orge4d2e96" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left"><code>context</code></td>
|
|
<td class="org-left"><code>qmckl_context</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>LDS</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Dim</code></td>
|
|
<td class="org-left"><code>uint64_t</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of <code>Slater_inv</code></td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates</code></td>
|
|
<td class="org-left"><code>double[3*LDS]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Updates_index</code></td>
|
|
<td class="org-left"><code>uint64_t[3]</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>breakdown</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>Slater_inv</code></td>
|
|
<td class="org-left"><code>double[LDS*Dim]</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left"><code>determinant</code></td>
|
|
<td class="org-left"><code>double</code></td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgc852257" class="outline-4">
|
|
<h4 id="orgc852257"><span class="section-number-4">5.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-5-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>qmckl_null_context</code></li>
|
|
<li><code>LDS >= 3</code></li>
|
|
<li><code>Dim >= 3</code></li>
|
|
<li><code>Updates</code> is allocated with \(3 \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(3\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 \lt breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(LDS \times Dim\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org42e9f4d" class="outline-4">
|
|
<h4 id="org42e9f4d"><span class="section-number-4">5.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-5-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> function qmckl_woodbury_3x3_doc_f(</span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(3)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(3 * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(3, dim) ::<span style="color: #a0522d;"> V</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(3, 3) ::<span style="color: #a0522d;"> Id</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, dim) ::<span style="color: #a0522d;"> Inverse</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(dim, 3) ::<span style="color: #a0522d;"> Updates, C</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(3, 3) ::<span style="color: #a0522d;"> D, invD</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(3, dim) ::<span style="color: #a0522d;"> E, F</span>
|
|
|
|
<span style="color: #228b22;">real</span>*8 ::<span style="color: #a0522d;"> detD, idetD, idenominator, update</span>
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> i, j, k, l</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
! <span style="color: #b22222;">Construct V(3, dim) matrix</span>
|
|
V = 0
|
|
V(1, updates_index(1)) = 1
|
|
V(2, updates_index(2)) = 1
|
|
V(3, updates_index(3)) = 1
|
|
|
|
! <span style="color: #b22222;">Construct Id(3, 3) matrix</span>
|
|
Id = 0
|
|
Id(1, 1) = 1
|
|
Id(2, 2) = 1
|
|
Id(3, 3) = 1
|
|
|
|
! <span style="color: #b22222;">Convert 'upds' and 's_inv' into the more easily readable Fortran</span>
|
|
! <span style="color: #b22222;">matrices 'Updates' and 'Inverse'.</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">convert</span>(upds, s_inv, Updates, Inverse, <span style="color: #a020f0;">int</span>(3,8), lds, dim)
|
|
|
|
! <span style="color: #b22222;">Compute C(dim, 3) = Inverse(dim, dim) x Updates(dim, 3)</span>
|
|
C = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, 3
|
|
<span style="color: #a020f0;">do</span> k = 1, dim
|
|
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Construct matrix D(3, 3) := I(3, 3) + V(3, dim) x C(dim, 3)</span>
|
|
D = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 3
|
|
<span style="color: #a020f0;">do</span> j = 1, 3
|
|
<span style="color: #a020f0;">do</span> k = 3, dim
|
|
D(i, j) = D(i, j) + V(i, k) * C(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
D = Id + D
|
|
|
|
! <span style="color: #b22222;">Compute determinant := det(D) explicitly</span>
|
|
detD = D(1,1) * (D(2,2) * D(3,3) - D(2,3) * D(3,2)) - <span style="color: #a020f0;">&</span>
|
|
D(1,2) * (D(2,1) * D(3,3) - D(2,3) * D(3,1)) + <span style="color: #a020f0;">&</span>
|
|
D(1,3) * (D(2,1) * D(3,2) - D(2,2) * D(3,1))
|
|
|
|
! <span style="color: #b22222;">Return early if det(D) is too small</span>
|
|
<span style="color: #a020f0;">if</span> (<span style="color: #a020f0;">abs</span>(detD) < breakdown) <span style="color: #a020f0;">return</span>
|
|
|
|
! <span style="color: #b22222;">Update det(S)</span>
|
|
determinant = determinant * detD
|
|
|
|
idetD = 1.0d0 / detD
|
|
! <span style="color: #b22222;">Compute inv(D) explicitly</span>
|
|
invD(1,1) = (D(2,2) * D(3,3) - D(3,2) * D(2,3)) * idetD
|
|
invD(1,2) = -(D(1,2) * D(3,3) - D(3,2) * D(1,3)) * idetD
|
|
invD(1,3) = (D(1,2) * D(2,3) - D(2,2) * D(1,3)) * idetD
|
|
invD(2,1) = -(D(2,1) * D(3,3) - D(3,1) * D(2,3)) * idetD
|
|
invD(2,2) = (D(1,1) * D(3,3) - D(3,1) * D(1,3)) * idetD
|
|
invD(2,3) = -(D(1,1) * D(2,3) - D(2,1) * D(1,3)) * idetD
|
|
invD(3,1) = (D(2,1) * D(3,2) - D(3,1) * D(2,2)) * idetD
|
|
invD(3,2) = -(D(1,1) * D(3,2) - D(3,1) * D(1,2)) * idetD
|
|
invD(3,3) = (D(1,1) * D(2,2) - D(2,1) * D(1,2)) * idetD
|
|
|
|
! <span style="color: #b22222;">Compute E(3, dim) := V(3, dim) x Inverse(dim, dim)</span>
|
|
E = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 3
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, dim
|
|
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute F(3, dim) := invD(3, 3) x E(3, dim)</span>
|
|
F = 0
|
|
<span style="color: #a020f0;">do</span> i = 1, 3
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, 3
|
|
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 3) x F(3, dim)</span>
|
|
<span style="color: #a020f0;">do</span> i = 1, dim
|
|
<span style="color: #a020f0;">do</span> j = 1, dim
|
|
<span style="color: #a020f0;">do</span> k = 1, 3
|
|
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
<span style="color: #a020f0;">end do</span>
|
|
|
|
! <span style="color: #b22222;">Copy the updated inverse</span>
|
|
! <span style="color: #b22222;">back to s_inv</span>
|
|
<span style="color: #a020f0;">call</span> <span style="color: #0000ff;">copy_back_inv</span>(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org8d00f93" class="outline-5">
|
|
<h5 id="org8d00f93"><span class="section-number-5">5.1.4.1</span> C interface (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-5-1-4-1">
|
|
<p>
|
|
The function <code>qmckl_woodbury_3x3_doc</code> makes sure that
|
|
<code>qmckl_woodbury_3x3_doc_f</code> can be called from C using the
|
|
<code>ISO_C_BINDING</code>. Function <code>qmckl_woodbury_3x3_doc</code> will be
|
|
exposed in <code>qmckl.h</code> and <code>qmckl_f.F90</code>, but
|
|
<code>qmckl_woodbury_3x3_doc_f</code> will not.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org78cdb80" class="outline-4">
|
|
<h4 id="org78cdb80"><span class="section-number-4">5.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-5-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgd20328f" class="outline-4">
|
|
<h4 id="orgd20328f"><span class="section-number-4">5.1.6</span> C sources</h4>
|
|
<div class="outline-text-4" id="text-5-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_hpc</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : Dim x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * Dim];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < LDS; k++) {
|
|
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of inverted matrix is not zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(S) when passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x LDS</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * LDS];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * LDS]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * LDS]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[LDS + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * LDS + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : Dim x LDS</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c" id="orgc443e10"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">qmckl_woodbury_3x3_</span>{Dim}(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_{Dim}"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : {Dim} x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * {Dim}];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D{Dim}_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D{Dim}_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D{Dim}_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D{Dim}_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D{Dim}_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D{Dim}_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D{Dim}_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D{Dim}_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < {Dim}; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D{Dim}_P; j++) {
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_2</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update of Slater_inv, instantiated for dim = 2.</span>
<span style="color: #b22222;"> Updates: three update rows, each D2_P entries long.</span>
<span style="color: #b22222;"> Updates_index: the three target row numbers (1 is subtracted below,</span>
<span style="color: #b22222;"> so presumably 1-based; confirm against the caller).</span>
<span style="color: #b22222;"> breakdown: lower bound accepted for fabs(det(B)).</span>
<span style="color: #b22222;"> Slater_inv: updated in place, dim rows with stride D2_P.</span>
<span style="color: #b22222;"> determinant: optional; when non-NULL it is multiplied by det(B).</span>
<span style="color: #b22222;"> Returns QMCKL_SUCCESS, or QMCKL_FAILURE (nothing modified) when</span>
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, or the qmckl_failwith result when</span>
<span style="color: #b22222;"> the context check yields QMCKL_NULL_CONTEXT.</span>
<span style="color: #b22222;"> Notation used below:</span>
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_2"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">0-based row offsets. They are used unchecked to index C (3 * 2 entries)</span>
// <span style="color: #b22222;">and Slater_inv, so every Updates_index[k] must lie in 1..2.</span>
// <span style="color: #b22222;">NOTE(review): with two valid rows, three indices cannot all be distinct;</span>
// <span style="color: #b22222;">confirm this instantiation is actually reachable.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 2 x 3</span>
// <span style="color: #b22222;">Row-major, stride 3: C[i * 3 + c] is the dot product of row i of Slater_inv</span>
// <span style="color: #b22222;">with update row c, taken over all D2_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 2];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are defined outside this block; presumably vectorization hints (confirm).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D2_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D2_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D2_P + k] * Updates[D2_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D2_P + k] * Updates[2 * D2_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are row-major: rows row1, row2, row3 of C plus 1 on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
// <span style="color: #b22222;">Cofactor expansion along the first row of B.</span>
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
// <span style="color: #b22222;">Slater_inv and *determinant are still untouched at this point.</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv = adj(B) / det, stored row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D2_P</span>
// <span style="color: #b22222;">r1dim..r3dim alias rows row1..row3 of Slater_inv; tmp is completed before</span>
// <span style="color: #b22222;">the in-place update below rewrites Slater_inv.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D2_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D2_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D2_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D2_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D2_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D2_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 2 x D2_P</span>
// <span style="color: #b22222;">Row i of Slater_inv loses C[i][0..2] times the three rows of tmp,</span>
// <span style="color: #b22222;">one subtraction per term.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 2; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D2_P; j++) {
|
|
Slater_inv[i * D2_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D2_P + j] -= C[i * 3 + 1] * tmp[D2_P + j];
|
|
Slater_inv[i * D2_P + j] -= C[i * 3 + 2] * tmp[2 * D2_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_3</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update of Slater_inv, instantiated for dim = 3.</span>
<span style="color: #b22222;"> Updates: three update rows, each D3_P entries long.</span>
<span style="color: #b22222;"> Updates_index: the three target row numbers (1 is subtracted below,</span>
<span style="color: #b22222;"> so presumably 1-based; confirm against the caller).</span>
<span style="color: #b22222;"> breakdown: lower bound accepted for fabs(det(B)).</span>
<span style="color: #b22222;"> Slater_inv: updated in place, dim rows with stride D3_P.</span>
<span style="color: #b22222;"> determinant: optional; when non-NULL it is multiplied by det(B).</span>
<span style="color: #b22222;"> Returns QMCKL_SUCCESS, or QMCKL_FAILURE (nothing modified) when</span>
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, or the qmckl_failwith result when</span>
<span style="color: #b22222;"> the context check yields QMCKL_NULL_CONTEXT.</span>
<span style="color: #b22222;"> Notation used below:</span>
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">0-based row offsets. They are used unchecked to index C (3 * 3 entries)</span>
// <span style="color: #b22222;">and Slater_inv, so every Updates_index[k] must lie in 1..3.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 3 x 3</span>
// <span style="color: #b22222;">Row-major, stride 3: C[i * 3 + c] is the dot product of row i of Slater_inv</span>
// <span style="color: #b22222;">with update row c, taken over all D3_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 3];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are defined outside this block; presumably vectorization hints (confirm).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D3_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D3_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D3_P + k] * Updates[D3_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D3_P + k] * Updates[2 * D3_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are row-major: rows row1, row2, row3 of C plus 1 on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
// <span style="color: #b22222;">Cofactor expansion along the first row of B.</span>
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
// <span style="color: #b22222;">Slater_inv and *determinant are still untouched at this point.</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv = adj(B) / det, stored row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D3_P</span>
// <span style="color: #b22222;">r1dim..r3dim alias rows row1..row3 of Slater_inv; tmp is completed before</span>
// <span style="color: #b22222;">the in-place update below rewrites Slater_inv.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D3_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D3_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D3_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D3_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D3_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D3_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 3 x D3_P</span>
// <span style="color: #b22222;">Row i of Slater_inv loses C[i][0..2] times the three rows of tmp,</span>
// <span style="color: #b22222;">one subtraction per term.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 3; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D3_P; j++) {
|
|
Slater_inv[i * D3_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D3_P + j] -= C[i * 3 + 1] * tmp[D3_P + j];
|
|
Slater_inv[i * D3_P + j] -= C[i * 3 + 2] * tmp[2 * D3_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_4</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update of Slater_inv, instantiated for dim = 4.</span>
<span style="color: #b22222;"> Updates: three update rows, each D4_P entries long.</span>
<span style="color: #b22222;"> Updates_index: the three target row numbers (1 is subtracted below,</span>
<span style="color: #b22222;"> so presumably 1-based; confirm against the caller).</span>
<span style="color: #b22222;"> breakdown: lower bound accepted for fabs(det(B)).</span>
<span style="color: #b22222;"> Slater_inv: updated in place, dim rows with stride D4_P.</span>
<span style="color: #b22222;"> determinant: optional; when non-NULL it is multiplied by det(B).</span>
<span style="color: #b22222;"> Returns QMCKL_SUCCESS, or QMCKL_FAILURE (nothing modified) when</span>
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, or the qmckl_failwith result when</span>
<span style="color: #b22222;"> the context check yields QMCKL_NULL_CONTEXT.</span>
<span style="color: #b22222;"> Notation used below:</span>
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_4"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">0-based row offsets. They are used unchecked to index C (3 * 4 entries)</span>
// <span style="color: #b22222;">and Slater_inv, so every Updates_index[k] must lie in 1..4.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 4 x 3</span>
// <span style="color: #b22222;">Row-major, stride 3: C[i * 3 + c] is the dot product of row i of Slater_inv</span>
// <span style="color: #b22222;">with update row c, taken over all D4_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 4];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are defined outside this block; presumably vectorization hints (confirm).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D4_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D4_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D4_P + k] * Updates[D4_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D4_P + k] * Updates[2 * D4_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are row-major: rows row1, row2, row3 of C plus 1 on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
// <span style="color: #b22222;">Cofactor expansion along the first row of B.</span>
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
// <span style="color: #b22222;">Slater_inv and *determinant are still untouched at this point.</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv = adj(B) / det, stored row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D4_P</span>
// <span style="color: #b22222;">r1dim..r3dim alias rows row1..row3 of Slater_inv; tmp is completed before</span>
// <span style="color: #b22222;">the in-place update below rewrites Slater_inv.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D4_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D4_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D4_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D4_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D4_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 4 x D4_P</span>
// <span style="color: #b22222;">Row i of Slater_inv loses C[i][0..2] times the three rows of tmp,</span>
// <span style="color: #b22222;">one subtraction per term.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 4; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D4_P; j++) {
|
|
Slater_inv[i * D4_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 3 + 1] * tmp[D4_P + j];
|
|
Slater_inv[i * D4_P + j] -= C[i * 3 + 2] * tmp[2 * D4_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_5</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> Rank-3 Woodbury update of Slater_inv, instantiated for dim = 5.</span>
<span style="color: #b22222;"> Updates: three update rows, each D5_P entries long.</span>
<span style="color: #b22222;"> Updates_index: the three target row numbers (1 is subtracted below,</span>
<span style="color: #b22222;"> so presumably 1-based; confirm against the caller).</span>
<span style="color: #b22222;"> breakdown: lower bound accepted for fabs(det(B)).</span>
<span style="color: #b22222;"> Slater_inv: updated in place, dim rows with stride D5_P.</span>
<span style="color: #b22222;"> determinant: optional; when non-NULL it is multiplied by det(B).</span>
<span style="color: #b22222;"> Returns QMCKL_SUCCESS, or QMCKL_FAILURE (nothing modified) when</span>
<span style="color: #b22222;"> fabs(det(B)) is below breakdown, or the qmckl_failwith result when</span>
<span style="color: #b22222;"> the context check yields QMCKL_NULL_CONTEXT.</span>
<span style="color: #b22222;"> Notation used below:</span>
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_5"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">0-based row offsets. They are used unchecked to index C (3 * 5 entries)</span>
// <span style="color: #b22222;">and Slater_inv, so every Updates_index[k] must lie in 1..5.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 5 x 3</span>
// <span style="color: #b22222;">Row-major, stride 3: C[i * 3 + c] is the dot product of row i of Slater_inv</span>
// <span style="color: #b22222;">with update row c, taken over all D5_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 5];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
// <span style="color: #b22222;">IVDEP and ALIGNED are defined outside this block; presumably vectorization hints (confirm).</span>
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D5_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D5_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D5_P + k] * Updates[D5_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D5_P + k] * Updates[2 * D5_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are row-major: rows row1, row2, row3 of C plus 1 on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
// <span style="color: #b22222;">Cofactor expansion along the first row of B.</span>
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
// <span style="color: #b22222;">Slater_inv and *determinant are still untouched at this point.</span>
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv = adj(B) / det, stored row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D5_P</span>
// <span style="color: #b22222;">r1dim..r3dim alias rows row1..row3 of Slater_inv; tmp is completed before</span>
// <span style="color: #b22222;">the in-place update below rewrites Slater_inv.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D5_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D5_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D5_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D5_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D5_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 5 x D5_P</span>
// <span style="color: #b22222;">Row i of Slater_inv loses C[i][0..2] times the three rows of tmp,</span>
// <span style="color: #b22222;">one subtraction per term.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 5; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D5_P; j++) {
|
|
Slater_inv[i * D5_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 3 + 1] * tmp[D5_P + j];
|
|
Slater_inv[i * D5_P + j] -= C[i * 3 + 2] * tmp[2 * D5_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_6</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_6"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 6 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 6];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D6_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D6_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D6_P + k] * Updates[D6_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D6_P + k] * Updates[2 * D6_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D6_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D6_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D6_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D6_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D6_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D6_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 6 x D6_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 6; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D6_P; j++) {
|
|
Slater_inv[i * D6_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 3 + 1] * tmp[D6_P + j];
|
|
Slater_inv[i * D6_P + j] -= C[i * 3 + 2] * tmp[2 * D6_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_7</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_7"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 7 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 7];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D7_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D7_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D7_P + k] * Updates[D7_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D7_P + k] * Updates[2 * D7_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D7_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D7_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D7_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D7_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D7_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D7_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 7 x D7_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 7; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D7_P; j++) {
|
|
Slater_inv[i * D7_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 3 + 1] * tmp[D7_P + j];
|
|
Slater_inv[i * D7_P + j] -= C[i * 3 + 2] * tmp[2 * D7_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_8</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_8"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 8 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 8];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D8_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D8_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D8_P + k] * Updates[D8_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D8_P + k] * Updates[2 * D8_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D8_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D8_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D8_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D8_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D8_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D8_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 8 x D8_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 8; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D8_P; j++) {
|
|
Slater_inv[i * D8_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 3 + 1] * tmp[D8_P + j];
|
|
Slater_inv[i * D8_P + j] -= C[i * 3 + 2] * tmp[2 * D8_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_9</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_9"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 9 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 9];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D9_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D9_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D9_P + k] * Updates[D9_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D9_P + k] * Updates[2 * D9_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D9_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D9_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D9_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D9_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D9_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D9_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 9 x D9_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 9; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D9_P; j++) {
|
|
Slater_inv[i * D9_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 3 + 1] * tmp[D9_P + j];
|
|
Slater_inv[i * D9_P + j] -= C[i * 3 + 2] * tmp[2 * D9_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_10</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_10"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 10 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 10];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D10_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D10_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D10_P + k] * Updates[D10_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D10_P + k] * Updates[2 * D10_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D10_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D10_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D10_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D10_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D10_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D10_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 10 x D10_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 10; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D10_P; j++) {
|
|
Slater_inv[i * D10_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 3 + 1] * tmp[D10_P + j];
|
|
Slater_inv[i * D10_P + j] -= C[i * 3 + 2] * tmp[2 * D10_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_woodbury_3x3_11: applies three simultaneous updates to Slater_inv</span>
// <span style="color: #b22222;">(11 rows, row stride D11_P) with the Woodbury kernel outlined in the block</span>
// <span style="color: #b22222;">comment at the top of the body.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - checked with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  Updates       - three update vectors stored back to back, D11_P entries each.</span>
// <span style="color: #b22222;">  Updates_index - three indices; each is decremented by one before it is used</span>
// <span style="color: #b22222;">                  as a row number (presumably 1-based on entry - TODO confirm).</span>
// <span style="color: #b22222;">  breakdown     - lower bound accepted for fabs(det(B)).</span>
// <span style="color: #b22222;">  Slater_inv    - read, then updated in place on success.</span>
// <span style="color: #b22222;">  determinant   - optional; when non-NULL it is multiplied by det(B).</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Returns:</span>
// <span style="color: #b22222;">  the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the</span>
// <span style="color: #b22222;">  context check fails; QMCKL_FAILURE when fabs(det(B)) is below breakdown</span>
// <span style="color: #b22222;">  (Slater_inv and *determinant are left untouched in that case);</span>
// <span style="color: #b22222;">  QMCKL_SUCCESS otherwise.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">NOTE(review): the row numbers are not range-checked; an index of 0 wraps</span>
// <span style="color: #b22222;">around in uint64_t arithmetic and an index above 11 reads past C.</span>
// <span style="color: #b22222;">NOTE(review): D11_P, IVDEP and ALIGNED are defined outside this listing;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorization hints - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_11</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
// <span style="color: #b22222;">Refuse to run on a context that fails qmckl_context_check.</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_11"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">Row numbers used below: each entry of Updates_index minus one.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 11 x 3</span>
// <span style="color: #b22222;">C is stored with stride 3: C[i * 3 + c] is the dot product of row i of</span>
// <span style="color: #b22222;">Slater_inv with update vector c, taken over D11_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 11];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D11_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D11_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D11_P + k] * Updates[D11_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D11_P + k] * Updates[2 * D11_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are the entries of B in row-major order: rows row1, row2, row3</span>
// <span style="color: #b22222;">of C with 1 added on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
// <span style="color: #b22222;">det is the cofactor expansion along the first row of B. The early</span>
// <span style="color: #b22222;">return happens before Slater_inv or *determinant is written.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv holds the adjugate of B scaled by idet = 1/det, row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D11_P</span>
// <span style="color: #b22222;">r1dim, r2dim, r3dim point at rows row1, row2, row3 of Slater_inv; these</span>
// <span style="color: #b22222;">rows are used as D. tmp is filled completely before Slater_inv is</span>
// <span style="color: #b22222;">modified, so the final loop works from the pre-update rows.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D11_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D11_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D11_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D11_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D11_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 11 x D11_P</span>
// <span style="color: #b22222;">The three products are subtracted one after another for each element.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 11; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D11_P; j++) {
|
|
Slater_inv[i * D11_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 3 + 1] * tmp[D11_P + j];
|
|
Slater_inv[i * D11_P + j] -= C[i * 3 + 2] * tmp[2 * D11_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_woodbury_3x3_12: applies three simultaneous updates to Slater_inv</span>
// <span style="color: #b22222;">(12 rows, row stride D12_P) with the Woodbury kernel outlined in the block</span>
// <span style="color: #b22222;">comment at the top of the body.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - checked with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  Updates       - three update vectors stored back to back, D12_P entries each.</span>
// <span style="color: #b22222;">  Updates_index - three indices; each is decremented by one before it is used</span>
// <span style="color: #b22222;">                  as a row number (presumably 1-based on entry - TODO confirm).</span>
// <span style="color: #b22222;">  breakdown     - lower bound accepted for fabs(det(B)).</span>
// <span style="color: #b22222;">  Slater_inv    - read, then updated in place on success.</span>
// <span style="color: #b22222;">  determinant   - optional; when non-NULL it is multiplied by det(B).</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Returns:</span>
// <span style="color: #b22222;">  the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the</span>
// <span style="color: #b22222;">  context check fails; QMCKL_FAILURE when fabs(det(B)) is below breakdown</span>
// <span style="color: #b22222;">  (Slater_inv and *determinant are left untouched in that case);</span>
// <span style="color: #b22222;">  QMCKL_SUCCESS otherwise.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">NOTE(review): the row numbers are not range-checked; an index of 0 wraps</span>
// <span style="color: #b22222;">around in uint64_t arithmetic and an index above 12 reads past C.</span>
// <span style="color: #b22222;">NOTE(review): D12_P, IVDEP and ALIGNED are defined outside this listing;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorization hints - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_12</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
// <span style="color: #b22222;">Refuse to run on a context that fails qmckl_context_check.</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_12"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">Row numbers used below: each entry of Updates_index minus one.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 12 x 3</span>
// <span style="color: #b22222;">C is stored with stride 3: C[i * 3 + c] is the dot product of row i of</span>
// <span style="color: #b22222;">Slater_inv with update vector c, taken over D12_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 12];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D12_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D12_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D12_P + k] * Updates[D12_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D12_P + k] * Updates[2 * D12_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are the entries of B in row-major order: rows row1, row2, row3</span>
// <span style="color: #b22222;">of C with 1 added on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
// <span style="color: #b22222;">det is the cofactor expansion along the first row of B. The early</span>
// <span style="color: #b22222;">return happens before Slater_inv or *determinant is written.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv holds the adjugate of B scaled by idet = 1/det, row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D12_P</span>
// <span style="color: #b22222;">r1dim, r2dim, r3dim point at rows row1, row2, row3 of Slater_inv; these</span>
// <span style="color: #b22222;">rows are used as D. tmp is filled completely before Slater_inv is</span>
// <span style="color: #b22222;">modified, so the final loop works from the pre-update rows.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D12_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D12_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D12_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D12_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D12_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 12 x D12_P</span>
// <span style="color: #b22222;">The three products are subtracted one after another for each element.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 12; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D12_P; j++) {
|
|
Slater_inv[i * D12_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 3 + 1] * tmp[D12_P + j];
|
|
Slater_inv[i * D12_P + j] -= C[i * 3 + 2] * tmp[2 * D12_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_woodbury_3x3_13: applies three simultaneous updates to Slater_inv</span>
// <span style="color: #b22222;">(13 rows, row stride D13_P) with the Woodbury kernel outlined in the block</span>
// <span style="color: #b22222;">comment at the top of the body.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - checked with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  Updates       - three update vectors stored back to back, D13_P entries each.</span>
// <span style="color: #b22222;">  Updates_index - three indices; each is decremented by one before it is used</span>
// <span style="color: #b22222;">                  as a row number (presumably 1-based on entry - TODO confirm).</span>
// <span style="color: #b22222;">  breakdown     - lower bound accepted for fabs(det(B)).</span>
// <span style="color: #b22222;">  Slater_inv    - read, then updated in place on success.</span>
// <span style="color: #b22222;">  determinant   - optional; when non-NULL it is multiplied by det(B).</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Returns:</span>
// <span style="color: #b22222;">  the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the</span>
// <span style="color: #b22222;">  context check fails; QMCKL_FAILURE when fabs(det(B)) is below breakdown</span>
// <span style="color: #b22222;">  (Slater_inv and *determinant are left untouched in that case);</span>
// <span style="color: #b22222;">  QMCKL_SUCCESS otherwise.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">NOTE(review): the row numbers are not range-checked; an index of 0 wraps</span>
// <span style="color: #b22222;">around in uint64_t arithmetic and an index above 13 reads past C.</span>
// <span style="color: #b22222;">NOTE(review): D13_P, IVDEP and ALIGNED are defined outside this listing;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorization hints - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_13</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
// <span style="color: #b22222;">Refuse to run on a context that fails qmckl_context_check.</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_13"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">Row numbers used below: each entry of Updates_index minus one.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 13 x 3</span>
// <span style="color: #b22222;">C is stored with stride 3: C[i * 3 + c] is the dot product of row i of</span>
// <span style="color: #b22222;">Slater_inv with update vector c, taken over D13_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 13];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D13_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D13_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D13_P + k] * Updates[D13_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D13_P + k] * Updates[2 * D13_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are the entries of B in row-major order: rows row1, row2, row3</span>
// <span style="color: #b22222;">of C with 1 added on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
// <span style="color: #b22222;">det is the cofactor expansion along the first row of B. The early</span>
// <span style="color: #b22222;">return happens before Slater_inv or *determinant is written.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv holds the adjugate of B scaled by idet = 1/det, row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D13_P</span>
// <span style="color: #b22222;">r1dim, r2dim, r3dim point at rows row1, row2, row3 of Slater_inv; these</span>
// <span style="color: #b22222;">rows are used as D. tmp is filled completely before Slater_inv is</span>
// <span style="color: #b22222;">modified, so the final loop works from the pre-update rows.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D13_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D13_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D13_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D13_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D13_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 13 x D13_P</span>
// <span style="color: #b22222;">The three products are subtracted one after another for each element.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 13; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D13_P; j++) {
|
|
Slater_inv[i * D13_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 3 + 1] * tmp[D13_P + j];
|
|
Slater_inv[i * D13_P + j] -= C[i * 3 + 2] * tmp[2 * D13_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
// <span style="color: #b22222;">qmckl_woodbury_3x3_14: applies three simultaneous updates to Slater_inv</span>
// <span style="color: #b22222;">(14 rows, row stride D14_P) with the Woodbury kernel outlined in the block</span>
// <span style="color: #b22222;">comment at the top of the body.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Parameters:</span>
// <span style="color: #b22222;">  context       - checked with qmckl_context_check before any work is done.</span>
// <span style="color: #b22222;">  Updates       - three update vectors stored back to back, D14_P entries each.</span>
// <span style="color: #b22222;">  Updates_index - three indices; each is decremented by one before it is used</span>
// <span style="color: #b22222;">                  as a row number (presumably 1-based on entry - TODO confirm).</span>
// <span style="color: #b22222;">  breakdown     - lower bound accepted for fabs(det(B)).</span>
// <span style="color: #b22222;">  Slater_inv    - read, then updated in place on success.</span>
// <span style="color: #b22222;">  determinant   - optional; when non-NULL it is multiplied by det(B).</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">Returns:</span>
// <span style="color: #b22222;">  the value of qmckl_failwith(context, QMCKL_NULL_CONTEXT, ...) when the</span>
// <span style="color: #b22222;">  context check fails; QMCKL_FAILURE when fabs(det(B)) is below breakdown</span>
// <span style="color: #b22222;">  (Slater_inv and *determinant are left untouched in that case);</span>
// <span style="color: #b22222;">  QMCKL_SUCCESS otherwise.</span>
// <span style="color: #b22222;"></span>
// <span style="color: #b22222;">NOTE(review): the row numbers are not range-checked; an index of 0 wraps</span>
// <span style="color: #b22222;">around in uint64_t arithmetic and an index above 14 reads past C.</span>
// <span style="color: #b22222;">NOTE(review): D14_P, IVDEP and ALIGNED are defined outside this listing;</span>
// <span style="color: #b22222;">presumably the padded row length and vectorization hints - confirm.</span>
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_14</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
// <span style="color: #b22222;">Refuse to run on a context that fails qmckl_context_check.</span>
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_14"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
// <span style="color: #b22222;">Row numbers used below: each entry of Updates_index minus one.</span>
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 14 x 3</span>
// <span style="color: #b22222;">C is stored with stride 3: C[i * 3 + c] is the dot product of row i of</span>
// <span style="color: #b22222;">Slater_inv with update vector c, taken over D14_P entries.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 14];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D14_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D14_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D14_P + k] * Updates[D14_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D14_P + k] * Updates[2 * D14_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
// <span style="color: #b22222;">B0..B8 are the entries of B in row-major order: rows row1, row2, row3</span>
// <span style="color: #b22222;">of C with 1 added on the diagonal.</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
// <span style="color: #b22222;">det is the cofactor expansion along the first row of B. The early</span>
// <span style="color: #b22222;">return happens before Slater_inv or *determinant is written.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
// <span style="color: #b22222;">Binv holds the adjugate of B scaled by idet = 1/det, row-major.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D14_P</span>
// <span style="color: #b22222;">r1dim, r2dim, r3dim point at rows row1, row2, row3 of Slater_inv; these</span>
// <span style="color: #b22222;">rows are used as D. tmp is filled completely before Slater_inv is</span>
// <span style="color: #b22222;">modified, so the final loop works from the pre-update rows.</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D14_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D14_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D14_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D14_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D14_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 14 x D14_P</span>
// <span style="color: #b22222;">The three products are subtracted one after another for each element.</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 14; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D14_P; j++) {
|
|
Slater_inv[i * D14_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 3 + 1] * tmp[D14_P + j];
|
|
Slater_inv[i * D14_P + j] -= C[i * 3 + 2] * tmp[2 * D14_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_15</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_15"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 15 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 15];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D15_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D15_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D15_P + k] * Updates[D15_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D15_P + k] * Updates[2 * D15_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D15_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D15_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D15_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D15_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D15_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D15_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 15 x D15_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 15; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D15_P; j++) {
|
|
Slater_inv[i * D15_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 3 + 1] * tmp[D15_P + j];
|
|
Slater_inv[i * D15_P + j] -= C[i * 3 + 2] * tmp[2 * D15_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_16</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_16"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 16 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 16];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D16_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D16_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D16_P + k] * Updates[D16_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D16_P + k] * Updates[2 * D16_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D16_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D16_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D16_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D16_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D16_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D16_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 16 x D16_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 16; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D16_P; j++) {
|
|
Slater_inv[i * D16_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 3 + 1] * tmp[D16_P + j];
|
|
Slater_inv[i * D16_P + j] -= C[i * 3 + 2] * tmp[2 * D16_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_17</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_17"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 17 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 17];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D17_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D17_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D17_P + k] * Updates[D17_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D17_P + k] * Updates[2 * D17_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D17_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D17_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D17_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D17_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D17_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D17_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 17 x D17_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 17; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D17_P; j++) {
|
|
Slater_inv[i * D17_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 3 + 1] * tmp[D17_P + j];
|
|
Slater_inv[i * D17_P + j] -= C[i * 3 + 2] * tmp[2 * D17_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_18</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_18"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 18 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 18];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D18_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D18_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D18_P + k] * Updates[D18_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D18_P + k] * Updates[2 * D18_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D18_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D18_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D18_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D18_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D18_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D18_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 18 x D18_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 18; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D18_P; j++) {
|
|
Slater_inv[i * D18_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 3 + 1] * tmp[D18_P + j];
|
|
Slater_inv[i * D18_P + j] -= C[i * 3 + 2] * tmp[2 * D18_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_19</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_19"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 19 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 19];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D19_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D19_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D19_P + k] * Updates[D19_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D19_P + k] * Updates[2 * D19_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D19_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D19_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D19_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D19_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D19_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D19_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 19 x D19_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 19; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D19_P; j++) {
|
|
Slater_inv[i * D19_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 3 + 1] * tmp[D19_P + j];
|
|
Slater_inv[i * D19_P + j] -= C[i * 3 + 2] * tmp[2 * D19_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_20</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_20"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 20 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 20];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D20_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D20_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D20_P + k] * Updates[D20_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D20_P + k] * Updates[2 * D20_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D20_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D20_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D20_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D20_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D20_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D20_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 20 x D20_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 20; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D20_P; j++) {
|
|
Slater_inv[i * D20_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 3 + 1] * tmp[D20_P + j];
|
|
Slater_inv[i * D20_P + j] -= C[i * 3 + 2] * tmp[2 * D20_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
|
|
<span style="color: #a020f0;">static</span> <span style="color: #a020f0;">inline</span> <span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3_21</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Updates,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">__restrict</span> Updates_index,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> Slater_inv,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">__restrict</span> determinant) {
|
|
/*
|
|
<span style="color: #b22222;"> C := S^{-1} * U, dim x 3</span>
|
|
<span style="color: #b22222;"> B := 1 + V * C, 3 x 3</span>
|
|
<span style="color: #b22222;"> D := V * S^{-1}, 3 x dim</span>
|
|
*/
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3_21"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row1</span> = (Updates_index[0] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row2</span> = (Updates_index[1] - 1);
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">row3</span> = (Updates_index[2] - 1);
|
|
|
|
// <span style="color: #b22222;">Compute C = (S^T)^{-1}U : 21 x 3</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">C</span>[3 * 21];
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">k</span> = 0; k < D21_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D21_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D21_P + k] * Updates[D21_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D21_P + k] * Updates[2 * D21_P + k];
|
|
}
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute B = 1 + VC : 3 x 3</span>
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B0</span> = C[row1 * 3] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B1</span> = C[row1 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B2</span> = C[row1 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B3</span> = C[row2 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B4</span> = C[row2 * 3 + 1] + 1;
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B5</span> = C[row2 * 3 + 2];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B6</span> = C[row3 * 3];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B7</span> = C[row3 * 3 + 1];
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">B8</span> = C[row3 * 3 + 2] + 1;
|
|
|
|
// <span style="color: #b22222;">Check if determinant of B is not too close to zero</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a0522d;">det</span>;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
<span style="color: #a020f0;">if</span> (fabs(det) < breakdown) {
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
// <span style="color: #b22222;">Update det(Slater) if passed</span>
|
|
<span style="color: #a020f0;">if</span> (determinant)
|
|
*determinant *= det;
|
|
|
|
// <span style="color: #b22222;">Compute B^{-1} with explicit formula for 3 x 3 inversion</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">Binv</span>[9], <span style="color: #a0522d;">idet</span> = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// <span style="color: #b22222;">tmp = B^{-1}D : 3 x D21_P</span>
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">tmp</span>[3 * D21_P];
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r1dim</span> = &(Slater_inv[row1 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r2dim</span> = &(Slater_inv[row2 * D21_P]);
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">r3dim</span> = &(Slater_inv[row3 * D21_P]);
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D21_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D21_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// <span style="color: #b22222;">Compute (S^T)^{-1} - C * tmp : 21 x D21_P</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">i</span> = 0; i < 21; i++) {
|
|
<span style="color: #228b22;">IVDEP</span>
|
|
<span style="color: #0000ff;">ALIGNED</span>
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">j</span> = 0; j < D21_P; j++) {
|
|
Slater_inv[i * D21_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 3 + 1] * tmp[D21_P + j];
|
|
Slater_inv[i * D21_P + j] -= C[i * 3 + 2] * tmp[2 * D21_P + j];
|
|
}
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_woodbury_3x3</span>(<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_woodbury_3x3"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // <span style="color: #b22222;">Most cases</span>
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_2(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 3:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_3(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 4:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_4(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 5:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_5(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 6:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_6(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 7:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_7(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 8:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_8(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 9:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_9(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 10:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_10(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 11:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_11(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 12:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_12(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 13:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_13(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 14:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_14(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 15:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_15(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 16:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_16(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 17:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_17(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 18:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_18(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 19:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_19(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 20:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_20(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #a020f0;">case</span> 21:
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_21(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
}
|
|
<span style="color: #a020f0;">else</span> { // <span style="color: #b22222;">Updating smaller sub-matrix</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_woodbury_3x3_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
// <span style="color: #b22222;">return qmckl_woodbury_3x3_hpc(</span>
|
|
// <span style="color: #b22222;">context,</span>
|
|
// <span style="color: #b22222;">LDS,</span>
|
|
// <span style="color: #b22222;">Dim,</span>
|
|
// <span style="color: #b22222;">Updates,</span>
|
|
// <span style="color: #b22222;">Updates_index,</span>
|
|
// <span style="color: #b22222;">breakdown,</span>
|
|
// <span style="color: #b22222;">Slater_inv,</span>
|
|
// <span style="color: #b22222;">determinant);</span>
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgb36c05c" class="outline-4">
|
|
<h4 id="orgb36c05c"><span class="section-number-4">5.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
|
|
<div class="outline-text-4" id="text-5-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org508cca3" class="outline-4">
|
|
<h4 id="org508cca3"><span class="section-number-4">5.1.8</span> Performance</h4>
|
|
<div class="outline-text-4" id="text-5-1-8">
|
|
<p>
|
|
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
|
it is certain that they will not result in a singular matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgb80d61f" class="outline-4">
|
|
<h4 id="orgb80d61f"><span class="section-number-4">5.1.9</span> Tests</h4>
|
|
<div class="outline-text-4" id="text-5-1-9">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span>(Updates3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv3_1 != <span style="color: #008b8b;">NULL</span>);
|
|
det = -1.23743195512859e-09;
|
|
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
|
<span style="color: #0000ff;">assert</span>(fabs(det - 1.602708950725074e-10) < 1e-15);
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i < Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j < Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<div id="outline-container-org9d94d73" class="outline-2">
|
|
<h2 id="org9d94d73"><span class="section-number-2">6</span> Sherman-Morrison with Slagel Splitting</h2>
|
|
<div class="outline-text-2" id="text-6">
|
|
</div>
|
|
<div id="outline-container-org36e8ada" class="outline-3">
|
|
<h3 id="org36e8ada"><span class="section-number-3">6.1</span> <code>qmckl_sm_splitting</code></h3>
|
|
<div class="outline-text-3" id="text-6-1">
|
|
</div>
|
|
|
|
<div id="outline-container-orgd3daa84" class="outline-4">
|
|
<h4 id="orgd3daa84"><span class="section-number-4">6.1.1</span> Introduction</h4>
|
|
<div class="outline-text-4" id="text-6-1-1">
|
|
<p>
|
|
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator \(1+v_j^T S^{-1} u_j\) in
|
|
the Sherman-Morrison formula is deemed to be too close to zero, the update \(u_j\) is split in half:
|
|
\(u_j \rightarrow \frac{1}{2} u_j\). One half is applied immediately –necessarily increasing the value of the
|
|
denominator because of the split– while the other half is put in a queue that will be applied when all the
|
|
remaining updates have been treated.
|
|
</p>
|
|
|
|
<p>
|
|
The kernel is executed recursively until the queue is either empty and all
|
|
updates are applied successfully, or the size of the queue equals the number of initial updates. In the last
|
|
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
|
|
kernel exits with an exit code indicating failure.
|
|
</p>
|
|
|
|
<p>
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgeb0d501" class="outline-4">
|
|
<h4 id="orgeb0d501"><span class="section-number-4">6.1.2</span> API</h4>
|
|
<div class="outline-text-4" id="text-6-1-2">
|
|
<table id="orgab0d8c7" border="2" cellspacing="0" cellpadding="6" rules="groups" frame="hsides">
|
|
|
|
|
|
<colgroup>
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
|
|
<col class="org-left" />
|
|
</colgroup>
|
|
<thead>
|
|
<tr>
|
|
<th scope="col" class="org-left">Variable</th>
|
|
<th scope="col" class="org-left">Type</th>
|
|
<th scope="col" class="org-left">In/Out</th>
|
|
<th scope="col" class="org-left">Description</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<tr>
|
|
<td class="org-left">context</td>
|
|
<td class="org-left">qmckl_context</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Global state</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">LDS</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Leading dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Dim</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Dimension of Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">N_updates</td>
|
|
<td class="org-left">uint64_t</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Number of rank-1 updates to be applied to Slater_inv</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Updates</td>
|
|
<td class="org-left">double[N_updates*LDS]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Updates_index</td>
|
|
<td class="org-left">uint64_t[N_updates]</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Array containing the indices of the rank-1 updates</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">breakdown</td>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">in</td>
|
|
<td class="org-left">Break-down parameter on which to fail or not</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">Slater_inv</td>
|
|
<td class="org-left">double[Dim*LDS]</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Array containing the inverse of a Slater-matrix</td>
|
|
</tr>
|
|
|
|
<tr>
|
|
<td class="org-left">determinant</td>
|
|
<td class="org-left">double</td>
|
|
<td class="org-left">inout</td>
|
|
<td class="org-left">Determinant of the Slater-matrix</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgd4ce03c" class="outline-4">
|
|
<h4 id="orgd4ce03c"><span class="section-number-4">6.1.3</span> Requirements</h4>
|
|
<div class="outline-text-4" id="text-6-1-3">
|
|
<ul class="org-ul">
|
|
<li><code>context</code> is not <code>QMCKL_NULL_CONTEXT</code></li>
|
|
<li><code>LDS >= 2</code></li>
|
|
<li><code>Dim >= 2</code></li>
|
|
<li><code>N_updates >= 1</code></li>
|
|
<li><code>Updates</code> is allocated with \(N_{updates} \times LDS\) elements</li>
|
|
<li><code>Updates_index</code> is allocated with \(N_{updates}\) elements</li>
|
|
<li><code>breakdown</code> is a small number such that \(0 < breakdown \ll 1\)</li>
|
|
<li><code>Slater_inv</code> is allocated with \(Dim \times LDS\) elements</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orge30d097" class="outline-4">
|
|
<h4 id="orge30d097"><span class="section-number-4">6.1.4</span> Pedagogical kernel source (in Fortran)</h4>
|
|
<div class="outline-text-4" id="text-6-1-4">
|
|
<p>
|
|
The following source code written in Fortran is intended to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
</p>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-f90"><span style="color: #228b22;">integer</span><span style="color: #a0522d;"> recursive function qmckl_sm_splitting_doc_f( </span><span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant) <span style="color: #a020f0;">result</span>(info)
|
|
|
|
<span style="color: #a020f0;">use</span> <span style="color: #0000ff;">qmckl</span>
|
|
<span style="color: #a020f0;">implicit</span> <span style="color: #228b22;">none</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> context</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> lds, dim</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> nupdates</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> updates_index(nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> upds(lds * nupdates)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(in) ::<span style="color: #a0522d;"> breakdown</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> s_inv(dim * lds)</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">intent</span>(inout) ::<span style="color: #a0522d;"> determinant</span>
|
|
|
|
<span style="color: #228b22;">integer</span> , <span style="color: #a020f0;">external</span> ::<span style="color: #a0522d;"> qmckl_sm_splitting_core_doc_f</span>
|
|
|
|
<span style="color: #228b22;">integer</span>*8 ::<span style="color: #a0522d;"> Later</span>
|
|
<span style="color: #228b22;">integer</span>*8 , <span style="color: #a020f0;">dimension</span>(nupdates) ::<span style="color: #a0522d;"> Later_index</span>
|
|
<span style="color: #228b22;">real</span>*8 , <span style="color: #a020f0;">dimension</span>(lds * nupdates) ::<span style="color: #a0522d;"> Later_updates</span>
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
<span style="color: #a020f0;">if</span> (context == QMCKL_NULL_CONTEXT) <span style="color: #a020f0;">then</span>
|
|
info = QMCKL_INVALID_CONTEXT
|
|
<span style="color: #a020f0;">return</span>
|
|
<span style="color: #a020f0;">endif</span>
|
|
|
|
Later = 0
|
|
Later_index = 0
|
|
Later_updates = 0
|
|
|
|
info = qmckl_sm_splitting_core_doc_f( <span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
nupdates, <span style="color: #a020f0;">&</span>
|
|
upds, <span style="color: #a020f0;">&</span>
|
|
updates_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
Later_updates, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
determinant)
|
|
|
|
<span style="color: #a020f0;">if</span> (Later > 0) <span style="color: #a020f0;">then</span>
|
|
info = qmckl_sm_splitting_doc_f( <span style="color: #a020f0;">&</span>
|
|
context, <span style="color: #a020f0;">&</span>
|
|
lds, dim, <span style="color: #a020f0;">&</span>
|
|
Later, <span style="color: #a020f0;">&</span>
|
|
Later_updates, <span style="color: #a020f0;">&</span>
|
|
Later_index, <span style="color: #a020f0;">&</span>
|
|
breakdown, <span style="color: #a020f0;">&</span>
|
|
s_inv, <span style="color: #a020f0;">&</span>
|
|
determinant)
|
|
<span style="color: #a020f0;">end if</span>
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
<span style="color: #a020f0;">end function</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc_f</span>
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgebcf1dd" class="outline-5">
|
|
<h5 id="orgebcf1dd"><span class="section-number-5">6.1.4.1</span> C interface to the pedagogical kernel (not directly exposed)</h5>
|
|
<div class="outline-text-5" id="text-6-1-4-1">
|
|
<p>
|
|
The following Fortran function <code>qmckl_sm_splitting_core_doc</code> makes sure
|
|
that the pedagogical kernel <code>qmckl_sm_splitting_core_doc_f</code>, written in
|
|
Fortran, can be called from C using the <code>ISO_C_BINDING</code>. The Fortran function
|
|
<code>qmckl_sm_splitting_core_doc</code> will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org7d3b8da" class="outline-4">
|
|
<h4 id="org7d3b8da"><span class="section-number-4">6.1.5</span> C headers (exposed in qmckl.h)</h4>
|
|
<div class="outline-text-4" id="text-6-1-5">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_doc</span> (
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span> );
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgb34b665" class="outline-4">
|
|
<h4 id="orgb34b665"><span class="section-number-4">6.1.6</span> C source</h4>
|
|
<div class="outline-text-4" id="text-6-1-6">
|
|
<div class="org-src-container">
|
|
<pre class="src src-python" id="org99b3d4c"><span style="color: #a0522d;">text</span>=<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #8b2252;">case {Dim}: {</span>
|
|
<span style="color: #8b2252;"> rc = qmckl_sm_splitting_core_{Dim}(</span>
|
|
<span style="color: #8b2252;"> context,</span>
|
|
<span style="color: #8b2252;"> N_updates,</span>
|
|
<span style="color: #8b2252;"> Updates,</span>
|
|
<span style="color: #8b2252;"> Updates_index,</span>
|
|
<span style="color: #8b2252;"> breakdown,</span>
|
|
<span style="color: #8b2252;"> Slater_inv,</span>
|
|
<span style="color: #8b2252;"> later_updates,</span>
|
|
<span style="color: #8b2252;"> later_index, &later, determinant);</span>
|
|
<span style="color: #8b2252;"> break;</span>
|
|
<span style="color: #8b2252;">}</span>
|
|
<span style="color: #8b2252;">"""</span>
|
|
<span style="color: #a0522d;">result</span> = []
|
|
<span style="color: #a020f0;">for</span> Dim <span style="color: #a020f0;">in</span> <span style="color: #483d8b;">range</span>(2, 22):
|
|
<span style="color: #a0522d;">Dim</span>=<span style="color: #483d8b;">str</span>(Dim)
|
|
result.append(text.replace(<span style="color: #8b2252;">"{Dim}"</span>,Dim) )
|
|
|
|
<span style="color: #a020f0;">return</span> <span style="color: #8b2252;">'\n'</span>.join(result)
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting_hpc</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting_hpc"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
|
|
<span style="color: #228b22;">double</span> <span style="color: #a020f0;">__attribute__</span>((aligned(8))) <span style="color: #a0522d;">later_updates</span>[LDS * N_updates];
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later_index</span>[N_updates];
|
|
<span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">later</span> = 0;
|
|
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span>;
|
|
<span style="color: #a020f0;">if</span> (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) {
|
|
<span style="color: #a020f0;">switch</span> (Dim) {
|
|
<span style="color: #a020f0;">case</span> 2: {
|
|
rc = qmckl_sm_splitting_core_2(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 3: {
|
|
rc = qmckl_sm_splitting_core_3(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 4: {
|
|
rc = qmckl_sm_splitting_core_4(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 5: {
|
|
rc = qmckl_sm_splitting_core_5(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 6: {
|
|
rc = qmckl_sm_splitting_core_6(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 7: {
|
|
rc = qmckl_sm_splitting_core_7(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 8: {
|
|
rc = qmckl_sm_splitting_core_8(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 9: {
|
|
rc = qmckl_sm_splitting_core_9(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 10: {
|
|
rc = qmckl_sm_splitting_core_10(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 11: {
|
|
rc = qmckl_sm_splitting_core_11(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 12: {
|
|
rc = qmckl_sm_splitting_core_12(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 13: {
|
|
rc = qmckl_sm_splitting_core_13(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 14: {
|
|
rc = qmckl_sm_splitting_core_14(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 15: {
|
|
rc = qmckl_sm_splitting_core_15(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 16: {
|
|
rc = qmckl_sm_splitting_core_16(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 17: {
|
|
rc = qmckl_sm_splitting_core_17(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 18: {
|
|
rc = qmckl_sm_splitting_core_18(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 19: {
|
|
rc = qmckl_sm_splitting_core_19(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 20: {
|
|
rc = qmckl_sm_splitting_core_20(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
|
|
|
|
<span style="color: #a020f0;">case</span> 21: {
|
|
rc = qmckl_sm_splitting_core_21(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
<span style="color: #a020f0;">default</span>: {
|
|
assert(0 == 1 && <span style="color: #8b2252;">"TEMPLATE NOT IMPLEMENTED!"</span>);
|
|
<span style="color: #a020f0;">break</span>;
|
|
}
|
|
}
|
|
} <span style="color: #a020f0;">else</span> {
|
|
rc = qmckl_sm_splitting_core_hpc(
|
|
context, LDS, Dim, N_updates, Updates, Updates_index,
|
|
breakdown, Slater_inv, later_updates,
|
|
later_index, &later, determinant);
|
|
}
|
|
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
|
|
<span style="color: #a020f0;">if</span> (later > 0) {
|
|
<span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #a0522d;">rc</span> = qmckl_sm_splitting_hpc(
|
|
context, LDS, Dim, later,
|
|
later_updates, later_index,
|
|
breakdown, Slater_inv, determinant);
|
|
<span style="color: #a020f0;">if</span> (rc != QMCKL_SUCCESS) <span style="color: #a020f0;">return</span> QMCKL_FAILURE;
|
|
}
|
|
|
|
<span style="color: #a020f0;">return</span> QMCKL_SUCCESS;
|
|
}
|
|
</pre>
|
|
</div>
|
|
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #228b22;">qmckl_exit_code</span> <span style="color: #0000ff;">qmckl_sm_splitting</span>(
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">qmckl_context</span> <span style="color: #a0522d;">context</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">LDS</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">Dim</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span> <span style="color: #a0522d;">N_updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Updates</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">uint64_t</span>* <span style="color: #a0522d;">Updates_index</span>,
|
|
<span style="color: #a020f0;">const</span> <span style="color: #228b22;">double</span> <span style="color: #a0522d;">breakdown</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">Slater_inv</span>,
|
|
<span style="color: #228b22;">double</span>* <span style="color: #a0522d;">determinant</span>) {
|
|
|
|
<span style="color: #a020f0;">if</span> (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
<span style="color: #a020f0;">return</span> qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
<span style="color: #8b2252;">"qmckl_sm_splitting"</span>,
|
|
<span style="color: #008b8b;">NULL</span>);
|
|
}
|
|
<span style="color: #483d8b;"> #ifdef</span> HAVE_HPC
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #else</span>
|
|
<span style="color: #a020f0;">return</span> qmckl_sm_splitting_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
<span style="color: #483d8b;"> #endif</span>
|
|
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org16eff54" class="outline-4">
|
|
<h4 id="org16eff54"><span class="section-number-4">6.1.7</span> Fortran interfaces (exposed in qmckl_f.F90)</h4>
|
|
<div class="outline-text-4" id="text-6-1-7">
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-org2014d20" class="outline-4">
|
|
<h4 id="org2014d20"><span class="section-number-4">6.1.8</span> Performance…</h4>
|
|
<div class="outline-text-4" id="text-6-1-8">
|
|
<p>
|
|
This kernel performs best when there are two or more rank-1 update cycles and the fail rate is high.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div id="outline-container-orgd086d3f" class="outline-4">
|
|
<h4 id="orgd086d3f"><span class="section-number-4">6.1.9</span> Test</h4>
|
|
<div class="outline-text-4" id="text-6-1-9">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span>(Updates3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Updates_index3 != <span style="color: #008b8b;">NULL</span>);
|
|
<span style="color: #0000ff;">assert</span>(Slater_inv3_2 != <span style="color: #008b8b;">NULL</span>);
|
|
det = -1.23743195512859e-09;
|
|
rc = qmckl_sm_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &amp;det);
|
|
<span style="color: #0000ff;">assert</span>(fabs(det - 1.602708950725074e-10) &lt; 1e-15);
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">k</span> = 0; k &lt; Dim; k++) {
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">i</span> = 0; i &lt; Dim; i++) {
|
|
<span style="color: #a020f0;">for</span> (<span style="color: #228b22;">unsigned</span> <span style="color: #228b22;">int</span> <span style="color: #a0522d;">j</span> = 0; j &lt; Dim; j++) {
|
|
<span style="color: #a020f0;">if</span> (i == j &amp;&amp; fabs(res[i * Dim + j] - 1) &gt; tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
<span style="color: #a020f0;">if</span> (i != j &amp;&amp; fabs(res[i * Dim + j]) &gt; tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
<span style="color: #0000ff;">assert</span>(rc == QMCKL_SUCCESS);
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<div id="outline-container-orgbd320f6" class="outline-2">
|
|
<h2 id="orgbd320f6"><span class="section-number-2">7</span> End of files</h2>
|
|
<div class="outline-text-2" id="text-7">
|
|
<div class="org-src-container">
|
|
<pre class="src src-c"><span style="color: #0000ff;">assert</span> (<span style="color: #228b22;">qmckl_context_destroy</span>(<span style="color: #a0522d;">context</span>) == QMCKL_SUCCESS);
|
|
<span style="color: #a020f0;">return</span> 0;
|
|
|
|
}
|
|
</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="postamble" class="status">
|
|
<p class="author">Author: TREX CoE</p>
|
|
<p class="date">Created: 2023-09-14 Thu 09:02</p>
|
|
<p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
|
|
</div>
|
|
</body>
|
|
</html>
|