2021-07-19 12:01:07 +02:00
|
|
|
#+TITLE: Sherman-Morrison-Woodbury
|
|
|
|
#+SETUPFILE: ../tools/theme.setup
|
|
|
|
#+INCLUDE: ../tools/lib.org
|
2021-09-14 09:55:55 +02:00
|
|
|
#+STARTUP: content
|
2021-07-19 12:01:07 +02:00
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
Low- and high-level functions that use the Sherman-Morrison and
|
2021-07-21 17:56:04 +02:00
|
|
|
Woodbury matrix inversion formulas to update the inverse of a
|
2021-09-07 09:27:22 +02:00
|
|
|
non-singular matrix
|
2021-09-07 11:22:54 +02:00
|
|
|
|
2021-07-19 12:01:07 +02:00
|
|
|
* Headers
|
2021-07-21 17:56:04 +02:00
|
|
|
#+begin_src elisp :noexport :results none :exports none
|
2023-02-02 17:04:34 +01:00
|
|
|
(org-babel-lob-ingest "../tools/lib.org")
|
|
|
|
#+end_src
|
2021-07-19 12:01:07 +02:00
|
|
|
|
|
|
|
#+begin_src c :comments link :tangle (eval c_test) :noweb yes
|
2023-02-02 17:04:34 +01:00
|
|
|
#include "qmckl.h"
|
|
|
|
#include "assert.h"
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
#include <math.h>
|
|
|
|
|
|
|
|
int main() {
|
|
|
|
qmckl_context context;
|
|
|
|
context = qmckl_context_create();
|
|
|
|
qmckl_exit_code rc;
|
2021-07-19 12:01:07 +02:00
|
|
|
#+end_src
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:kernel_generator_range
|
|
|
|
#+begin_src python :noweb yes :exports none
|
2023-02-02 17:04:34 +01:00
|
|
|
range(2, 22)
|
2023-01-27 15:24:52 +01:00
|
|
|
#+end_src
|
|
|
|
|
2021-07-21 17:30:12 +02:00
|
|
|
* Naïve Sherman-Morrison
|
2023-01-27 14:29:35 +01:00
|
|
|
** ~qmckl_sherman_morrison_naive~
|
2023-02-02 17:04:34 +01:00
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_sherman_morrison_naive
|
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
|
|
|
|
|
|
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
|
|
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
|
|
|
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
|
|
|
|
|
|
|
The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is
|
|
|
|
\[
|
2021-09-01 15:29:14 +02:00
|
|
|
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
2023-02-02 17:04:34 +01:00
|
|
|
\]
|
|
|
|
|
|
|
|
where
|
|
|
|
$S$ is the Slater-matrix,
|
|
|
|
$u$ and $v^T$ are the column and row vectors containing the updates,
|
|
|
|
$S^{-1}$ is the inverse of the Slater-matrix.
|
|
|
|
|
|
|
|
Even though the Slater-matrix $S$ with all updates applied at once is invertable, during the course of applying
|
|
|
|
updates to the inverse Slater-matrix $S^{-1}$ one-by-one it can happen that one of the intermediate inverse
|
|
|
|
matrices $S^{-1}$ becomes singular. Therefore a global threshold value $\epsilon$ is defined that is used to
|
|
|
|
evaluate each individual update $u_j$ when it is applied.
|
|
|
|
|
|
|
|
This value sets the lower bound for which the
|
|
|
|
denominator $1+v_j^TS^{-1}u_j$ is considered to be too small and will most probably result in a singular matrix
|
|
|
|
$S$, or at least in an inverse of $S$ of very poor numerical quality. Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$,
|
|
|
|
the update is applied as usual and the kernel exits with return code \texttt{QMCKL_SUCCESS}.
|
|
|
|
If $1+v_j^TS^{-1}u_j \leq \epsilon$ the update is rejected and the kernel exits with return code \texttt{QMCKL_FAILURE}.
|
|
|
|
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
|
|
|
#+NAME: qmckl_sherman_morrison_naive_args
|
|
|
|
| qmckl_context | context | in | Global state |
|
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
|
|
|
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
|
|
| double | Updates[N_updates*Dim] | in | Array containing the updates |
|
|
|
|
| uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates |
|
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
|
|
|
| double* | determinant | inout | Determinant of the Slater-matrix |
|
2021-09-07 11:22:54 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
*** Requirements
|
2021-10-13 10:31:24 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
|
|
|
* ~LDS >= 2~
|
|
|
|
* ~Dim >= 2~
|
|
|
|
* ~N_updates >= 1~
|
|
|
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
|
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
|
|
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
|
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
2021-07-19 12:01:07 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
*** Fortran source
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
|
|
integer function qmckl_sherman_morrison_naive_doc_f &
|
|
|
|
(context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
|
|
bind(C)
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
implicit none
|
2021-07-19 14:00:10 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-07-19 14:00:10 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
write(*,*) "Function 'qmckl_sherman_morrison_naive_doc_f' does nothing for now..."
|
2021-07-19 18:25:10 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
end function qmckl_sherman_morrison_naive_doc_f
|
|
|
|
#+end_src
|
2023-01-18 18:45:59 +01:00
|
|
|
|
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
*** C header
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
|
|
qmckl_exit_code qmckl_sherman_morrison_naive(
|
|
|
|
const qmckl_context context,
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
|
|
qmckl_exit_code qmckl_sherman_morrison_naive_doc(
|
|
|
|
const qmckl_context context,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
|
|
|
#+end_src
|
2021-07-19 18:25:10 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
*** C source
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <math.h>
|
|
|
|
#include "qmckl.h"
|
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
// Order important because
|
|
|
|
// __GNUC__ also set in ICC, ICX and CLANG
|
|
|
|
// __clang__ also set in ICX
|
|
|
|
#if defined(__INTEL_COMPILER)
|
|
|
|
#define IVDEP _Pragma("ivdep")
|
|
|
|
#define ALIGNED _Pragma("vector aligned")
|
|
|
|
#elif defined(__INTEL_LLVM_COMPILER)
|
|
|
|
#define IVDEP _Pragma("ivdep")
|
|
|
|
#define ALIGNED _Pragma("vector aligned")
|
|
|
|
#elif defined(__clang__)
|
|
|
|
#define IVDEP _Pragma("clang loop vectorize(enable)")
|
|
|
|
#define ALIGNED
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
#define IVDEP _Pragma("GCC ivdep")
|
|
|
|
#define ALIGNED
|
|
|
|
#endif
|
|
|
|
|
|
|
|
qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
|
|
|
|
const qmckl_context context,
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith( context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_sherman_morrison_naive_hpc",
|
|
|
|
NULL);
|
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
double __attribute__((aligned(8))) C[Dim];
|
|
|
|
double __attribute__((aligned(8))) D[LDS];
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
uint64_t l = 0;
|
|
|
|
// For each update
|
|
|
|
while (l < N_updates) {
|
|
|
|
// C = S^{-1} x u_l
|
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-18 18:45:59 +01:00
|
|
|
C[i] = 0.0f;
|
2023-01-19 15:25:12 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
2023-01-25 18:46:56 +01:00
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
2023-02-02 17:04:34 +01:00
|
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
2023-01-18 18:45:59 +01:00
|
|
|
}
|
2023-02-02 17:04:34 +01:00
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// Denominator: v_l^T * C
|
|
|
|
const int cui = Updates_index[l] - 1;
|
|
|
|
double den = 1.0f + C[cui];
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
if (fabs(den) < breakdown)
|
2023-01-18 18:45:59 +01:00
|
|
|
return QMCKL_FAILURE;
|
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
double iden = 1.0f / den;
|
|
|
|
|
|
|
|
// Update det(A)
|
|
|
|
if (determinant)
|
2023-01-18 18:45:59 +01:00
|
|
|
*determinant *= den;
|
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// selecting column: v_l^T * S_inv
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
2023-01-18 18:45:59 +01:00
|
|
|
D[j] = Slater_inv[cui * LDS + j];
|
2023-02-02 17:04:34 +01:00
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// A^{-1} = A^{-1} - C x D / den
|
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-19 15:25:12 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
2023-01-25 18:46:56 +01:00
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
2023-02-02 17:04:34 +01:00
|
|
|
const double update = C[i] * D[j] * iden;
|
|
|
|
Slater_inv[i * LDS + j] -= update;
|
2023-01-18 18:45:59 +01:00
|
|
|
}
|
2023-02-02 17:04:34 +01:00
|
|
|
}
|
|
|
|
l += 1;
|
2023-01-18 18:45:59 +01:00
|
|
|
}
|
2023-02-02 17:04:34 +01:00
|
|
|
return QMCKL_SUCCESS;
|
2022-12-15 16:14:57 +01:00
|
|
|
}
|
2023-02-02 17:04:34 +01:00
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+NAME:naive_template_code
|
|
|
|
#+begin_src c
|
|
|
|
static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}(
|
|
|
|
const qmckl_context context,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_sherman_morrison_naive_{Dim}",
|
|
|
|
NULL);
|
|
|
|
}
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
#define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
double __attribute__((aligned(8))) C[{Dim}];
|
|
|
|
double __attribute__((aligned(8))) D[D{Dim}_P];
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
uint64_t l = 0;
|
|
|
|
// For each update
|
|
|
|
while (l < N_updates) {
|
|
|
|
// C = A^{-1} x U_l
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
C[i] = 0;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
|
|
}
|
|
|
|
}
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// Denominator
|
|
|
|
const int cui = Updates_index[l] - 1;
|
|
|
|
double den = 1.0f + C[cui];
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
if (fabs(den) < breakdown) {
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
double iden = 1.0f / den;
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// Update det(A)
|
|
|
|
if (determinant)
|
|
|
|
*determinant *= den;
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// selecting column: D = v_l^T * S_inv
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
// A^{-1} = A^{-1} - C x D / den
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
double update = C[i] * D[j] * iden;
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
|
|
}
|
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
l += 1;
|
|
|
|
}
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:naive_kernel_generator
|
2022-12-15 16:14:57 +01:00
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
2023-01-27 14:29:35 +01:00
|
|
|
<<naive_template_code>>
|
2022-12-15 16:14:57 +01:00
|
|
|
"""
|
|
|
|
result = []
|
2023-01-27 15:24:52 +01:00
|
|
|
for Dim in <<kernel_generator_range>>:
|
2022-12-15 16:14:57 +01:00
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:naive_switch-case_generator
|
2022-12-15 16:14:57 +01:00
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
case {Dim}:
|
2023-01-27 14:29:35 +01:00
|
|
|
return qmckl_sherman_morrison_naive_{Dim}(context,
|
2023-01-18 18:45:59 +01:00
|
|
|
N_updates,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
2022-12-15 16:14:57 +01:00
|
|
|
"""
|
|
|
|
result = []
|
2023-01-27 15:24:52 +01:00
|
|
|
for Dim in <<kernel_generator_range>>:
|
2022-12-15 16:14:57 +01:00
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
2023-01-27 15:24:52 +01:00
|
|
|
<<naive_kernel_generator()>>
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-08 18:03:11 +02:00
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
2021-07-19 18:25:10 +02:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-08 18:03:11 +02:00
|
|
|
const double breakdown,
|
2021-10-13 10:31:24 +02:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant) {
|
2021-10-06 23:44:06 +02:00
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
2023-01-18 18:45:59 +01:00
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
2023-01-27 14:29:35 +01:00
|
|
|
"qmckl_sherman_morrison_naive",
|
2023-01-18 18:45:59 +01:00
|
|
|
NULL);
|
2021-10-06 23:44:06 +02:00
|
|
|
}
|
|
|
|
|
2023-02-02 17:23:14 +01:00
|
|
|
// #ifdef HAVE_HPC
|
2023-01-18 18:45:59 +01:00
|
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
2022-12-15 16:14:57 +01:00
|
|
|
switch (Dim) {
|
2023-01-27 15:24:52 +01:00
|
|
|
<<naive_switch-case_generator()>>
|
2021-07-19 17:38:53 +02:00
|
|
|
}
|
|
|
|
}
|
2023-01-19 15:25:12 +01:00
|
|
|
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
2023-01-27 14:29:35 +01:00
|
|
|
return qmckl_sherman_morrison_naive_hpc(context,
|
2023-01-18 18:45:59 +01:00
|
|
|
LDS,
|
|
|
|
Dim,
|
|
|
|
N_updates,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
2021-07-29 11:48:38 +02:00
|
|
|
|
2022-12-16 12:04:42 +01:00
|
|
|
}
|
2023-02-02 17:23:14 +01:00
|
|
|
// #else
|
|
|
|
// return qmckl_sherman_morrison_naive_doc(context,
|
|
|
|
// Dim,
|
|
|
|
// N_updates,
|
|
|
|
// Updates,
|
|
|
|
// Updates_index,
|
|
|
|
// breakdown,
|
|
|
|
// Slater_inv,
|
|
|
|
// determinant);
|
|
|
|
// #endif
|
2023-01-18 18:45:59 +01:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
return QMCKL_FAILURE;
|
2021-07-19 17:38:53 +02:00
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
*** Test :noexport:
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
The tests for the kernels are executed on datasets that are extracted from a run of
|
|
|
|
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
|
|
|
The tests are run such that the kernels reject the computed inverse whenever the computed
|
|
|
|
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
|
|
|
QMC=Chem. The tests will return QMCKL_SUCCESS whenever all the elements of the final matrix
|
|
|
|
$R=S.S^-1 - 1$ are smaller than the given tolerance value of 1e-3, and will return
|
|
|
|
QMCKL_FAILURE if the values are larger than this tolerance value.
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
|
|
|
const uint64_t Dim = 21;
|
|
|
|
const uint64_t LDS = (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH;
|
|
|
|
const double breakdown = 1e-3;
|
|
|
|
const double tolerance = 1e-3;
|
|
|
|
double res[441];
|
|
|
|
|
|
|
|
#include "sm_test.h"
|
|
|
|
|
|
|
|
assert(Updates1 != NULL);
|
|
|
|
assert(Updates_index1 != NULL);
|
|
|
|
assert(Slater_inv1 != NULL);
|
|
|
|
|
|
|
|
// original determinant of Slater1 (before applying updates)
|
|
|
|
double det = 3.407025646103221e-10;
|
|
|
|
rc = qmckl_sherman_morrison_naive(context,
|
|
|
|
LDS,
|
|
|
|
Dim,
|
|
|
|
N_updates1,
|
|
|
|
Updates1,
|
|
|
|
Updates_index1,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv1,
|
|
|
|
&det);
|
|
|
|
|
|
|
|
// Check that the determinant is updated properly
|
|
|
|
assert(fabs(det + 4.120398385068217e-10) < 1e-15);
|
|
|
|
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
res[i * Dim + j] = 0;
|
|
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
|
|
|
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rc = QMCKL_SUCCESS;
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(rc == QMCKL_SUCCESS);
|
|
|
|
#+end_src
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
** Performance
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
|
|
|
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
|
|
|
can never resolve a situation where applying the update causes singular behaviour.
|
2022-12-15 16:14:57 +01:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
** C interface
|
|
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
|
|
interface
|
|
|
|
integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
|
|
|
|
(context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
|
|
bind(C)
|
|
|
|
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
|
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
|
|
|
|
integer(c_int32_t), external :: qmckl_sherman_morrison_naive_doc_f
|
|
|
|
info = qmckl_sherman_morrison_naive_doc_f &
|
|
|
|
(context, Dim, N_updates, Updates, &
|
|
|
|
Updates_index, breakdown, Slater_inv, determinant)
|
2021-07-19 12:01:07 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
end function qmckl_sherman_morrison_naive_doc
|
|
|
|
end interface
|
|
|
|
#+end_src
|
2021-09-01 15:29:14 +02:00
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
** Fortran interface :noexport:
|
2021-09-14 12:13:17 +02:00
|
|
|
:PROPERTIES:
|
2023-01-27 14:29:35 +01:00
|
|
|
:Name: qmckl_sherman_morrison_naive
|
2021-09-14 12:13:17 +02:00
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
2021-07-19 14:04:47 +02:00
|
|
|
|
2021-09-15 10:52:00 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
2023-01-27 14:29:35 +01:00
|
|
|
integer(c_int32_t) function qmckl_sherman_morrison_naive &
|
2021-10-28 16:06:17 +02:00
|
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
2021-09-15 10:52:00 +02:00
|
|
|
bind(C)
|
|
|
|
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
2021-09-18 18:07:05 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
2021-10-28 16:06:17 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
2021-10-08 18:03:11 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
2021-09-18 18:07:05 +02:00
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
2021-10-08 18:03:11 +02:00
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
2021-10-28 16:06:17 +02:00
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
2021-10-13 10:31:24 +02:00
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-09-15 10:52:00 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
end function qmckl_sherman_morrison_naive
|
2021-09-15 10:52:00 +02:00
|
|
|
end interface
|
|
|
|
#+end_src
|
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
|
|
|
integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
|
|
|
|
(context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
|
|
bind(C)
|
2021-10-13 10:31:24 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
2021-10-13 10:31:24 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-10-13 10:31:24 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
end function qmckl_sherman_morrison_naive_doc
|
|
|
|
end interface
|
|
|
|
#+end_src
|
2021-07-20 19:34:51 +02:00
|
|
|
|
2021-07-21 17:30:12 +02:00
|
|
|
* Woodbury 2x2
|
2023-02-02 17:04:34 +01:00
|
|
|
** ~qmckl_woodbury_2x2~
|
2021-07-21 17:30:12 +02:00
|
|
|
:PROPERTIES:
|
2023-01-27 14:29:35 +01:00
|
|
|
:Name: qmckl_woodbury_2x2
|
2021-07-21 17:30:12 +02:00
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
|
|
|
|
2021-09-07 11:22:54 +02:00
|
|
|
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
|
|
|
this algorithm is called the Woodbury Matrix Identity
|
2021-09-01 16:06:51 +02:00
|
|
|
\[
|
|
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
|
|
\]
|
|
|
|
where
|
|
|
|
$S$ is the Slater-matrix
|
|
|
|
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
|
|
|
|
$S^{-1}$ is the inverse of the Slater-matrix
|
|
|
|
$C:= S^{-1}U$, a Dim $\times 2$ matrix
|
|
|
|
$B := 1 + VC$, the $2 \times 2$ matrix that is going to be inverted
|
|
|
|
$D := VS^{-1}$, a $2 \times Dim$ matrix
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-10-13 10:45:54 +02:00
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
|
|
|
|
2021-07-21 17:30:12 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+NAME: qmckl_woodbury_2x2_args
|
2021-07-21 17:42:48 +02:00
|
|
|
| qmckl_context | context | in | Global state |
|
2021-10-28 16:06:17 +02:00
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
2021-07-21 17:42:48 +02:00
|
|
|
| double | Updates[2*Dim] | in | Array containing the updates |
|
|
|
|
| uint64_t | Updates_index[2] | in | Array containing the rank-1 updates |
|
2021-07-26 17:41:21 +02:00
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
2021-10-28 16:06:17 +02:00
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
2021-10-13 10:45:54 +02:00
|
|
|
| double* | determinant | inout | Determinant of Slater-matrix |
|
2021-07-21 17:30:12 +02:00
|
|
|
|
|
|
|
*** Requirements
|
|
|
|
|
2021-09-01 16:06:51 +02:00
|
|
|
- ~context~ is not ~qmckl_null_context~
|
2021-10-28 16:06:17 +02:00
|
|
|
- ~LDS >= 2~
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Dim >= 2~
|
|
|
|
- ~Updates~ is allocated with $2 \times Dim$ elements
|
|
|
|
- ~Updates_index~ is allocated with $2$ elements
|
2021-09-01 16:06:51 +02:00
|
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
2021-07-21 17:30:12 +02:00
|
|
|
|
|
|
|
*** C header
|
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
2021-07-21 17:30:12 +02:00
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code qmckl_woodbury_2x2(
|
2021-07-21 17:30:12 +02:00
|
|
|
const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
2021-07-21 17:30:12 +02:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
*** C source
|
2021-07-21 17:30:12 +02:00
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
2021-07-21 17:30:12 +02:00
|
|
|
/*
|
|
|
|
C := S^{-1} * U, dim x 2
|
|
|
|
B := 1 + V * C, 2 x 2
|
|
|
|
D := V * S^{-1}, 2 x dim
|
2021-10-28 16:06:17 +02:00
|
|
|
*/
|
2021-09-16 13:44:12 +02:00
|
|
|
|
2021-10-06 23:44:06 +02:00
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
2023-01-27 14:29:35 +01:00
|
|
|
"qmckl_woodbury_2x2_hpc",
|
2023-01-25 18:46:56 +01:00
|
|
|
NULL);
|
2021-10-06 23:44:06 +02:00
|
|
|
}
|
|
|
|
|
2021-07-22 09:59:02 +02:00
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
2021-07-21 17:30:12 +02:00
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute C = (S^T)^{-1}U : Dim x 2
|
|
|
|
double __attribute__((aligned(8))) C[2 * Dim];
|
2021-07-22 09:59:02 +02:00
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
C[i * 2] = 0;
|
|
|
|
C[i * 2 + 1] = 0;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t k = 0; k < LDS; k++) {
|
|
|
|
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
|
|
|
|
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
2021-07-21 17:30:12 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute B = 1 + VC : 2 x 2
|
2021-07-21 17:30:12 +02:00
|
|
|
const double B0 = C[row1 * 2] + 1;
|
|
|
|
const double B1 = C[row1 * 2 + 1];
|
|
|
|
const double B2 = C[row2 * 2];
|
|
|
|
const double B3 = C[row2 * 2 + 1] + 1;
|
|
|
|
|
|
|
|
// Check if determinant of inverted matrix is not zero
|
|
|
|
double det = B0 * B3 - B1 * B2;
|
2021-07-26 17:41:21 +02:00
|
|
|
if (fabs(det) < breakdown) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return QMCKL_FAILURE;
|
2021-07-21 17:30:12 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 10:45:54 +02:00
|
|
|
// Update det(S) when passed
|
2023-01-25 18:46:56 +01:00
|
|
|
if (determinant)
|
|
|
|
*determinant *= det;
|
2021-10-13 10:45:54 +02:00
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
|
|
|
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
2021-07-21 17:30:12 +02:00
|
|
|
Binv[0] = idet * B3;
|
|
|
|
Binv[1] = -1.0 * idet * B1;
|
|
|
|
Binv[2] = -1.0 * idet * B2;
|
|
|
|
Binv[3] = idet * B0;
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// tmp = B^{-1}D : 2 x LDS
|
|
|
|
double __attribute__((aligned(8))) tmp[2 * LDS];
|
2023-01-26 11:50:58 +01:00
|
|
|
double* r1dim = &(Slater_inv[row1 * LDS]);
|
|
|
|
double* r2dim = &(Slater_inv[row2 * LDS]);
|
2023-01-25 18:46:56 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
|
|
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
2021-07-21 17:30:12 +02:00
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
2021-07-22 09:59:02 +02:00
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
|
|
|
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
|
|
|
|
}
|
2021-07-21 17:30:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:woodbury_2x2_kernel_template
|
2023-01-27 14:29:35 +01:00
|
|
|
#+begin_src c
|
|
|
|
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
|
|
|
|
const qmckl_context context,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
|
|
|
/*
|
|
|
|
C := S^{-1} * U, dim x 2
|
|
|
|
B := 1 + V * C, 2 x 2
|
|
|
|
D := V * S^{-1}, 2 x dim
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_woodbury_2x2_{Dim}",
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
|
|
|
|
|
|
// Compute C = (S^T)^{-1}U : {Dim} x 2
|
|
|
|
double __attribute__((aligned(8))) C[2 * {Dim}];
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
C[i * 2] = 0;
|
|
|
|
C[i * 2 + 1] = 0;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
|
|
|
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
|
|
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute B = 1 + VC : 2 x 2
|
|
|
|
const double B0 = C[row1 * 2] + 1;
|
|
|
|
const double B1 = C[row1 * 2 + 1];
|
|
|
|
const double B2 = C[row2 * 2];
|
|
|
|
const double B3 = C[row2 * 2 + 1] + 1;
|
|
|
|
|
|
|
|
// Check if determinant of inverted matrix is not zero
|
|
|
|
double det = B0 * B3 - B1 * B2;
|
|
|
|
if (fabs(det) < breakdown) {
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update det(S) when passed
|
|
|
|
if (determinant)
|
|
|
|
*determinant *= det;
|
|
|
|
|
|
|
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
|
|
|
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
|
|
|
Binv[0] = idet * B3;
|
|
|
|
Binv[1] = -1.0 * idet * B1;
|
|
|
|
Binv[2] = -1.0 * idet * B2;
|
|
|
|
Binv[3] = idet * B0;
|
|
|
|
|
|
|
|
// tmp = B^{-1}D : 2 x D{Dim}_P
|
|
|
|
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
|
|
|
|
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
|
|
|
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
|
|
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:woodbury_2x2_kernel_generator
|
2023-01-27 14:29:35 +01:00
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
2023-01-27 15:24:52 +01:00
|
|
|
<<woodbury_2x2_kernel_template>>
|
2023-01-27 14:29:35 +01:00
|
|
|
"""
|
|
|
|
result = []
|
2023-01-27 15:24:52 +01:00
|
|
|
for Dim in <<kernel_generator_range>>:
|
2023-01-27 14:29:35 +01:00
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-01-27 15:24:52 +01:00
|
|
|
#+NAME:woodbury_2x2_switch-case_generator
|
2023-01-27 14:29:35 +01:00
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
case {Dim}:
|
|
|
|
return qmckl_woodbury_2x2_{Dim}(context,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
|
|
|
"""
|
|
|
|
result = []
|
2023-01-27 15:24:52 +01:00
|
|
|
for Dim in <<kernel_generator_range>>:
|
2023-01-27 14:29:35 +01:00
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
2023-01-27 15:24:52 +01:00
|
|
|
<<woodbury_2x2_kernel_generator()>>
|
2023-01-27 14:29:35 +01:00
|
|
|
|
|
|
|
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant) {
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_woodbury_2x2",
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
|
|
switch (Dim) {
|
2023-01-27 15:24:52 +01:00
|
|
|
<<woodbury_2x2_switch-case_generator()>>
|
2023-01-27 14:29:35 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
return qmckl_woodbury_2x2_hpc(context,
|
|
|
|
LDS,
|
|
|
|
Dim,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-07-21 17:30:12 +02:00
|
|
|
*** Performance
|
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
This function is most efficient when used in cases where there are only 2 rank-1 updates and
|
|
|
|
it is sure they will not result in a singular matrix.
|
2021-09-10 17:25:56 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
** Fortran interface :noexport:
|
2021-09-14 12:13:17 +02:00
|
|
|
:PROPERTIES:
|
2023-01-27 14:29:35 +01:00
|
|
|
:Name: qmckl_woodbury_2x2
|
2021-09-14 12:13:17 +02:00
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
2021-07-21 17:30:12 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
2021-07-21 17:30:12 +02:00
|
|
|
|
2021-09-16 13:44:12 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
2023-01-27 14:29:35 +01:00
|
|
|
integer(c_int32_t) function qmckl_woodbury_2x2 &
|
2021-10-28 16:06:17 +02:00
|
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
2021-09-16 13:44:12 +02:00
|
|
|
bind(C)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
2021-10-28 16:06:17 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
2021-10-09 22:23:12 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
2021-09-16 13:44:12 +02:00
|
|
|
real (c_double ) , intent(in) :: Updates(2*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(2)
|
2021-10-09 22:23:12 +02:00
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
2021-10-28 16:06:17 +02:00
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
2021-10-13 10:45:54 +02:00
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-09-16 13:44:12 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
end function qmckl_woodbury_2x2
|
2021-09-16 13:44:12 +02:00
|
|
|
end interface
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-21 17:30:12 +02:00
|
|
|
*** Test :noexport:
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
2021-09-18 18:07:05 +02:00
|
|
|
assert(Updates2 != NULL);
|
|
|
|
assert(Updates_index2 != NULL);
|
|
|
|
assert(Slater_inv2 != NULL);
|
2021-10-13 10:45:54 +02:00
|
|
|
det = -1.4432116661319376e-11;
|
2023-01-27 14:29:35 +01:00
|
|
|
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
2021-10-13 10:45:54 +02:00
|
|
|
assert(fabs(det-2.367058141251457e-10) < 1e-15);
|
2021-09-05 11:22:41 +02:00
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
res[i * Dim + j] = 0;
|
|
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
|
2021-09-05 11:22:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rc = QMCKL_SUCCESS;
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-21 17:30:12 +02:00
|
|
|
assert(rc == QMCKL_SUCCESS);
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-22 11:38:50 +02:00
|
|
|
* Woodbury 3x3
|
2023-01-27 14:29:35 +01:00
|
|
|
** ~qmckl_woodbury_3x3~
|
2021-07-22 11:38:50 +02:00
|
|
|
:PROPERTIES:
|
2023-01-27 14:29:35 +01:00
|
|
|
:Name: qmckl_woodbury_3x3
|
2021-07-22 11:38:50 +02:00
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
|
|
|
|
2021-09-01 16:06:51 +02:00
|
|
|
The 3x3 version of the Woodbury 2x2 kernel. It is used to apply three
|
2021-09-02 17:56:42 +02:00
|
|
|
rank-1 updates at once. The formula used in this kernel is the same as for Woodbury 2x2,
|
|
|
|
except for the sizes of the following matrices:
|
2021-09-01 16:06:51 +02:00
|
|
|
|
|
|
|
$C:= S^{-1}U$, a Dim $\times 3$ matrix
|
|
|
|
$B := 1 + VC$, the $3 \times 3$ matrix that is going to be inverted
|
|
|
|
$D := VS^{-1}$, a $3 \times Dim$ matrix
|
2021-07-22 11:38:50 +02:00
|
|
|
|
2021-10-13 10:51:42 +02:00
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
2023-01-25 18:46:56 +01:00
|
|
|
#pragma ivdep
|
|
|
|
#pragma vector aligned
|
2021-10-13 10:51:42 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+NAME: qmckl_woodbury_3x3_args
|
2021-07-22 11:38:50 +02:00
|
|
|
| qmckl_context | context | in | Global state |
|
2021-10-28 16:06:17 +02:00
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
2021-07-22 11:38:50 +02:00
|
|
|
| double | Updates[3*Dim] | in | Array containing the updates |
|
|
|
|
| uint64_t | Updates_index[3] | in | Array containing the rank-1 updates |
|
2021-07-26 17:41:21 +02:00
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
2021-10-28 16:06:17 +02:00
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
2021-10-13 10:51:42 +02:00
|
|
|
| double* | determinant | inout | Determinant of Slater-matrix |
|
2021-07-22 11:38:50 +02:00
|
|
|
|
|
|
|
*** Requirements
|
|
|
|
|
2021-09-01 16:10:59 +02:00
|
|
|
- ~context~ is not ~qmckl_null_context~
|
2021-10-28 16:06:17 +02:00
|
|
|
- ~LDS >= 2~
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Dim >= 2~
|
|
|
|
- ~Updates~ is allocated with $3 \times Dim$ elements
|
|
|
|
- ~Updates_index~ is allocated with $3$ elements
|
2021-09-01 16:10:59 +02:00
|
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
2021-07-22 11:38:50 +02:00
|
|
|
|
|
|
|
*** C header
|
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
2021-07-22 11:38:50 +02:00
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code qmckl_woodbury_3x3(
|
2021-07-22 11:38:50 +02:00
|
|
|
const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
2021-07-22 11:38:50 +02:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
*** C source
|
2021-07-22 11:38:50 +02:00
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
2023-01-27 17:41:32 +01:00
|
|
|
qmckl_exit_code qmckl_woodbury_3x3_hpc(const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
2023-01-27 17:41:32 +01:00
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-27 17:41:32 +01:00
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
2021-07-22 11:38:50 +02:00
|
|
|
/*
|
|
|
|
C := S^{-1} * U, dim x 3
|
|
|
|
B := 1 + V * C, 3 x 3
|
|
|
|
D := V * S^{-1}, 3 x dim
|
|
|
|
,*/
|
2021-09-16 13:44:12 +02:00
|
|
|
|
2021-10-06 23:44:06 +02:00
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
2023-01-27 17:41:32 +01:00
|
|
|
"qmckl_woodbury_3x3_hpc",
|
2023-01-25 18:46:56 +01:00
|
|
|
NULL);
|
2021-10-06 23:44:06 +02:00
|
|
|
}
|
|
|
|
|
2021-07-22 11:38:50 +02:00
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
|
|
const uint64_t row3 = (Updates_index[2] - 1);
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute C = (S^T)^{-1}U : Dim x 3
|
|
|
|
double __attribute__((aligned(8))) C[3 * Dim];
|
2021-07-22 11:38:50 +02:00
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
C[i * 3] = 0;
|
|
|
|
C[i * 3 + 1] = 0;
|
|
|
|
C[i * 3 + 2] = 0;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t k = 0; k < LDS; k++) {
|
|
|
|
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
|
|
|
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
|
|
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
2021-07-22 11:38:50 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute B = 1 + VC : 3 x 3
|
2021-07-22 11:38:50 +02:00
|
|
|
const double B0 = C[row1 * 3] + 1;
|
|
|
|
const double B1 = C[row1 * 3 + 1];
|
|
|
|
const double B2 = C[row1 * 3 + 2];
|
|
|
|
const double B3 = C[row2 * 3];
|
|
|
|
const double B4 = C[row2 * 3 + 1] + 1;
|
|
|
|
const double B5 = C[row2 * 3 + 2];
|
|
|
|
const double B6 = C[row3 * 3];
|
|
|
|
const double B7 = C[row3 * 3 + 1];
|
|
|
|
const double B8 = C[row3 * 3 + 2] + 1;
|
|
|
|
|
|
|
|
// Check if determinant of B is not too close to zero
|
|
|
|
double det;
|
|
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
|
|
B2 * (B3 * B7 - B4 * B6);
|
2021-07-26 17:41:21 +02:00
|
|
|
if (fabs(det) < breakdown) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return QMCKL_FAILURE;
|
2021-07-22 11:38:50 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 10:51:42 +02:00
|
|
|
// Update det(Slater) if passed
|
2023-01-25 18:46:56 +01:00
|
|
|
if (determinant)
|
|
|
|
*determinant *= det;
|
2021-10-13 10:51:42 +02:00
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
|
|
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
2021-07-22 11:38:50 +02:00
|
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// tmp = B^{-1}D : 3 x LDS
|
|
|
|
double __attribute__((aligned(8))) tmp[3 * LDS];
|
2023-01-26 11:50:58 +01:00
|
|
|
double* r1dim = &(Slater_inv[row1 * LDS]);
|
|
|
|
double* r2dim = &(Slater_inv[row2 * LDS]);
|
|
|
|
double* r3dim = &(Slater_inv[row3 * LDS]);
|
2023-01-25 18:46:56 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
|
|
tmp[LDS + j] =
|
|
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
|
|
tmp[2 * LDS + j] =
|
|
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
2021-07-22 11:38:50 +02:00
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
2021-07-22 11:38:50 +02:00
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
|
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
|
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
|
|
|
}
|
2021-07-22 11:38:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
2023-01-27 17:41:32 +01:00
|
|
|
|
|
|
|
#+NAME:woodbury_3x3_kernel_template
|
|
|
|
#+begin_src c
|
|
|
|
qmckl_exit_code qmckl_woodbury_3x3_{Dim}(
|
|
|
|
const qmckl_context context,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict determinant) {
|
|
|
|
/*
|
|
|
|
C := S^{-1} * U, dim x 3
|
|
|
|
B := 1 + V * C, 3 x 3
|
|
|
|
D := V * S^{-1}, 3 x dim
|
|
|
|
,*/
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_woodbury_3x3_{Dim}",
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
|
|
const uint64_t row3 = (Updates_index[2] - 1);
|
|
|
|
|
|
|
|
// Compute C = (S^T)^{-1}U : {Dim} x 3
|
|
|
|
double __attribute__((aligned(8))) C[3 * {Dim}];
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
C[i * 3] = 0;
|
|
|
|
C[i * 3 + 1] = 0;
|
|
|
|
C[i * 3 + 2] = 0;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
|
|
|
C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
|
|
C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
|
|
C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute B = 1 + VC : 3 x 3
|
|
|
|
const double B0 = C[row1 * 3] + 1;
|
|
|
|
const double B1 = C[row1 * 3 + 1];
|
|
|
|
const double B2 = C[row1 * 3 + 2];
|
|
|
|
const double B3 = C[row2 * 3];
|
|
|
|
const double B4 = C[row2 * 3 + 1] + 1;
|
|
|
|
const double B5 = C[row2 * 3 + 2];
|
|
|
|
const double B6 = C[row3 * 3];
|
|
|
|
const double B7 = C[row3 * 3 + 1];
|
|
|
|
const double B8 = C[row3 * 3 + 2] + 1;
|
|
|
|
|
|
|
|
// Check if determinant of B is not too close to zero
|
|
|
|
double det;
|
|
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
|
|
if (fabs(det) < breakdown) {
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update det(Slater) if passed
|
|
|
|
if (determinant)
|
|
|
|
*determinant *= det;
|
|
|
|
|
|
|
|
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
|
|
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
|
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
|
|
|
|
// tmp = B^{-1}D : 3 x D{Dim}_P
|
|
|
|
double __attribute__((aligned(8))) tmp[3 * D{Dim}_P];
|
|
|
|
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
|
|
|
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
|
|
|
double* r3dim = &(Slater_inv[row3 * D{Dim}_P]);
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
|
|
tmp[D{Dim}_P + j] =
|
|
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
|
|
tmp[2 * D{Dim}_P + j] =
|
|
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME:woodbury_3x3_kernel_generator
|
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
<<woodbury_3x3_kernel_template>>
|
|
|
|
"""
|
|
|
|
result = []
|
|
|
|
for Dim in <<kernel_generator_range>>:
|
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME:woodbury_3x3_switch-case_generator
|
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
case {Dim}:
|
|
|
|
return qmckl_woodbury_3x3_{Dim}(context,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
|
|
|
"""
|
|
|
|
result = []
|
|
|
|
for Dim in <<kernel_generator_range>>:
|
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
|
|
<<woodbury_3x3_kernel_generator()>>
|
|
|
|
|
|
|
|
qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context,
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant) {
|
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_woodbury_3x3",
|
|
|
|
NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
|
|
switch (Dim) {
|
|
|
|
<<woodbury_3x3_switch-case_generator()>>
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
return qmckl_woodbury_3x3_hpc(context,
|
|
|
|
LDS,
|
|
|
|
Dim,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
determinant);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
2021-07-22 11:38:50 +02:00
|
|
|
*** Performance...
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
|
|
|
it is sure they will not result in a singular matrix.
|
2021-09-01 16:06:51 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
** Fortran interface :noexport:
|
2021-09-14 12:13:17 +02:00
|
|
|
:PROPERTIES:
|
2023-01-27 14:29:35 +01:00
|
|
|
:Name: qmckl_woodbury_3x3
|
2021-09-14 12:13:17 +02:00
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
2021-07-22 11:38:50 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
2021-07-22 11:38:50 +02:00
|
|
|
|
2021-09-16 13:44:12 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
2023-01-27 14:29:35 +01:00
|
|
|
integer(c_int32_t) function qmckl_woodbury_3x3 &
|
2021-10-28 16:06:17 +02:00
|
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
2021-09-16 13:44:12 +02:00
|
|
|
bind(C)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
2021-10-28 16:06:17 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
2021-10-09 22:23:12 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
2021-09-16 13:44:12 +02:00
|
|
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
2021-10-09 22:23:12 +02:00
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
2021-10-28 16:06:17 +02:00
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
2021-10-13 10:51:42 +02:00
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-09-16 13:44:12 +02:00
|
|
|
|
2023-01-27 14:29:35 +01:00
|
|
|
end function qmckl_woodbury_3x3
|
2021-09-16 13:44:12 +02:00
|
|
|
end interface
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-22 11:38:50 +02:00
|
|
|
*** Test :noexport:
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
2021-09-18 18:07:05 +02:00
|
|
|
assert(Updates3 != NULL);
|
|
|
|
assert(Updates_index3 != NULL);
|
|
|
|
assert(Slater_inv3_1 != NULL);
|
2021-10-13 10:51:42 +02:00
|
|
|
det = -1.23743195512859e-09;
|
2023-01-27 14:29:35 +01:00
|
|
|
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
2021-10-13 10:51:42 +02:00
|
|
|
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
2021-09-05 11:22:41 +02:00
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
res[i * Dim + j] = 0;
|
|
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
|
2021-09-05 11:22:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rc = QMCKL_SUCCESS;
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-22 11:38:50 +02:00
|
|
|
assert(rc == QMCKL_SUCCESS);
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
* Sherman-Morrison with update splitting
|
|
|
|
** ~qmckl_sherman_morrison_splitting~
|
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_sherman_morrison_splitting
|
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
|
|
|
|
2021-09-02 17:56:42 +02:00
|
|
|
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator $1+v_j^T S^{-1} u_j$ in
|
|
|
|
the Sherman-Morrison formula is deemed to be too close to zero, the update $u_j$ is split in half:
|
2021-09-07 12:22:39 +02:00
|
|
|
$u_j \rightarrow \frac{1}{2} u_j$. One half is applied immediately --necessarily increasing the value of the
|
2021-09-02 17:56:42 +02:00
|
|
|
denominator because of the split-- while the other halve is put in a queue that will be applied when all the
|
2021-09-07 12:22:39 +02:00
|
|
|
remaining updates have been treated.
|
|
|
|
|
|
|
|
The kernel is executed recursively until the queue is eiter empty and all
|
2021-09-02 17:56:42 +02:00
|
|
|
updates are applied successfully, or the size of the queue equals the number of initial updates. In the last
|
2021-09-07 12:22:39 +02:00
|
|
|
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
|
2021-09-02 17:56:42 +02:00
|
|
|
kernel exits with an exit code.
|
2021-07-22 18:05:39 +02:00
|
|
|
|
2021-10-13 11:55:20 +02:00
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
#+NAME: qmckl_sherman_morrison_splitting_args
|
2021-07-22 18:20:20 +02:00
|
|
|
| qmckl_context | context | in | Global state |
|
2021-10-28 16:06:17 +02:00
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
2021-07-22 18:20:20 +02:00
|
|
|
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
|
|
| double | Updates[N_updates*Dim] | in | Array containing the updates |
|
|
|
|
| uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates |
|
2021-07-26 17:41:21 +02:00
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
2021-10-28 16:06:17 +02:00
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
2021-10-13 11:55:20 +02:00
|
|
|
| double* | determinant | inout | Determinant of the Slater-matrix |
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
*** Requirements
|
|
|
|
|
2021-09-07 11:22:54 +02:00
|
|
|
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
2021-10-28 16:06:17 +02:00
|
|
|
* ~LDS >= 2~
|
2021-09-07 11:22:54 +02:00
|
|
|
* ~Dim >= 2~
|
|
|
|
* ~N_updates >= 1~
|
|
|
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
|
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
|
|
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
|
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
*** C header
|
|
|
|
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
|
2021-07-22 18:20:20 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
2021-09-16 13:44:12 +02:00
|
|
|
qmckl_exit_code qmckl_sherman_morrison_splitting(
|
2021-07-22 18:20:20 +02:00
|
|
|
const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
2021-07-22 18:20:20 +02:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
*** C source
|
2021-07-22 18:05:39 +02:00
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
2021-10-09 22:23:12 +02:00
|
|
|
qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant) {
|
2021-10-06 23:44:06 +02:00
|
|
|
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_sherman_morrison_splitting",
|
|
|
|
NULL);
|
2021-10-06 23:44:06 +02:00
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
2021-10-09 22:23:12 +02:00
|
|
|
uint64_t later_index[N_updates];
|
2021-07-22 18:05:39 +02:00
|
|
|
uint64_t later = 0;
|
|
|
|
|
2023-01-27 11:13:57 +01:00
|
|
|
qmckl_exit_code rc = qmckl_slagel_splitting(
|
|
|
|
LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
|
|
|
|
later_updates, later_index, &later, determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
if (later > 0) {
|
2023-01-27 11:13:57 +01:00
|
|
|
qmckl_exit_code rc = qmckl_sherman_morrison_splitting(
|
|
|
|
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
|
|
|
Slater_inv, determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
2021-07-22 18:05:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
*** Performance...
|
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high.
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
** Fortran interface :noexport:
|
2021-09-14 12:13:17 +02:00
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_sherman_morrison_splitting
|
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
2021-07-22 18:05:39 +02:00
|
|
|
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
|
2021-09-16 17:24:19 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
|
|
|
integer(c_int32_t) function qmckl_sherman_morrison_splitting &
|
2021-10-28 16:06:17 +02:00
|
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
2021-09-16 17:24:19 +02:00
|
|
|
bind(C)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
2021-10-28 16:06:17 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
2021-10-09 22:23:12 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
2021-09-16 17:24:19 +02:00
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
2021-10-09 22:23:12 +02:00
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
2021-10-28 16:06:17 +02:00
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
2021-10-13 11:55:20 +02:00
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-09-16 17:24:19 +02:00
|
|
|
|
|
|
|
end function qmckl_sherman_morrison_splitting
|
|
|
|
end interface
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-22 18:05:39 +02:00
|
|
|
*** Test :noexport:
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
2021-09-18 18:07:05 +02:00
|
|
|
assert(Updates3 != NULL);
|
|
|
|
assert(Updates_index3 != NULL);
|
|
|
|
assert(Slater_inv3_2 != NULL);
|
2021-10-13 11:55:20 +02:00
|
|
|
det = -1.23743195512859e-09;
|
2023-01-25 18:46:56 +01:00
|
|
|
rc = qmckl_sherman_morrison_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
|
2021-10-13 11:55:20 +02:00
|
|
|
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
2021-09-05 11:22:41 +02:00
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
res[i * Dim + j] = 0;
|
|
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
|
2021-09-05 11:22:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rc = QMCKL_SUCCESS;
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-22 18:05:39 +02:00
|
|
|
assert(rc == QMCKL_SUCCESS);
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-26 17:41:21 +02:00
|
|
|
* Woodbury 3x3 and 2x2 with Sherman-Morrison and update splitting
|
2023-02-02 17:04:34 +01:00
|
|
|
** ~qmckl_sherman_morrison_smw32s~
|
2021-07-23 16:50:51 +02:00
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_sherman_morrison_smw32s
|
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
|
|
|
|
2021-09-02 17:56:42 +02:00
|
|
|
The Woodbury 3x3 and 2x2 kernel with Sherman-Morrison and update splitting combines the low-level Woodbury 3x3 kernel,
|
|
|
|
the Woobury 2x2 kernel and Sherman-Morrison with update splitting. It works the almost the same as Woodbury 3x3 with
|
|
|
|
Sherman-Morrison and update splitting, except that when there is a remainder of two rank-1 updates, it is first tried
|
|
|
|
with Woodbury 2x2 instead of sending them all to Sherman-Morrison with update splitting. For example, in the case of
|
|
|
|
5 updates the updates are applied in 1 block of 3 updates end 1 block of 2 updates.
|
2021-07-23 16:50:51 +02:00
|
|
|
|
2021-10-13 11:55:20 +02:00
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
2021-07-23 16:50:51 +02:00
|
|
|
#+NAME: qmckl_sherman_morrison_smw32s_args
|
|
|
|
| qmckl_context | context | in | Global state |
|
2021-10-28 16:06:17 +02:00
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
2021-07-23 16:50:51 +02:00
|
|
|
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
|
|
| double | Updates[N_updates*Dim] | in | Array containing the updates |
|
|
|
|
| uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates |
|
2021-07-26 17:41:21 +02:00
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
2021-10-28 16:06:17 +02:00
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
2021-10-13 11:55:20 +02:00
|
|
|
| double* | determinant | inout | Determinant of the Slater-matrix |
|
|
|
|
|
2021-07-23 16:50:51 +02:00
|
|
|
|
|
|
|
*** Requirements
|
|
|
|
|
2021-09-07 11:22:54 +02:00
|
|
|
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
2021-10-28 16:06:17 +02:00
|
|
|
* ~LDS >= 2~
|
2021-09-07 11:22:54 +02:00
|
|
|
* ~Dim >= 2~
|
|
|
|
* ~N_updates >= 1~
|
|
|
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
|
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
|
|
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
|
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
2021-07-23 16:50:51 +02:00
|
|
|
|
|
|
|
*** C header
|
|
|
|
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sherman_morrison_smw32s_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
|
2021-07-23 19:58:24 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
2021-09-16 17:24:19 +02:00
|
|
|
qmckl_exit_code qmckl_sherman_morrison_smw32s(
|
2021-07-23 19:58:24 +02:00
|
|
|
const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant);
|
2021-07-23 19:58:24 +02:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
*** C source
|
2021-07-23 16:50:51 +02:00
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
2021-10-09 22:23:12 +02:00
|
|
|
qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
2021-10-28 16:06:17 +02:00
|
|
|
const uint64_t LDS,
|
2021-10-09 22:23:12 +02:00
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
2023-01-26 11:50:58 +01:00
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
2021-10-09 22:23:12 +02:00
|
|
|
const double breakdown,
|
2023-01-26 11:50:58 +01:00
|
|
|
double* Slater_inv,
|
|
|
|
double* determinant) {
|
2021-07-23 16:50:51 +02:00
|
|
|
|
2021-10-06 23:44:06 +02:00
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
2023-01-25 18:46:56 +01:00
|
|
|
return qmckl_failwith(context,
|
|
|
|
QMCKL_NULL_CONTEXT,
|
|
|
|
"qmckl_sherman_morrison_smw32s",
|
|
|
|
NULL);
|
2021-10-06 23:44:06 +02:00
|
|
|
}
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
|
|
|
uint64_t later_index[N_updates];
|
|
|
|
uint64_t later = 0;
|
2021-07-23 16:50:51 +02:00
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// Special case for 4 rank-1 updates: 2+2
|
|
|
|
if (N_updates == 4) {
|
|
|
|
qmckl_exit_code rc =
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_woodbury_2x2(context, LDS, Dim, Updates, Updates_index,
|
2023-01-25 18:46:56 +01:00
|
|
|
breakdown, Slater_inv, determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
|
|
|
uint64_t l = 0;
|
|
|
|
rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates, Updates_index,
|
|
|
|
breakdown, Slater_inv,
|
|
|
|
later_updates + (LDS * later),
|
|
|
|
later_index + later, &l, determinant);
|
|
|
|
later += l;
|
|
|
|
}
|
2023-01-27 14:29:35 +01:00
|
|
|
rc = qmckl_woodbury_2x2(context, LDS, Dim, &Updates[2 * LDS],
|
2023-01-25 18:46:56 +01:00
|
|
|
&Updates_index[2], breakdown, Slater_inv,
|
|
|
|
determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
|
|
|
uint64_t l = 0;
|
|
|
|
rc = qmckl_slagel_splitting(
|
|
|
|
LDS, Dim, 2, &Updates[2 * LDS], &Updates_index[2], breakdown,
|
|
|
|
Slater_inv, later_updates + (LDS * later), later_index + later,
|
|
|
|
&l, determinant);
|
|
|
|
later += l;
|
|
|
|
}
|
|
|
|
if (later > 0) {
|
|
|
|
rc = qmckl_sherman_morrison_splitting(
|
|
|
|
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
|
|
|
Slater_inv, determinant);
|
|
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
// And for the other cases != 4
|
|
|
|
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates
|
|
|
|
// with Woodbury 3x3 kernel
|
2021-07-23 16:50:51 +02:00
|
|
|
uint64_t n_of_3blocks = N_updates / 3;
|
|
|
|
uint64_t remainder = N_updates % 3;
|
2023-01-25 18:46:56 +01:00
|
|
|
uint64_t length_3block = 3 * LDS;
|
2021-07-23 16:50:51 +02:00
|
|
|
|
|
|
|
if (n_of_3blocks > 0) {
|
|
|
|
for (uint64_t i = 0; i < n_of_3blocks; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
const double* Updates_3block = &Updates[i * length_3block];
|
|
|
|
const uint64_t* Updates_index_3block = &Updates_index[i * 3];
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code rc = qmckl_woodbury_3x3(
|
2023-01-25 18:46:56 +01:00
|
|
|
context, LDS, Dim, Updates_3block, Updates_index_3block,
|
|
|
|
breakdown, Slater_inv, determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
2021-07-23 16:50:51 +02:00
|
|
|
uint64_t l = 0;
|
2023-01-25 18:46:56 +01:00
|
|
|
rc = qmckl_slagel_splitting(
|
|
|
|
LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
|
|
|
breakdown, Slater_inv, later_updates + (LDS * later),
|
|
|
|
later_index + later, &l, determinant);
|
|
|
|
later += l;
|
2021-07-23 16:50:51 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-16 17:24:19 +02:00
|
|
|
// Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
|
|
|
if (remainder == 2) {
|
2023-01-25 18:46:56 +01:00
|
|
|
const double* Updates_2block = &Updates[n_of_3blocks * length_3block];
|
|
|
|
const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
2023-01-27 14:29:35 +01:00
|
|
|
qmckl_exit_code rc = qmckl_woodbury_2x2(
|
2023-01-25 18:46:56 +01:00
|
|
|
context, LDS, Dim, Updates_2block, Updates_index_2block,
|
|
|
|
breakdown, Slater_inv, determinant);
|
|
|
|
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
2021-07-23 16:50:51 +02:00
|
|
|
uint64_t l = 0;
|
2023-01-25 18:46:56 +01:00
|
|
|
rc = qmckl_slagel_splitting(
|
|
|
|
LDS, Dim, 2, Updates_2block, Updates_index_2block, breakdown,
|
|
|
|
Slater_inv, later_updates + (LDS * later), later_index + later,
|
|
|
|
&l, determinant);
|
|
|
|
later += l;
|
2021-07-23 16:50:51 +02:00
|
|
|
}
|
|
|
|
}
|
2023-01-25 18:46:56 +01:00
|
|
|
|
2021-09-16 17:24:19 +02:00
|
|
|
// Apply last remaining update with slagel_splitting
|
2023-01-25 18:46:56 +01:00
|
|
|
if (remainder == 1) {
|
|
|
|
const double* Updates_1block = &Updates[n_of_3blocks * length_3block];
|
|
|
|
const uint64_t* Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
2021-07-23 16:50:51 +02:00
|
|
|
uint64_t l = 0;
|
2023-01-27 11:13:57 +01:00
|
|
|
qmckl_exit_code rc = qmckl_slagel_splitting(
|
2023-01-25 18:46:56 +01:00
|
|
|
LDS, Dim, 1, Updates_1block, Updates_index_1block, breakdown,
|
|
|
|
Slater_inv, later_updates + (LDS * later), later_index + later, &l,
|
|
|
|
determinant);
|
2023-01-27 11:13:57 +01:00
|
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
2023-01-25 18:46:56 +01:00
|
|
|
later += l;
|
2021-07-23 16:50:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (later > 0) {
|
2023-01-27 11:13:57 +01:00
|
|
|
qmckl_exit_code rc = qmckl_sherman_morrison_splitting(
|
2023-01-25 18:46:56 +01:00
|
|
|
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
|
|
|
Slater_inv, determinant);
|
2023-01-27 11:13:57 +01:00
|
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
2021-07-23 16:50:51 +02:00
|
|
|
}
|
2023-01-25 18:46:56 +01:00
|
|
|
|
2021-07-27 06:59:44 +02:00
|
|
|
return QMCKL_SUCCESS;
|
2021-07-23 16:50:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
*** Performance...
|
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
This kernel performs best for update cycles with 2 or more rank-1 updates and the fail-rate is low.
|
2021-09-02 17:56:42 +02:00
|
|
|
|
2023-02-02 17:04:34 +01:00
|
|
|
** Fortran interface :noexport:
|
2021-09-14 12:13:17 +02:00
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_sherman_morrison_smw32s
|
|
|
|
:CRetType: qmckl_exit_code
|
|
|
|
:FRetType: qmckl_exit_code
|
|
|
|
:END:
|
2021-07-23 16:50:51 +02:00
|
|
|
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_smw32s_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
|
2021-09-16 17:24:19 +02:00
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
|
|
interface
|
|
|
|
integer(c_int32_t) function qmckl_sherman_morrison_smw32s &
|
2021-10-28 16:06:17 +02:00
|
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
2021-09-16 17:24:19 +02:00
|
|
|
bind(C)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
|
|
import
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
2021-10-28 16:06:17 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
2021-10-09 22:23:12 +02:00
|
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
2021-09-16 17:24:19 +02:00
|
|
|
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
|
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
2021-10-09 22:23:12 +02:00
|
|
|
real (c_double ) , intent(in) , value :: breakdown
|
2021-10-28 16:06:17 +02:00
|
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
2021-10-26 14:55:26 +02:00
|
|
|
real (c_double ) , intent(inout) :: determinant
|
2021-09-16 17:24:19 +02:00
|
|
|
|
|
|
|
end function qmckl_sherman_morrison_smw32s
|
|
|
|
end interface
|
|
|
|
#+end_src
|
|
|
|
|
2021-07-23 16:50:51 +02:00
|
|
|
*** Test :noexport:
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
2021-09-18 18:07:05 +02:00
|
|
|
assert(Updates5 != NULL);
|
|
|
|
assert(Updates_index5 != NULL);
|
|
|
|
assert(Slater_inv5 != NULL);
|
2021-10-13 11:55:20 +02:00
|
|
|
det = -3.186005284713128e-10;
|
2023-01-25 18:46:56 +01:00
|
|
|
rc = qmckl_sherman_morrison_smw32s(context, LDS, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det);
|
2021-10-13 11:55:20 +02:00
|
|
|
assert(fabs(det + 5.260200118412903e-10) < 1e-15);
|
|
|
|
|
2021-09-05 11:22:41 +02:00
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
res[i * Dim + j] = 0;
|
|
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * LDS + j];
|
2021-09-05 11:22:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rc = QMCKL_SUCCESS;
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
|
|
rc = QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-23 16:50:51 +02:00
|
|
|
assert(rc == QMCKL_SUCCESS);
|
|
|
|
#+end_src
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
* Helper Functions
|
2021-10-06 23:44:06 +02:00
|
|
|
|
|
|
|
Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels.
|
2021-09-07 11:22:54 +02:00
|
|
|
These functions can only be used internally by the kernels in this module.
|
2021-09-07 09:27:22 +02:00
|
|
|
|
|
|
|
** ~qmckl_slagel_splitting~
|
|
|
|
:PROPERTIES:
|
|
|
|
:Name: qmckl_slagel_splitting
|
|
|
|
:CRetType: double
|
|
|
|
:FRetType: double precision
|
|
|
|
:END:
|
|
|
|
|
2021-09-07 11:22:54 +02:00
|
|
|
~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel.
|
2021-09-07 12:22:39 +02:00
|
|
|
It is used internally to apply a collection of $N$ rank-1 updates to the inverse Slater-matrix $S^{-1}$ and
|
2021-09-07 11:22:54 +02:00
|
|
|
splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update,
|
|
|
|
while putting the second half in a waiting queue to be applied at the end.
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-09-07 11:22:54 +02:00
|
|
|
Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined
|
|
|
|
as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors
|
|
|
|
$u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}.
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-10-13 11:55:20 +02:00
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
|
|
from applying the updates to the original matrix.
|
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
#+NAME: qmckl_slagel_splitting_args
|
2021-10-28 16:06:17 +02:00
|
|
|
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
|
|
|
| uint64_t | Dim | in | Dimension of Slater_inv |
|
2021-09-07 09:27:22 +02:00
|
|
|
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
|
|
| double | Updates[N_updates*Dim] | in | Array containing the rank-1 updates |
|
|
|
|
| uint64_t | Updates_index[N_updates] | in | Array containing positions of the rank-1 updates |
|
|
|
|
| double | breakdown | in | Break-down parameter on which to fail or not |
|
2021-10-28 16:06:17 +02:00
|
|
|
| double | Slater_inv[LDS*Dim] | inout | Array containing the inverse Slater-matrix |
|
2021-09-07 09:27:22 +02:00
|
|
|
| double | later_updates[Dim * N_updates] | inout | Array containing the split updates for later |
|
|
|
|
| uint64_t | later_index[N_updates] | inout | Array containing the positions of the split updates for later |
|
|
|
|
| uint64_t | later | inout | Number of split updates for later |
|
2021-10-13 11:55:20 +02:00
|
|
|
| double* | determinant | inout | Determinant of the Slater-matrix |
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
|
|
|
|
*** Requirements
|
|
|
|
|
2021-10-28 16:06:17 +02:00
|
|
|
- ~LDS >= 2~
|
2021-09-07 09:27:22 +02:00
|
|
|
- ~Dim >= 2~
|
|
|
|
- ~N_updates >= 1~
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
|
|
- ~Updates_index~ is allocated with $N_updates$ elements
|
2021-09-07 09:27:22 +02:00
|
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
2021-09-07 11:22:54 +02:00
|
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
|
|
- ~later_updates~ is allocated with $later \times Dim$ elements
|
|
|
|
- ~later_index~ is allocated with $N_updates$ elements
|
2021-10-06 23:44:06 +02:00
|
|
|
- ~later >= 0~
|
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
*** C header
|
|
|
|
|
|
|
|
#+CALL: generate_c_header(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
|
|
|
|
#+RESULTS:
|
|
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
2021-10-09 22:23:12 +02:00
|
|
|
qmckl_exit_code qmckl_slagel_splitting (
|
2023-01-27 19:33:27 +01:00
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* later_updates,
|
|
|
|
uint64_t* later_index,
|
|
|
|
uint64_t* later,
|
|
|
|
double* determinant);
|
2021-09-07 09:27:22 +02:00
|
|
|
#+end_src
|
|
|
|
|
2021-10-09 22:23:12 +02:00
|
|
|
*** C source
|
2021-09-07 09:27:22 +02:00
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org
|
2023-01-27 19:33:27 +01:00
|
|
|
qmckl_exit_code qmckl_slagel_splitting_hpc(
|
|
|
|
uint64_t LDS,
|
|
|
|
uint64_t Dim,
|
|
|
|
uint64_t N_updates,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict later_updates,
|
|
|
|
uint64_t* __restrict later_index,
|
|
|
|
uint64_t* __restrict later,
|
|
|
|
double* __restrict determinant) {
|
2023-01-25 18:46:56 +01:00
|
|
|
|
|
|
|
double __attribute__((aligned(8))) C[LDS];
|
|
|
|
double __attribute__((aligned(8))) D[LDS];
|
2021-09-07 09:27:22 +02:00
|
|
|
|
|
|
|
uint64_t l = 0;
|
|
|
|
// For each update
|
|
|
|
while (l < N_updates) {
|
|
|
|
// C = S^{-1} x U_l
|
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
C[i] = 0.0f;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
2021-09-07 09:27:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Denominator
|
2023-01-25 18:46:56 +01:00
|
|
|
const int cui = Updates_index[l] - 1;
|
|
|
|
double den = 1.0f + C[cui];
|
|
|
|
if (fabs(den) < breakdown) {
|
|
|
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
|
|
|
// second halve in later_updates
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t i = 0; i < LDS; i++) {
|
|
|
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
|
|
|
C[i] *= 0.5f;
|
2021-09-07 09:27:22 +02:00
|
|
|
}
|
|
|
|
later_index[*later] = Updates_index[l];
|
|
|
|
(*later)++;
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
den = 1.0f + C[cui];
|
|
|
|
} // From here onwards we continue with applying the first halve of the
|
|
|
|
// update to Slater_inv
|
|
|
|
double iden = 1.0f / den;
|
2021-09-07 09:27:22 +02:00
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
if (determinant)
|
2021-10-13 11:55:20 +02:00
|
|
|
*determinant *= den;
|
|
|
|
|
2023-01-25 18:46:56 +01:00
|
|
|
// D = v^T x S^{-1} : 1 x LDS
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
D[j] = Slater_inv[cui * LDS + j];
|
2021-09-07 09:27:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// S^{-1} = S^{-1} - C x D / den
|
|
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
2023-01-25 18:46:56 +01:00
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
|
|
const double update = C[i] * D[j] * iden;
|
2021-10-28 16:06:17 +02:00
|
|
|
Slater_inv[i * LDS + j] -= update;
|
2021-09-07 09:27:22 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
l += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
2023-01-27 19:33:27 +01:00
|
|
|
#+end_src
|
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
|
2023-01-27 19:33:27 +01:00
|
|
|
#+NAME:slagel_splitting_template_code
|
|
|
|
#+begin_src c
|
|
|
|
static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}(
|
|
|
|
uint64_t N_updates,
|
|
|
|
const double* __restrict Updates,
|
|
|
|
const uint64_t* __restrict Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* __restrict Slater_inv,
|
|
|
|
double* __restrict later_updates,
|
|
|
|
uint64_t* __restrict later_index,
|
|
|
|
uint64_t* __restrict later,
|
|
|
|
double* __restrict determinant) {
|
|
|
|
|
|
|
|
double __attribute__((aligned(8))) C[D{Dim}_P];
|
|
|
|
double __attribute__((aligned(8))) D[D{Dim}_P];
|
|
|
|
|
|
|
|
uint64_t l = 0;
|
|
|
|
// For each update
|
|
|
|
while (l < N_updates) {
|
|
|
|
// C = S^{-1} x U_l
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
C[i] = 0.0f;
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Denominator
|
|
|
|
const int cui = Updates_index[l] - 1;
|
|
|
|
double den = 1.0f + C[cui];
|
|
|
|
if (fabs(den) < breakdown) {
|
|
|
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
|
|
|
// second halve in later_updates
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t i = 0; i < D{Dim}_P; i++) {
|
|
|
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
|
|
|
C[i] *= 0.5f;
|
|
|
|
}
|
|
|
|
later_index[*later] = Updates_index[l];
|
|
|
|
(*later)++;
|
|
|
|
|
|
|
|
den = 1.0f + C[cui];
|
|
|
|
} // From here onwards we continue with applying the first halve of the
|
|
|
|
// update to Slater_inv
|
|
|
|
double iden = 1.0f / den;
|
|
|
|
|
|
|
|
if (determinant)
|
|
|
|
*determinant *= den;
|
|
|
|
|
|
|
|
// D = v^T x S^{-1} : 1 x D{Dim}_P
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
|
|
}
|
|
|
|
|
|
|
|
// S^{-1} = S^{-1} - C x D / den
|
|
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
|
|
IVDEP
|
|
|
|
ALIGNED
|
|
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
|
|
const double update = C[i] * D[j] * iden;
|
|
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
l += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_SUCCESS;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME:slagel_splitting_kernel_generator
|
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
<<slagel_splitting_template_code>>
|
|
|
|
"""
|
|
|
|
result = []
|
|
|
|
for Dim in <<kernel_generator_range>>:
|
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
2021-09-07 09:27:22 +02:00
|
|
|
#+end_src
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2023-01-27 19:33:27 +01:00
|
|
|
|
|
|
|
#+NAME:slagel_splitting_switch-case_generator
|
|
|
|
#+begin_src python :noweb yes :exports none
|
|
|
|
text="""
|
|
|
|
case {Dim}:
|
|
|
|
return qmckl_slagel_splitting_{Dim}(
|
|
|
|
N_updates,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
later_updates,
|
|
|
|
later_index,
|
|
|
|
later,
|
|
|
|
determinant);
|
|
|
|
"""
|
|
|
|
result = []
|
|
|
|
for Dim in <<kernel_generator_range>>:
|
|
|
|
Dim=str(Dim)
|
|
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
|
|
|
|
return '\n'.join(result)
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
|
|
<<slagel_splitting_kernel_generator()>>
|
|
|
|
|
|
|
|
qmckl_exit_code qmckl_slagel_splitting(
|
|
|
|
const uint64_t LDS,
|
|
|
|
const uint64_t Dim,
|
|
|
|
const uint64_t N_updates,
|
|
|
|
const double* Updates,
|
|
|
|
const uint64_t* Updates_index,
|
|
|
|
const double breakdown,
|
|
|
|
double* Slater_inv,
|
|
|
|
double* later_updates,
|
|
|
|
uint64_t* later_index,
|
|
|
|
uint64_t* later,
|
|
|
|
double* determinant) {
|
|
|
|
|
|
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
|
|
switch (Dim) {
|
|
|
|
<<slagel_splitting_switch-case_generator()>>
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
return qmckl_slagel_splitting_hpc(
|
|
|
|
LDS,
|
|
|
|
Dim,
|
|
|
|
N_updates,
|
|
|
|
Updates,
|
|
|
|
Updates_index,
|
|
|
|
breakdown,
|
|
|
|
Slater_inv,
|
|
|
|
later_updates,
|
|
|
|
later_index,
|
|
|
|
later,
|
|
|
|
determinant);
|
|
|
|
}
|
|
|
|
|
|
|
|
return QMCKL_FAILURE;
|
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
|
2021-09-07 09:27:22 +02:00
|
|
|
*** Performance
|
|
|
|
|
2021-09-07 12:22:39 +02:00
|
|
|
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
|
|
|
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
|
2021-09-07 09:27:22 +02:00
|
|
|
|
2021-07-19 12:01:07 +02:00
|
|
|
* End of files
|
|
|
|
|
|
|
|
#+begin_src c :comments link :tangle (eval c_test)
|
|
|
|
assert (qmckl_context_destroy(context) == QMCKL_SUCCESS);
|
|
|
|
return 0;
|
2021-10-06 23:44:06 +02:00
|
|
|
|
2021-07-19 12:01:07 +02:00
|
|
|
}
|
|
|
|
#+end_src
|
|
|
|
|
|
|
|
# -*- mode: org -*-
|
|
|
|
# vim: syntax=c
|