mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-09-16 17:55:29 +02:00
3198 lines
101 KiB
Org Mode
3198 lines
101 KiB
Org Mode
#+TITLE: Sherman-Morrison-Woodbury
|
|
#+SETUPFILE: ../tools/theme.setup
|
|
#+INCLUDE: ../tools/lib.org
|
|
#+STARTUP: content
|
|
|
|
Low- and high-level functions that use the Sherman-Morrison and
|
|
Woodbury matrix inversion formulas to update the inverse of a
|
|
non-singular matrix
|
|
|
|
* Headers
|
|
#+begin_src elisp :noexport :results none :exports none
|
|
(org-babel-lob-ingest "../tools/lib.org")
|
|
#+end_src
|
|
|
|
#+begin_src c :comments link :tangle (eval c_test) :noweb yes
|
|
#include "qmckl.h"
|
|
#include "assert.h"
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
#include <math.h>
|
|
|
|
int main() {
|
|
qmckl_context context;
|
|
context = qmckl_context_create();
|
|
qmckl_exit_code rc;
|
|
#+end_src
|
|
|
|
This is the range that determines the how many high performance kernel instantces will be generated, using the C-function templates defined in the sections below. If the name of the C-function template is called ~qmckl_kernel_{Dim}~, then ~range(K, L+1)~ will results in kernel instances from ~qmckl_kernel_K~ to ~qmckl_kernel_L~.
|
|
#+NAME:kernel_generator_range
|
|
#+begin_src python :noweb yes :exports none
|
|
range(2, 22)
|
|
#+end_src
|
|
|
|
#+RESULTS: kernel_generator_range
|
|
: None
|
|
|
|
|
|
* Naïve Sherman-Morrison
|
|
** ~qmckl_sm_naive~
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_naive
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
*** Introduction
|
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
|
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
|
|
|
#+TODO
|
|
Change the math notation so that the update vectors appear as row in the math
|
|
so that it is consistent with the representation in C (memory)
|
|
|
|
The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is
|
|
\[
|
|
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
|
\]
|
|
|
|
where
|
|
$S$ is the Slater-matrix,
|
|
$u$ and $v^T$ are the column and row vectors containing the updates,
|
|
$S^{-1}$ is the inverse of the Slater-matrix.
|
|
|
|
Even though the Slater-matrix $S$ with all updates applied at once is invertable, during the course of applying
|
|
updates to the inverse Slater-matrix $S^{-1}$ one-by-one it can happen that one of the intermediate inverse
|
|
matrices $S^{-1}$ becomes singular. Therefore a global threshold value $\epsilon$ is defined that is used to
|
|
evaluate each individual update $u_j$ when it is applied.
|
|
|
|
This value sets the lower bound for which the
|
|
denominator $1+v_j^TS^{-1}u_j$ is considered to be too small and will most probably result in a singular matrix
|
|
$S$, or at least in an inverse of $S$ of very poor numerical quality. Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$,
|
|
the update is applied as usual and the kernel exits with return code \texttt{QMCKL_SUCCESS}.
|
|
If $1+v_j^TS^{-1}u_j \leq \epsilon$ the update is rejected and the kernel exits with return code \texttt{QMCKL_FAILURE}.
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
|
|
*** API
|
|
#+NAME: qmckl_sm_naive_args
|
|
| Variable | Type | In/Out | Description |
|
|
|-----------------+-------------------------+--------+------------------------------------------------------|
|
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
|
| ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
| ~Updates~ | ~double[N_updates*LDS]~ | in | Array containing the updates |
|
|
| ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing the rank-1 updates |
|
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
|
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse of a Slater-matrix |
|
|
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
|
|
|
*** Requirements
|
|
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
|
- ~LDS >= 2~
|
|
- ~Dim >= 2~
|
|
- ~N_updates >= 1~
|
|
- ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
- ~Updates_index~ is allocated with $N_updates$ elements
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
- ~determinant > 0~
|
|
|
|
*** Pedagogical kernel source (in Fortran)
|
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
|
|
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
|
subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
implicit none
|
|
integer*8 , intent(in) :: lds, dim, nupdates
|
|
real*8 , intent(in) :: upds(nupdates * lds)
|
|
real*8 , intent(in) :: s_inv(dim * lds)
|
|
real*8 , intent(out) , dimension(dim, nupdates) :: Updates
|
|
real*8 , intent(out) , dimension(dim, dim) :: Inverse
|
|
|
|
integer*8 :: i, j
|
|
|
|
! Construct Updates: lds x nupdates
|
|
do i = 1, nupdates
|
|
do j = 1, dim
|
|
Updates(j, i) = upds((i - 1) * lds + j)
|
|
end do
|
|
end do
|
|
|
|
! Construct Inverse: dim x lds
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
Inverse(i, j) = s_inv((i - 1) * lds + j)
|
|
end do
|
|
end do
|
|
end subroutine convert
|
|
#+end_src
|
|
|
|
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
|
subroutine copy_back_inv(Inverse, s_inv, lds, dim)
|
|
implicit none
|
|
integer*8 , intent(in) :: lds, dim
|
|
real*8 , intent(in) , dimension(dim, dim) :: Inverse
|
|
real*8 , intent(out) :: s_inv(dim * lds)
|
|
|
|
integer*8 :: i, j
|
|
|
|
! Copy updated inverse back to s_inv
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
s_inv((i - 1) * lds + j) = Inverse(i, j)
|
|
end do
|
|
end do
|
|
end subroutine copy_back_inv
|
|
#+end_src
|
|
|
|
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
|
subroutine copy_back_lu(Later_updates, later_upds, lds, dim, nupdates)
|
|
implicit none
|
|
integer*8 , intent(in) :: lds, dim, nupdates
|
|
real*8 , intent(in) , dimension(dim, nupdates) :: Later_updates
|
|
real*8 , intent(out) :: later_upds(nupdates * lds)
|
|
|
|
integer*8 :: i, j
|
|
|
|
! Copy updated inverse back to s_inv
|
|
do i = 1, nupdates
|
|
do j = 1, dim
|
|
later_upds((i - 1) * lds + j) = Later_updates(j, i)
|
|
end do
|
|
end do
|
|
end subroutine copy_back_lu
|
|
#+end_src
|
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
integer function qmckl_sm_naive_doc_f(context, &
|
|
lds, dim, &
|
|
nupdates, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
determinant) result(info)
|
|
|
|
use qmckl
|
|
implicit none
|
|
integer*8 , intent(in) :: context
|
|
integer*8 , intent(in) :: lds, dim
|
|
integer*8 , intent(in) :: nupdates
|
|
integer*8 , intent(in) :: updates_index(nupdates)
|
|
real*8 , intent(in) :: upds(nupdates * lds)
|
|
real*8 , intent(in) :: breakdown
|
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
|
real*8 , intent(inout) :: determinant
|
|
|
|
real*8 , dimension(dim, nupdates) :: Updates
|
|
real*8 , dimension(dim, dim) :: Inverse
|
|
real*8 , dimension(dim) :: C
|
|
real*8 , dimension(dim) :: D
|
|
real*8 :: denominator, idenominator, update
|
|
integer*8 :: i, j, l, row
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
if (context == QMCKL_NULL_CONTEXT) then
|
|
info = QMCKL_INVALID_CONTEXT
|
|
return
|
|
endif
|
|
|
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
|
! matrices 'Updates' and 'Inverse'.
|
|
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
l = 1;
|
|
! For each update do...
|
|
do while (l < nupdates + 1)
|
|
|
|
! Compute C = S^{-1}U(l)
|
|
do i = 1, dim
|
|
C(i) = 0
|
|
do j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
end do
|
|
end do
|
|
|
|
! Compute denominator = 1 + V(l)^TC
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! Return early if denominator is too small
|
|
if (abs(denominator) < breakdown) return
|
|
idenominator = 1 / denominator
|
|
|
|
! Update det(S)
|
|
determinant = determinant * denominator
|
|
|
|
! selecting column: v_l^T * S_inv
|
|
D = Inverse(row, :)
|
|
|
|
! A^{-1} = A^{-1} - C x D / denominator
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
end do
|
|
end do
|
|
|
|
l = l + 1
|
|
end do
|
|
|
|
! Copy updated inverse back to s_inv
|
|
call copy_back_inv(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
end function qmckl_sm_naive_doc_f
|
|
#+end_src
|
|
|
|
**** C interface (not directly exposed)
|
|
The following Fortran function ~qmckl_sm_naive_doc~ makes sure
|
|
that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in
|
|
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
|
|
|
#+CALL: generate_c_interface(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
integer(c_int32_t) function qmckl_sm_naive_doc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C) result(info)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
integer(c_int32_t), external :: qmckl_sm_naive_doc_f
|
|
info = qmckl_sm_naive_doc_f &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
|
|
|
end function qmckl_sm_naive_doc
|
|
#+end_src
|
|
|
|
*** C headers (exposed in qmckl.h)
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) : comments org
|
|
qmckl_exit_code qmckl_sm_naive (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_private_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_private_func) :comments org
|
|
qmckl_exit_code qmckl_sm_naive_hpc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_sm_naive_doc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
*** C sources
|
|
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
#include <stdbool.h>
|
|
#include <math.h>
|
|
#include "qmckl.h"
|
|
#include "config.h"
|
|
#include "assert.h"
|
|
#include "stdio.h"
|
|
|
|
#+end_src
|
|
~qmckl_sm_naive_hpc~ is a high performance variation of
|
|
~qmckl_sm_naive~ written in C. It is used in cases when ~Dim~ is
|
|
smaller than the leading dimension ~LDS~, irrespective of whetether ~LDS~
|
|
includes zero padding to benefit from SIMD instructions or not. Cases like this
|
|
include situations where one wants to apply updates to a square submatrix of the
|
|
full matrix.
|
|
It takes advantage of memory aligned data and assumes no data dependencies
|
|
inside the loops. The loops are fully vectorised whenever ~Dim~ is an integer
|
|
multiple of ~SIMD_LENGTH~.
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
qmckl_exit_code qmckl_sm_naive_hpc(
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith( context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_naive_hpc",
|
|
NULL);
|
|
}
|
|
|
|
double __attribute__((aligned(8))) C[Dim];
|
|
double __attribute__((aligned(8))) D[LDS];
|
|
|
|
uint64_t l = 0;
|
|
// For each update
|
|
while (l < N_updates) {
|
|
// C = S^{-1} x u_l
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < Dim; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// Denominator: v_l^T * C
|
|
const int cui = Updates_index[l] - 1;
|
|
double den = 1.0f + C[cui];
|
|
|
|
if (fabs(den) < breakdown)
|
|
return QMCKL_FAILURE;
|
|
|
|
double iden = 1.0f / den;
|
|
|
|
// Update det(A)
|
|
if (determinant)
|
|
*determinant *= den;
|
|
|
|
// selecting column: v_l^T * S_inv
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < Dim; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// A^{-1} = A^{-1} - C x D / den
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < Dim; j++) {
|
|
const double update = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
~qmckl_exit_code qmckl_sm_naive_{Dim}~ is a C function-template that is used to genereate instances of C fucntions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop-bounds are known at compile time, allowing the compiler to optimize more aggressively.
|
|
#+NAME:naive_template_code
|
|
#+begin_src c
|
|
static inline qmckl_exit_code qmckl_sm_naive_{Dim}(
|
|
const qmckl_context context,
|
|
const uint64_t N_updates,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_naive_{Dim}",
|
|
NULL);
|
|
}
|
|
|
|
#define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
|
|
|
double __attribute__((aligned(8))) C[{Dim}];
|
|
double __attribute__((aligned(8))) D[D{Dim}_P];
|
|
|
|
uint64_t l = 0;
|
|
// For each update
|
|
while (l < N_updates) {
|
|
// C = A^{-1} x U_l
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
C[i] = 0;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// Denominator
|
|
const int cui = Updates_index[l] - 1;
|
|
double den = 1.0f + C[cui];
|
|
|
|
if (fabs(den) < breakdown) {
|
|
return QMCKL_FAILURE;
|
|
}
|
|
double iden = 1.0f / den;
|
|
|
|
// Update det(A)
|
|
if (determinant)
|
|
*determinant *= den;
|
|
|
|
// selecting column: D = v_l^T * S_inv
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// A^{-1} = A^{-1} - C x D / den
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
double update = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
|
|
l += 1;
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
|
#+NAME:naive_kernel_generator
|
|
#+begin_src python :noweb yes
|
|
text="""
|
|
<<naive_template_code>>
|
|
"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim))
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
Python script that generated C switch cases that call individual kernel instances.
|
|
#+NAME:naive_switch-case_generator
|
|
#+begin_src python :noweb yes
|
|
text="""
|
|
case {Dim}:
|
|
return qmckl_sm_naive_{Dim}(context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim))
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
<<naive_kernel_generator()>>
|
|
#+end_src
|
|
~qmckl_sm_naive~ is a generic function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
qmckl_exit_code qmckl_sm_naive(const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_naive",
|
|
NULL);
|
|
}
|
|
|
|
#ifdef HAVE_HPC
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
switch (Dim) {
|
|
<<naive_switch-case_generator()>>
|
|
}
|
|
}
|
|
else { // Updating smaller sub-matrix
|
|
return qmckl_sm_naive_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
#else
|
|
return qmckl_sm_naive_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
#endif
|
|
|
|
return QMCKL_FAILURE;
|
|
}
|
|
#+end_src
|
|
|
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_naive
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_naive &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_naive
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sm_naive_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_naive_hpc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_naive_hpc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sm_naive_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_naive_doc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_naive_doc
|
|
end interface
|
|
#+end_src
|
|
|
|
*** Performance
|
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
|
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
|
can never resolve a situation where applying the update causes singular behaviour.
|
|
*** Tests
|
|
The tests for the kernels are executed on datasets that are extracted from a run of
|
|
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
|
The tests are run such that the kernels reject the computed inverse whenever the computed
|
|
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
|
QMC=Chem. The tests will return QMCKL_SUCCESS whenever all the elements of the final matrix
|
|
$R=S.S^-1 - 1$ are smaller than the given tolerance value of 1e-3, and will return
|
|
QMCKL_FAILURE if the values are larger than this tolerance value.
|
|
|
|
#+begin_src c :tangle (eval c_test)
|
|
const uint64_t Dim = 21;
|
|
const uint64_t LDS = (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH;
|
|
const double breakdown = 1e-3;
|
|
const double tolerance = 1e-3;
|
|
double res[441];
|
|
|
|
#include "sm_test.h"
|
|
|
|
assert(Updates1 != NULL);
|
|
assert(Updates_index1 != NULL);
|
|
assert(Slater_inv1 != NULL);
|
|
|
|
// original determinant of Slater1 (before applying updates)
|
|
double det = 3.407025646103221e-10;
|
|
rc = qmckl_sm_naive(context,
|
|
LDS,
|
|
Dim,
|
|
N_updates1,
|
|
Updates1,
|
|
Updates_index1,
|
|
breakdown,
|
|
Slater_inv1,
|
|
&det);
|
|
|
|
// Check that the determinant is updated properly
|
|
assert(fabs(det + 4.120398385068217e-10) < 1e-15);
|
|
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
assert(rc == QMCKL_SUCCESS);
|
|
#+end_src
|
|
|
|
|
|
* Sherman-Morrison with Slagel Splitting (core)
|
|
** ~qmckl_sm_splitting_core~
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_splitting_core
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
*** Introduction
|
|
~qmckl_sm_splitting_core~ is the inner core part of 'Sherman-Morrison with update splitting' in the next section.
|
|
It is not normally used by itself but it is possible to use it nonetheless.
|
|
|
|
It has three extra parameters in its API:
|
|
- ~later_updates~ initially empty array that will contain the second halves of updates that were split during kernel execution
|
|
- ~later_index~ initially empty array that will contain the row/column numbers of the updates that were split during execution
|
|
- ~later~ initially zero integer that records the number of updates that were split during exection.
|
|
|
|
It is up to the user to decide what to do with these updates once the kernel returns. Normally ~qmckl_sm_splitting_core~ is
|
|
used as the core part of a recursive function, as is done in ~qmckl_sm_splitting~ or as part of a more complex
|
|
kernel like ~qmckl_sherman_morrison_smw32s~.
|
|
|
|
If the determinant is passed it will only be partially updated if there were any update splits.
|
|
|
|
*** API
|
|
#+NAME: qmckl_sm_splitting_core_args
|
|
| Variable | Type | In/Out | Description |
|
|
|-----------------+-------------------------+--------+---------------------------------------------------------------|
|
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
|
| ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
| ~Updates~ | ~double[LDS*N_updates]~ | in | Array containing the rank-1 updates |
|
|
| ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing positions of the rank-1 updates |
|
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
|
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse Slater-matrix |
|
|
| ~later_updates~ | ~double[LDS*N_updates]~ | inout | Array containing the split updates for later |
|
|
| ~later_index~ | ~uint64_t[N_updates]~ | inout | Array containing the positions of the split updates for later |
|
|
| ~later~ | ~uint64_t~ | inout | Number of split updates for later |
|
|
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
|
|
|
*** Requirements
|
|
* ~LDS >= 2~
|
|
* ~Dim >= 2~
|
|
* ~N_updates >= 1~
|
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
* ~later_updates~ is allocated with $later \times Dim$ elements
|
|
* ~later_index~ is allocated with $N_updates$ elements
|
|
* ~later >= 0~
|
|
|
|
*** Pedagogical kernel source (in Fortran)
|
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
integer function qmckl_sm_splitting_core_doc_f( &
|
|
context, &
|
|
lds, dim, &
|
|
nupdates, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
later_upds, &
|
|
Later_index, &
|
|
Later, &
|
|
determinant) result(info)
|
|
|
|
use qmckl
|
|
implicit none
|
|
integer*8 , intent(in) :: context
|
|
integer*8 , intent(in) :: lds, dim
|
|
integer*8 , intent(in) :: nupdates
|
|
integer*8 , intent(in) :: updates_index(nupdates)
|
|
real*8 , intent(in) :: upds(lds * nupdates)
|
|
real*8 , intent(in) :: breakdown
|
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
|
real*8 , intent(inout) :: determinant
|
|
integer*8 , intent(inout) :: Later
|
|
integer*8 , intent(inout) :: Later_index(nupdates)
|
|
real*8 , intent(inout) :: later_upds(lds * nupdates)
|
|
|
|
real*8 , dimension(dim, nupdates) :: Updates
|
|
real*8 , dimension(dim, nupdates) :: Later_updates
|
|
real*8 , dimension(dim, dim) :: Inverse
|
|
real*8 , dimension(dim) :: C
|
|
real*8 , dimension(dim) :: D
|
|
real*8 :: denominator, idenominator, update
|
|
integer*8 :: i, j, l, row
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
if (context == QMCKL_NULL_CONTEXT) then
|
|
info = QMCKL_INVALID_CONTEXT
|
|
return
|
|
endif
|
|
|
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
|
! matrices 'Updates' and 'Inverse'.
|
|
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|
|
|
l = 1;
|
|
! For each update do...
|
|
do while (l < nupdates + 1)
|
|
|
|
! Compute C = S^{-1}U(l)
|
|
do i = 1, dim
|
|
C(i) = 0
|
|
do j = 1, dim
|
|
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
|
|
end do
|
|
end do
|
|
|
|
! Compute denominator = 1 + V(l)^TC
|
|
row = updates_index(l)
|
|
denominator = 1 + C(row)
|
|
|
|
! If denominator is too close to zero:
|
|
! - Split update in 2 before storing in Later_updates
|
|
! - Split previously computed vector C in 2
|
|
! - Recompute the denominator
|
|
if (abs(denominator) < breakdown) then
|
|
do i = 1, dim
|
|
Later_updates(i, l) = Updates(i, l) / 2
|
|
C(i) = C(i) / 2
|
|
end do
|
|
Later_index(Later + 1) = updates_index(l)
|
|
Later = Later + 1
|
|
denominator = 1 + C(row)
|
|
end if
|
|
|
|
idenominator = 1 / denominator
|
|
|
|
! Update det(S)
|
|
determinant = determinant * denominator
|
|
|
|
! selecting column: v_l^T * S_inv
|
|
D = Inverse(row, :)
|
|
|
|
! A^{-1} = A^{-1} - C x D / denominator
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
update = C(i) * D(j) * idenominator
|
|
Inverse(i, j) = Inverse(i, j) - update
|
|
end do
|
|
end do
|
|
|
|
l = l + 1
|
|
end do
|
|
|
|
! Copy updated inverse and later updates
|
|
! back to s_inv and later_upds
|
|
call copy_back_inv(Inverse, s_inv, lds, dim)
|
|
call copy_back_lu(Later_Updates, later_upds, lds, dim, nupdates)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
end function qmckl_sm_splitting_core_doc_f
|
|
#+end_src
|
|
|
|
**** C interface to the pedagogical kernel (not directly exposed)
|
|
The function ~qmckl_sm_splitting_core_doc~ makes sure that
|
|
~qmckl_sm_splitting_core_doc_f~ can be called from C using the
|
|
~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be
|
|
exposed in ~qmckl.h~ and ~qmckl_f.F90~, but
|
|
~qmckl_sm_splitting_core_doc_f~ will not.
|
|
|
|
#+CALL: generate_c_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
integer(c_int32_t) function qmckl_sm_splitting_core_doc &
|
|
(context, &
|
|
LDS, &
|
|
Dim, &
|
|
N_updates, &
|
|
Updates, &
|
|
Updates_index, &
|
|
breakdown, &
|
|
Slater_inv, &
|
|
later_updates, &
|
|
later_index, &
|
|
later, &
|
|
determinant) &
|
|
bind(C) result(info)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(LDS*N_updates)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: later_updates(LDS*N_updates)
|
|
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
|
integer (c_int64_t) , intent(inout) :: later
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
integer(c_int32_t), external :: qmckl_sm_splitting_core_doc_f
|
|
info = qmckl_sm_splitting_core_doc_f &
|
|
(context, &
|
|
LDS, &
|
|
Dim, &
|
|
N_updates, &
|
|
Updates, &
|
|
Updates_index, &
|
|
breakdown, &
|
|
Slater_inv, &
|
|
later_updates, &
|
|
later_index, &
|
|
later, &
|
|
determinant)
|
|
|
|
end function qmckl_sm_splitting_core_doc
|
|
#+end_src
|
|
|
|
*** C headers (exposed in qmckl.h)
|
|
#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting_core (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* later_updates,
|
|
uint64_t* later_index,
|
|
uint64_t* later,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_private_func) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting_core_hpc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* later_updates,
|
|
uint64_t* later_index,
|
|
uint64_t* later,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :no-expand comments org
|
|
qmckl_exit_code qmckl_sm_splitting_core_doc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* later_updates,
|
|
uint64_t* later_index,
|
|
uint64_t* later,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
*** C sources
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting_core_hpc(
|
|
const qmckl_context context,
|
|
uint64_t LDS,
|
|
uint64_t Dim,
|
|
uint64_t N_updates,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict later_updates,
|
|
uint64_t* __restrict later_index,
|
|
uint64_t* __restrict later,
|
|
double* __restrict determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_splitting_core_hpc",
|
|
NULL);
|
|
}
|
|
|
|
double __attribute__((aligned(8))) C[LDS];
|
|
double __attribute__((aligned(8))) D[LDS];
|
|
|
|
uint64_t l = 0;
|
|
// For each update
|
|
while (l < N_updates) {
|
|
// C = S^{-1} x U_l
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
C[i] = 0.0f;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
|
}
|
|
}
|
|
|
|
// Denominator
|
|
const int cui = Updates_index[l] - 1;
|
|
double den = 1.0f + C[cui];
|
|
if (fabs(den) < breakdown) {
|
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
|
// second halve in later_updates
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t i = 0; i < LDS; i++) {
|
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // From here onwards we continue with applying the first halve of the
|
|
// update to Slater_inv
|
|
double iden = 1.0f / den;
|
|
|
|
if (determinant)
|
|
*determinant *= den;
|
|
|
|
// D = v^T x S^{-1} : 1 x LDS
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
D[j] = Slater_inv[cui * LDS + j];
|
|
}
|
|
|
|
// S^{-1} = S^{-1} - C x D / den
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
const double update = C[i] * D[j] * iden;
|
|
Slater_inv[i * LDS + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:slagel_splitting_template_code
|
|
#+begin_src c
|
|
static inline qmckl_exit_code qmckl_sm_splitting_core_{Dim}(
|
|
const qmckl_context context,
|
|
uint64_t N_updates,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict later_updates,
|
|
uint64_t* __restrict later_index,
|
|
uint64_t* __restrict later,
|
|
double* __restrict determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_splitting_core_{Dim}",
|
|
NULL);
|
|
}
|
|
|
|
double __attribute__((aligned(8))) C[D{Dim}_P];
|
|
double __attribute__((aligned(8))) D[D{Dim}_P];
|
|
|
|
uint64_t l = 0;
|
|
// For each update
|
|
while (l < N_updates) {
|
|
// C = S^{-1} x U_l
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
C[i] = 0.0f;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
// Denominator
|
|
const int cui = Updates_index[l] - 1;
|
|
double den = 1.0f + C[cui];
|
|
if (fabs(den) < breakdown) {
|
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
|
// second halve in later_updates
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t i = 0; i < D{Dim}_P; i++) {
|
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
|
C[i] *= 0.5f;
|
|
}
|
|
later_index[*later] = Updates_index[l];
|
|
(*later)++;
|
|
|
|
den = 1.0f + C[cui];
|
|
} // From here onwards we continue with applying the first halve of the
|
|
// update to Slater_inv
|
|
double iden = 1.0f / den;
|
|
|
|
if (determinant)
|
|
*determinant *= den;
|
|
|
|
// D = v^T x S^{-1} : 1 x D{Dim}_P
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
|
}
|
|
|
|
// S^{-1} = S^{-1} - C x D / den
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
const double update = C[i] * D[j] * iden;
|
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
|
}
|
|
}
|
|
l += 1;
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:slagel_splitting_kernel_generator
|
|
#+begin_src python :noweb yes
|
|
text="""
|
|
<<slagel_splitting_template_code>>
|
|
"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+NAME:slagel_splitting_switch-case_generator
|
|
#+begin_src python :noweb yes
|
|
text="""
|
|
case {Dim}: {
|
|
return qmckl_sm_splitting_core_{Dim}(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
<<slagel_splitting_kernel_generator()>>
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
qmckl_exit_code qmckl_sm_splitting_core(
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* later_updates,
|
|
uint64_t* later_index,
|
|
uint64_t* later,
|
|
double* determinant) {
|
|
|
|
#ifdef HAVE_HPC
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
switch (Dim) {
|
|
<<slagel_splitting_switch-case_generator()>>
|
|
default: {
|
|
assert(0 == 1 && "TEMPLATE NOT IMPLEMENTED!");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else { // Updating smaller sub-matrix
|
|
return qmckl_sm_splitting_core_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
}
|
|
#else
|
|
return qmckl_sm_splitting_core_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index,
|
|
later,
|
|
determinant);
|
|
#endif
|
|
|
|
return QMCKL_FAILURE;
|
|
}
|
|
#+end_src
|
|
|
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_splitting_core
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting_core_hpc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, &
|
|
Slater_inv, later_updates, later_index, later, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: later_updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
|
integer (c_int64_t) , intent(inout) :: later
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting_core_hpc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting_core_doc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, &
|
|
Slater_inv, later_updates, later_index, later, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: later_updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
|
integer (c_int64_t) , intent(inout) :: later
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting_core_doc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting_core &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, &
|
|
Slater_inv, later_updates, later_index, later, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: later_updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
|
integer (c_int64_t) , intent(inout) :: later
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting_core
|
|
end interface
|
|
#+end_src
|
|
|
|
*** Performance
|
|
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
|
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
|
|
|
|
|
|
* Woodbury 2x2
|
|
** ~qmckl_woodbury_2x2~
|
|
:PROPERTIES:
|
|
:Name: qmckl_woodbury_2x2
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
*** Introduction
|
|
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
|
this algorithm is called the Woodbury Matrix Id
|
|
\[
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
\]
|
|
where
|
|
$S$ is the Slater-matrix
|
|
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
|
|
$S^{-1}$ is the inverse of the Slater-matrix
|
|
$C:= S^{-1}U$, a Dim $\times 2$ matrix
|
|
$B := 1 + VC$, the $2 \times 2$ matrix that is going to be inverted
|
|
$D := VS^{-1}$, a $2 \times Dim$ matrix
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
|
|
*** API
|
|
#+NAME: qmckl_woodbury_2x2_args
|
|
| Variable | Type | In/Out | Description |
|
|
|-----------------+-------------------+-------+-------------------------------------------------|
|
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
|
| ~Updates~ | ~double[2*Dim]~ | in | Array containing the updates |
|
|
| ~Updates_index~ | ~uint64_t[2]~ | in | Array containing the rank-1 updates |
|
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
|
| ~Slater_inv~ | ~double[LDS*Dim]~ | inout | Array containing the inverse of a Slater-matrix |
|
|
| ~determinant~ | ~double~ | inout | Determinant of Slater-matrix |
|
|
|
|
*** Requirements
|
|
- ~context~ is not ~qmckl_null_context~
|
|
- ~LDS >= 2~
|
|
- ~Dim >= 2~
|
|
- ~Updates~ is allocated with $2 \times Dim$ elements
|
|
- ~Updates_index~ is allocated with $2$ elements
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
|
|
*** Pedagogical kernel source (in Fortran)
|
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
integer function qmckl_woodbury_2x2_doc_f(&
|
|
context, &
|
|
lds, dim, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
determinant) result(info)
|
|
|
|
use qmckl
|
|
implicit none
|
|
integer*8 , intent(in) :: context
|
|
integer*8 , intent(in) :: lds, dim
|
|
integer*8 , intent(in) :: updates_index(2)
|
|
real*8 , intent(in) :: upds(2 * lds)
|
|
real*8 , intent(in) :: breakdown
|
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
|
real*8 , intent(inout) :: determinant
|
|
|
|
integer*8 , dimension(2, dim) :: V
|
|
integer*8 , dimension(2, 2) :: Id
|
|
real*8 , dimension(dim, dim) :: Inverse
|
|
real*8 , dimension(dim, 2) :: Updates, C
|
|
real*8 , dimension(2, 2) :: D, invD
|
|
real*8 , dimension(2, dim) :: E, F
|
|
|
|
real*8 :: detD, idenominator, update
|
|
integer*8 :: i, j, k, l
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
if (context == QMCKL_NULL_CONTEXT) then
|
|
info = QMCKL_INVALID_CONTEXT
|
|
return
|
|
endif
|
|
|
|
! Construct V(2, dim) matrix
|
|
V = 0
|
|
V(1, updates_index(1)) = 1
|
|
V(2, updates_index(2)) = 1
|
|
|
|
! Construct Id(2, 2) matrix
|
|
Id = 0
|
|
Id(1, 1) = 1
|
|
Id(2, 2) = 1
|
|
|
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
|
! matrices 'Updates' and 'Inverse'.
|
|
call convert(upds, s_inv, Updates, Inverse, int(2,8), lds, dim)
|
|
|
|
! Compute C(dim, 2) = Inverse(dim, dim) x Updates(dim, 2)
|
|
C = 0
|
|
do i = 1, dim
|
|
do j = 1, 2
|
|
do k = 1, dim
|
|
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Construct matrix D(2, 2) := I(2, 2) + V(2, dim) x C(dim, 2)
|
|
D = 0
|
|
do i = 1, 2
|
|
do j = 1, 2
|
|
do k = 2, dim
|
|
D(i, j) = D(i, j) + V(i, k) * C(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
D = Id + D
|
|
|
|
! Compute determinant := det(D) explicitly
|
|
detD = D(1,1) * D(2,2) - D(1,2) * D(2,1)
|
|
|
|
! Return early if det(D) is too small
|
|
if (abs(detD) < breakdown) return
|
|
|
|
! Update det(S)
|
|
determinant = determinant * detD
|
|
|
|
! Compute inv(D) explicitly
|
|
invD(1,1) = D(2,2)
|
|
invD(1,2) = - D(1,2)
|
|
invD(2,1) = - D(2,1)
|
|
invD(2,2) = D(1,1)
|
|
invD = invD / detD
|
|
|
|
! Compute E(2, dim) := V(2, dim) x Inverse(dim, dim)
|
|
E = 0
|
|
do i = 1, 2
|
|
do j = 1, dim
|
|
do k = 1, dim
|
|
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Compute F(2, dim) := invD(2, 2) x E(2, dim)
|
|
F = 0
|
|
do i = 1, 2
|
|
do j = 1, dim
|
|
do k = 1, 2
|
|
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 2) x F(2, dim)
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
do k = 1, 2
|
|
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Copy updated inverse and later updates
|
|
! back to s_inv and later_upds
|
|
call copy_back_inv(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
end function qmckl_woodbury_2x2_doc_f
|
|
#+end_src
|
|
|
|
**** C interface (not directly exposed)
|
|
The function ~qmckl_sm_splitting_core_doc~ makes sure that
|
|
~qmckl_sm_splitting_core_doc_f~ can be called from C using the
|
|
~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be
|
|
exposed in ~qmckl.h~ and ~qmckl_f.F90~, but
|
|
~qmckl_sm_splitting_core_doc_f~ will not.
|
|
|
|
#+CALL: generate_c_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
integer(c_int32_t) function qmckl_woodbury_2x2_doc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C) result(info)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(2*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(2)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
integer(c_int32_t), external :: qmckl_woodbury_2x2_doc_f
|
|
info = qmckl_woodbury_2x2_doc_f &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
|
|
|
end function qmckl_woodbury_2x2_doc
|
|
#+end_src
|
|
|
|
*** C headers (exposed in qmckl.h)
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_2x2 (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_2x2_hpc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_2x2_doc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
*** C sources
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
/*
|
|
C := S^{-1} * U, dim x 2
|
|
B := 1 + V * C, 2 x 2
|
|
D := V * S^{-1}, 2 x dim
|
|
*/
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_2x2_hpc",
|
|
NULL);
|
|
}
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
|
|
// Compute C = (S^T)^{-1}U : Dim x 2
|
|
double __attribute__((aligned(8))) C[2 * Dim];
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t k = 0; k < LDS; k++) {
|
|
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
}
|
|
}
|
|
|
|
// Compute B = 1 + VC : 2 x 2
|
|
const double B0 = C[row1 * 2] + 1;
|
|
const double B1 = C[row1 * 2 + 1];
|
|
const double B2 = C[row2 * 2];
|
|
const double B3 = C[row2 * 2 + 1] + 1;
|
|
|
|
// Check if determinant of inverted matrix is not zero
|
|
double det = B0 * B3 - B1 * B2;
|
|
if (fabs(det) < breakdown) {
|
|
return QMCKL_FAILURE;
|
|
}
|
|
|
|
// Update det(S) when passed
|
|
if (determinant)
|
|
*determinant *= det;
|
|
|
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
|
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// tmp = B^{-1}D : 2 x LDS
|
|
double __attribute__((aligned(8))) tmp[2 * LDS];
|
|
double* r1dim = &(Slater_inv[row1 * LDS]);
|
|
double* r2dim = &(Slater_inv[row2 * LDS]);
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
|
|
}
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_2x2_kernel_template
|
|
#+begin_src c
|
|
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
|
|
const qmckl_context context,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
/*
|
|
C := S^{-1} * U, dim x 2
|
|
B := 1 + V * C, 2 x 2
|
|
D := V * S^{-1}, 2 x dim
|
|
*/
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_2x2_{Dim}",
|
|
NULL);
|
|
}
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
|
|
// Compute C = (S^T)^{-1}U : {Dim} x 2
|
|
double __attribute__((aligned(8))) C[2 * {Dim}];
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
C[i * 2] = 0;
|
|
C[i * 2 + 1] = 0;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
|
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
}
|
|
}
|
|
|
|
// Compute B = 1 + VC : 2 x 2
|
|
const double B0 = C[row1 * 2] + 1;
|
|
const double B1 = C[row1 * 2 + 1];
|
|
const double B2 = C[row2 * 2];
|
|
const double B3 = C[row2 * 2 + 1] + 1;
|
|
|
|
// Check if determinant of inverted matrix is not zero
|
|
double det = B0 * B3 - B1 * B2;
|
|
if (fabs(det) < breakdown) {
|
|
return QMCKL_FAILURE;
|
|
}
|
|
|
|
// Update det(S) when passed
|
|
if (determinant)
|
|
*determinant *= det;
|
|
|
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
|
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
|
Binv[0] = idet * B3;
|
|
Binv[1] = -1.0 * idet * B1;
|
|
Binv[2] = -1.0 * idet * B2;
|
|
Binv[3] = idet * B0;
|
|
|
|
// tmp = B^{-1}D : 2 x D{Dim}_P
|
|
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
|
|
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
|
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
|
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
|
}
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_2x2_kernel_generator
|
|
#+begin_src python :noweb yes :exports none
|
|
text="""
|
|
<<woodbury_2x2_kernel_template>>
|
|
"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_2x2_switch-case_generator
|
|
#+begin_src python :noweb yes :exports none
|
|
text="""
|
|
case {Dim}:
|
|
return qmckl_woodbury_2x2_{Dim}(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
<<woodbury_2x2_kernel_generator()>>
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_2x2",
|
|
NULL);
|
|
}
|
|
|
|
#ifdef HAVE_HPC
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
switch (Dim) {
|
|
<<woodbury_2x2_switch-case_generator()>>
|
|
}
|
|
}
|
|
else { // Updating smaller sub-matrix
|
|
return qmckl_woodbury_2x2_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
#else
|
|
return qmckl_woodbury_2x2_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
// return qmckl_woodbury_2x2_hpc(
|
|
// context,
|
|
// LDS,
|
|
// Dim,
|
|
// Updates,
|
|
// Updates_index,
|
|
// breakdown,
|
|
// Slater_inv,
|
|
// determinant);
|
|
#endif
|
|
|
|
return QMCKL_FAILURE;
|
|
}
|
|
#+end_src
|
|
|
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
|
:PROPERTIES:
|
|
:Name: qmckl_woodbury_2x2
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_2x2 &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(2*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(2)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_2x2
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_2x2_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_2x2_hpc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(2*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(2)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_2x2_hpc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_2x2_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_2x2_doc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(2*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(2)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_2x2_doc
|
|
end interface
|
|
#+end_src
|
|
|
|
*** Performance
|
|
This function is most efficient when used in cases where there are only 2 rank-1 updates and
|
|
it is sure they will not result in a singular matrix.
|
|
|
|
*** Tests
|
|
#+begin_src c :tangle (eval c_test)
|
|
assert(Updates2 != NULL);
|
|
assert(Updates_index2 != NULL);
|
|
assert(Slater_inv2 != NULL);
|
|
det = -1.4432116661319376e-11;
|
|
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
|
assert(fabs(det-2.367058141251457e-10) < 1e-15);
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
assert(rc == QMCKL_SUCCESS);
|
|
#+end_src
|
|
|
|
|
|
* Woodbury 3x3
|
|
** ~qmckl_woodbury_3x3~
|
|
:PROPERTIES:
|
|
:Name: qmckl_woodbury_3x3
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
*** Introduction
|
|
The Woodbury 3x3 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
|
this algorithm is called the Woodbury Matrix Id
|
|
\[
|
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
|
\]
|
|
where
|
|
$S$ is the Slater-matrix
|
|
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
|
|
$S^{-1}$ is the inverse of the Slater-matrix
|
|
$C:= S^{-1}U$, a Dim $\times 3$ matrix
|
|
$B := 1 + VC$, the $3 \times 3$ matrix that is going to be inverted
|
|
$D := VS^{-1}$, a $3 \times Dim$ matrix
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
|
|
*** API
|
|
#+NAME: qmckl_woodbury_3x3_args
|
|
| Variable | Type | In/Out | Description |
|
|
|-----------------+-------------------+-------+-------------------------------------------------|
|
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
|
| ~Updates~ | ~double[3*Dim]~ | in | Array containing the updates |
|
|
| ~Updates_index~ | ~uint64_t[3]~ | in | Array containing the rank-1 updates |
|
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
|
| ~Slater_inv~ | ~double[LDS*Dim]~ | inout | Array containing the inverse of a Slater-matrix |
|
|
| ~determinant~ | ~double~ | inout | Determinant of Slater-matrix |
|
|
|
|
*** Requirements
|
|
- ~context~ is not ~qmckl_null_context~
|
|
- ~LDS >= 3~
|
|
- ~Dim >= 3~
|
|
- ~Updates~ is allocated with $3 \times Dim$ elements
|
|
- ~Updates_index~ is allocated with $3$ elements
|
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
|
|
*** Pedagogical kernel source (in Fortran)
|
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
integer function qmckl_woodbury_3x3_doc_f(&
|
|
context, &
|
|
lds, dim, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
determinant) result(info)
|
|
|
|
use qmckl
|
|
implicit none
|
|
integer*8 , intent(in) :: context
|
|
integer*8 , intent(in) :: lds, dim
|
|
integer*8 , intent(in) :: updates_index(3)
|
|
real*8 , intent(in) :: upds(3 * lds)
|
|
real*8 , intent(in) :: breakdown
|
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
|
real*8 , intent(inout) :: determinant
|
|
|
|
integer*8 , dimension(3, dim) :: V
|
|
integer*8 , dimension(3, 3) :: Id
|
|
real*8 , dimension(dim, dim) :: Inverse
|
|
real*8 , dimension(dim, 3) :: Updates, C
|
|
real*8 , dimension(3, 3) :: D, invD
|
|
real*8 , dimension(3, dim) :: E, F
|
|
|
|
real*8 :: detD, idetD, idenominator, update
|
|
integer*8 :: i, j, k, l
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
if (context == QMCKL_NULL_CONTEXT) then
|
|
info = QMCKL_INVALID_CONTEXT
|
|
return
|
|
endif
|
|
|
|
! Construct V(3, dim) matrix
|
|
V = 0
|
|
V(1, updates_index(1)) = 1
|
|
V(2, updates_index(2)) = 1
|
|
V(3, updates_index(3)) = 1
|
|
|
|
! Construct Id(3, 3) matrix
|
|
Id = 0
|
|
Id(1, 1) = 1
|
|
Id(2, 2) = 1
|
|
Id(3, 3) = 1
|
|
|
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
|
! matrices 'Updates' and 'Inverse'.
|
|
call convert(upds, s_inv, Updates, Inverse, int(3,8), lds, dim)
|
|
|
|
! Compute C(dim, 3) = Inverse(dim, dim) x Updates(dim, 3)
|
|
C = 0
|
|
do i = 1, dim
|
|
do j = 1, 3
|
|
do k = 1, dim
|
|
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Construct matrix D(3, 3) := I(3, 3) + V(3, dim) x C(dim, 3)
|
|
D = 0
|
|
do i = 1, 3
|
|
do j = 1, 3
|
|
do k = 3, dim
|
|
D(i, j) = D(i, j) + V(i, k) * C(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
D = Id + D
|
|
|
|
! Compute determinant := det(D) explicitly
|
|
detD = D(1,1) * (D(2,2) * D(3,3) - D(2,3) * D(3,2)) - &
|
|
D(1,2) * (D(2,1) * D(3,3) - D(2,3) * D(3,1)) + &
|
|
D(1,3) * (D(2,1) * D(3,2) - D(2,2) * D(3,1))
|
|
|
|
! Return early if det(D) is too small
|
|
if (abs(detD) < breakdown) return
|
|
|
|
! Update det(S)
|
|
determinant = determinant * detD
|
|
|
|
idetD = 1.0d0 / detD
|
|
! Compute inv(D) explicitly
|
|
invD(1,1) = (D(2,2) * D(3,3) - D(3,2) * D(2,3)) * idetD
|
|
invD(1,2) = -(D(1,2) * D(3,3) - D(3,2) * D(1,3)) * idetD
|
|
invD(1,3) = (D(1,2) * D(2,3) - D(2,2) * D(1,3)) * idetD
|
|
invD(2,1) = -(D(2,1) * D(3,3) - D(3,1) * D(2,3)) * idetD
|
|
invD(2,2) = (D(1,1) * D(3,3) - D(3,1) * D(1,3)) * idetD
|
|
invD(2,3) = -(D(1,1) * D(2,3) - D(2,1) * D(1,3)) * idetD
|
|
invD(3,1) = (D(2,1) * D(3,2) - D(3,1) * D(2,2)) * idetD
|
|
invD(3,2) = -(D(1,1) * D(3,2) - D(3,1) * D(1,2)) * idetD
|
|
invD(3,3) = (D(1,1) * D(2,2) - D(2,1) * D(1,2)) * idetD
|
|
|
|
! Compute E(3, dim) := V(3, dim) x Inverse(dim, dim)
|
|
E = 0
|
|
do i = 1, 3
|
|
do j = 1, dim
|
|
do k = 1, dim
|
|
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Compute F(3, dim) := invD(3, 3) x E(3, dim)
|
|
F = 0
|
|
do i = 1, 3
|
|
do j = 1, dim
|
|
do k = 1, 3
|
|
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 3) x F(3, dim)
|
|
do i = 1, dim
|
|
do j = 1, dim
|
|
do k = 1, 3
|
|
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
|
|
end do
|
|
end do
|
|
end do
|
|
|
|
! Copy updated inverse and later updates
|
|
! back to s_inv and later_upds
|
|
call copy_back_inv(Inverse, s_inv, lds, dim)
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
end function qmckl_woodbury_3x3_doc_f
|
|
#+end_src
|
|
|
|
**** C interface (not directly exposed)
|
|
The function ~qmckl_sm_splitting_core_doc~ makes sure that
|
|
~qmckl_sm_splitting_core_doc_f~ can be called from C using the
|
|
~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be
|
|
exposed in ~qmckl.h~ and ~qmckl_f.F90~, but
|
|
~qmckl_sm_splitting_core_doc_f~ will not.
|
|
|
|
#+CALL: generate_c_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
integer(c_int32_t) function qmckl_woodbury_3x3_doc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C) result(info)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
integer(c_int32_t), external :: qmckl_woodbury_3x3_doc_f
|
|
info = qmckl_woodbury_3x3_doc_f &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
|
|
|
end function qmckl_woodbury_3x3_doc
|
|
#+end_src
|
|
|
|
*** C headers (exposed in qmckl.h)
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_3x3 (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_3x3_hpc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_woodbury_3x3_doc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
*** C sources
|
|
#+begin_src c :tangle (eval c) :comments org
|
|
qmckl_exit_code qmckl_woodbury_3x3_hpc(const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
/*
|
|
C := S^{-1} * U, dim x 3
|
|
B := 1 + V * C, 3 x 3
|
|
D := V * S^{-1}, 3 x dim
|
|
*/
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_3x3_hpc",
|
|
NULL);
|
|
}
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
const uint64_t row3 = (Updates_index[2] - 1);
|
|
|
|
// Compute C = (S^T)^{-1}U : Dim x 3
|
|
double __attribute__((aligned(8))) C[3 * Dim];
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t k = 0; k < LDS; k++) {
|
|
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
|
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
|
}
|
|
}
|
|
|
|
// Compute B = 1 + VC : 3 x 3
|
|
const double B0 = C[row1 * 3] + 1;
|
|
const double B1 = C[row1 * 3 + 1];
|
|
const double B2 = C[row1 * 3 + 2];
|
|
const double B3 = C[row2 * 3];
|
|
const double B4 = C[row2 * 3 + 1] + 1;
|
|
const double B5 = C[row2 * 3 + 2];
|
|
const double B6 = C[row3 * 3];
|
|
const double B7 = C[row3 * 3 + 1];
|
|
const double B8 = C[row3 * 3 + 2] + 1;
|
|
|
|
// Check if determinant of inverted matrix is not zero
|
|
double det;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
if (fabs(det) < breakdown) {
|
|
return QMCKL_FAILURE;
|
|
}
|
|
|
|
// Update det(S) when passed
|
|
if (determinant)
|
|
*determinant *= det;
|
|
|
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// tmp = B^{-1}D : 2 x LDS
|
|
double __attribute__((aligned(8))) tmp[3 * LDS];
|
|
double* r1dim = &(Slater_inv[row1 * LDS]);
|
|
double* r2dim = &(Slater_inv[row2 * LDS]);
|
|
double* r3dim = &(Slater_inv[row3 * LDS]);
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[LDS + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * LDS + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
|
for (uint64_t i = 0; i < Dim; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < LDS; j++) {
|
|
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
|
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
|
}
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_3x3_kernel_template
|
|
#+begin_src c
|
|
static inline qmckl_exit_code qmckl_woodbury_3x3_{Dim}(
|
|
const qmckl_context context,
|
|
const double* __restrict Updates,
|
|
const uint64_t* __restrict Updates_index,
|
|
const double breakdown,
|
|
double* __restrict Slater_inv,
|
|
double* __restrict determinant) {
|
|
/*
|
|
C := S^{-1} * U, dim x 3
|
|
B := 1 + V * C, 3 x 3
|
|
D := V * S^{-1}, 3 x dim
|
|
,*/
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_3x3_{Dim}",
|
|
NULL);
|
|
}
|
|
|
|
const uint64_t row1 = (Updates_index[0] - 1);
|
|
const uint64_t row2 = (Updates_index[1] - 1);
|
|
const uint64_t row3 = (Updates_index[2] - 1);
|
|
|
|
// Compute C = (S^T)^{-1}U : {Dim} x 3
|
|
double __attribute__((aligned(8))) C[3 * {Dim}];
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
C[i * 3] = 0;
|
|
C[i * 3 + 1] = 0;
|
|
C[i * 3 + 2] = 0;
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
|
C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
|
C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
|
C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
|
|
}
|
|
}
|
|
|
|
// Compute B = 1 + VC : 3 x 3
|
|
const double B0 = C[row1 * 3] + 1;
|
|
const double B1 = C[row1 * 3 + 1];
|
|
const double B2 = C[row1 * 3 + 2];
|
|
const double B3 = C[row2 * 3];
|
|
const double B4 = C[row2 * 3 + 1] + 1;
|
|
const double B5 = C[row2 * 3 + 2];
|
|
const double B6 = C[row3 * 3];
|
|
const double B7 = C[row3 * 3 + 1];
|
|
const double B8 = C[row3 * 3 + 2] + 1;
|
|
|
|
// Check if determinant of B is not too close to zero
|
|
double det;
|
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
|
B2 * (B3 * B7 - B4 * B6);
|
|
if (fabs(det) < breakdown) {
|
|
return QMCKL_FAILURE;
|
|
}
|
|
|
|
// Update det(Slater) if passed
|
|
if (determinant)
|
|
*determinant *= det;
|
|
|
|
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
|
|
|
// tmp = B^{-1}D : 3 x D{Dim}_P
|
|
double __attribute__((aligned(8))) tmp[3 * D{Dim}_P];
|
|
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
|
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
|
double* r3dim = &(Slater_inv[row3 * D{Dim}_P]);
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
|
tmp[D{Dim}_P + j] =
|
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
|
tmp[2 * D{Dim}_P + j] =
|
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
|
}
|
|
|
|
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
|
IVDEP
|
|
ALIGNED
|
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
|
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
|
|
}
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_3x3_kernel_generator
|
|
#+begin_src python :noweb yes :exports none
|
|
text="""
|
|
<<woodbury_3x3_kernel_template>>
|
|
"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+NAME:woodbury_3x3_switch-case_generator
|
|
#+begin_src python :noweb yes :exports none
|
|
text="""
|
|
case {Dim}:
|
|
return qmckl_woodbury_3x3_{Dim}(context,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return ''.join(result)
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
<<woodbury_3x3_kernel_generator()>>
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_woodbury_3x3",
|
|
NULL);
|
|
}
|
|
|
|
#ifdef HAVE_HPC
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
|
switch (Dim) {
|
|
<<woodbury_3x3_switch-case_generator()>>
|
|
}
|
|
}
|
|
else { // Updating smaller sub-matrix
|
|
return qmckl_woodbury_3x3_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
}
|
|
#else
|
|
return qmckl_woodbury_3x3_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
// return qmckl_woodbury_3x3_hpc(
|
|
// context,
|
|
// LDS,
|
|
// Dim,
|
|
// Updates,
|
|
// Updates_index,
|
|
// breakdown,
|
|
// Slater_inv,
|
|
// determinant);
|
|
#endif
|
|
|
|
return QMCKL_FAILURE;
|
|
}
|
|
#+end_src
|
|
|
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
|
:PROPERTIES:
|
|
:Name: qmckl_woodbury_3x3
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_3x3 &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_3x3
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_3x3_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_3x3_hpc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_3x3_hpc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_3x3_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_woodbury_3x3_doc &
|
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_woodbury_3x3_doc
|
|
end interface
|
|
#+end_src
|
|
|
|
*** Performance
|
|
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
|
it is sure they will not result in a singular matrix.
|
|
|
|
*** Tests
|
|
#+begin_src c :tangle (eval c_test)
|
|
assert(Updates3 != NULL);
|
|
assert(Updates_index3 != NULL);
|
|
assert(Slater_inv3_1 != NULL);
|
|
det = -1.23743195512859e-09;
|
|
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
|
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
assert(rc == QMCKL_SUCCESS);
|
|
#+end_src
|
|
|
|
|
|
* Sherman-Morrison with Slagel Splitting
|
|
** ~qmckl_sm_splitting~
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_splitting
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
*** Introduction
|
|
This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator $1+v_j^T S^{-1} u_j$ in
|
|
the Sherman-Morrison formula is deemed to be too close to zero, the update $u_j$ is split in half:
|
|
$u_j \rightarrow \frac{1}{2} u_j$. One half is applied immediately --necessarily increasing the value of the
|
|
denominator because of the split-- while the other halve is put in a queue that will be applied when all the
|
|
remaining updates have been treated.
|
|
|
|
The kernel is executed recursively until the queue is eiter empty and all
|
|
updates are applied successfully, or the size of the queue equals the number of initial updates. In the last
|
|
case the Slater-matrix that would have resulted from applying the updates is singular and therefore the
|
|
kernel exits with an exit code.
|
|
|
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
|
from applying the updates to the original matrix.
|
|
|
|
*** API
|
|
#+NAME: qmckl_sm_splitting_args
|
|
| Variable | Type | In/Out | Description |
|
|
|---------------+-----------------------+--------+------------------------------------------------------|
|
|
| context | qmckl_context | in | Global state |
|
|
| LDS | uint64_t | in | Leading dimension of Slater_inv |
|
|
| Dim | uint64_t | in | Dimension of Slater_inv |
|
|
| N_updates | uint64_t | in | Number of rank-1 updates to be applied to Slater_inv |
|
|
| Updates | double[N_updates*LDS] | in | Array containing the updates |
|
|
| Updates_index | uint64_t[N_updates] | in | Array containing the rank-1 updates |
|
|
| breakdown | double | in | Break-down parameter on which to fail or not |
|
|
| Slater_inv | double[Dim*LDS] | inout | Array containing the inverse of a Slater-matrix |
|
|
| determinant | double | inout | Determinant of the Slater-matrix |
|
|
|
|
*** Requirements
|
|
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
|
* ~LDS >= 2~
|
|
* ~Dim >= 2~
|
|
* ~N_updates >= 1~
|
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
|
|
|
*** Pedagogical kernel source (in Fortran)
|
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
|
not be used in real workloads.
|
|
|
|
#+begin_src f90 :tangle (eval f)
|
|
integer recursive function qmckl_sm_splitting_doc_f( &
|
|
context, &
|
|
lds, dim, &
|
|
nupdates, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
determinant) result(info)
|
|
|
|
use qmckl
|
|
implicit none
|
|
integer*8 , intent(in) :: context
|
|
integer*8 , intent(in) :: lds, dim
|
|
integer*8 , intent(in) :: nupdates
|
|
integer*8 , intent(in) :: updates_index(nupdates)
|
|
real*8 , intent(in) :: upds(lds * nupdates)
|
|
real*8 , intent(in) :: breakdown
|
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
|
real*8 , intent(inout) :: determinant
|
|
|
|
integer , external :: qmckl_sm_splitting_core_doc_f
|
|
|
|
integer*8 :: Later
|
|
integer*8 , dimension(nupdates) :: Later_index
|
|
real*8 , dimension(lds * nupdates) :: Later_updates
|
|
|
|
info = QMCKL_FAILURE
|
|
|
|
if (context == QMCKL_NULL_CONTEXT) then
|
|
info = QMCKL_INVALID_CONTEXT
|
|
return
|
|
endif
|
|
|
|
Later = 0
|
|
Later_index = 0
|
|
Later_updates = 0
|
|
|
|
info = qmckl_sm_splitting_core_doc_f( &
|
|
context, &
|
|
lds, dim, &
|
|
nupdates, &
|
|
upds, &
|
|
updates_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
Later_updates, &
|
|
Later_index, &
|
|
Later, &
|
|
determinant)
|
|
|
|
if (Later > 0) then
|
|
info = qmckl_sm_splitting_doc_f( &
|
|
context, &
|
|
lds, dim, &
|
|
Later, &
|
|
Later_updates, &
|
|
Later_index, &
|
|
breakdown, &
|
|
s_inv, &
|
|
determinant)
|
|
end if
|
|
|
|
info = QMCKL_SUCCESS
|
|
|
|
end function qmckl_sm_splitting_doc_f
|
|
#+end_src
|
|
|
|
**** C interface to the pedagogical kernel (not directly exposed)
|
|
The following Fortran function ~qmckl_sm_splitting_core_doc~ makes sure
|
|
that the pedagogical kernel ~qmckl_sm_splitting_core_doc_f~, written in
|
|
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function
|
|
~qmckl_sm_splitting_core_doc~ will be exposed in the header file 'qmckl.h'
|
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
|
|
|
#+CALL: generate_c_interface(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
|
integer(c_int32_t) function qmckl_sm_splitting_doc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C) result(info)
|
|
|
|
use, intrinsic :: iso_c_binding
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
integer(c_int32_t), external :: qmckl_sm_splitting_doc_f
|
|
info = qmckl_sm_splitting_doc_f &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
|
|
|
end function qmckl_sm_splitting_doc
|
|
#+end_src
|
|
|
|
*** C headers (exposed in qmckl.h)
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_private_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_private_func) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting_hpc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
#+CALL: generate_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src c :tangle (eval h_func) :comments org
|
|
qmckl_exit_code qmckl_sm_splitting_doc (
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant );
|
|
#+end_src
|
|
|
|
*** C source
|
|
#+NAME:splitting_switch-case_generator
|
|
#+begin_src python :noweb yes
|
|
text="""
|
|
case {Dim}: {
|
|
rc = qmckl_sm_splitting_core_{Dim}(
|
|
context,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
later_updates,
|
|
later_index, &later, determinant);
|
|
break;
|
|
}
|
|
"""
|
|
result = []
|
|
for Dim in <<kernel_generator_range>>:
|
|
Dim=str(Dim)
|
|
result.append(text.replace("{Dim}",Dim) )
|
|
|
|
return '\n'.join(result)
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
|
qmckl_exit_code qmckl_sm_splitting_hpc(
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_splitting_hpc",
|
|
NULL);
|
|
}
|
|
|
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
|
uint64_t later_index[N_updates];
|
|
uint64_t later = 0;
|
|
|
|
qmckl_exit_code rc;
|
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) {
|
|
switch (Dim) {
|
|
<<splitting_switch-case_generator()>>
|
|
default: {
|
|
assert(0 == 1 && "TEMPLATE NOT IMPLEMENTED!");
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
rc = qmckl_sm_splitting_core_hpc(
|
|
context, LDS, Dim, N_updates, Updates, Updates_index,
|
|
breakdown, Slater_inv, later_updates,
|
|
later_index, &later, determinant);
|
|
}
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
|
|
|
if (later > 0) {
|
|
qmckl_exit_code rc = qmckl_sm_splitting_hpc(
|
|
context, LDS, Dim, later,
|
|
later_updates, later_index,
|
|
breakdown, Slater_inv, determinant);
|
|
if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE;
|
|
}
|
|
|
|
return QMCKL_SUCCESS;
|
|
}
|
|
#+end_src
|
|
|
|
#+begin_src c :tangle (eval c) :comment org
|
|
qmckl_exit_code qmckl_sm_splitting(
|
|
const qmckl_context context,
|
|
const uint64_t LDS,
|
|
const uint64_t Dim,
|
|
const uint64_t N_updates,
|
|
const double* Updates,
|
|
const uint64_t* Updates_index,
|
|
const double breakdown,
|
|
double* Slater_inv,
|
|
double* determinant) {
|
|
|
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
|
return qmckl_failwith(
|
|
context,
|
|
QMCKL_NULL_CONTEXT,
|
|
"qmckl_sm_splitting",
|
|
NULL);
|
|
}
|
|
#ifdef HAVE_HPC
|
|
return qmckl_sm_splitting_hpc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
#else
|
|
return qmckl_sm_splitting_doc(
|
|
context,
|
|
LDS,
|
|
Dim,
|
|
N_updates,
|
|
Updates,
|
|
Updates_index,
|
|
breakdown,
|
|
Slater_inv,
|
|
determinant);
|
|
#endif
|
|
|
|
}
|
|
#+end_src
|
|
|
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
|
:PROPERTIES:
|
|
:Name: qmckl_sm_naive
|
|
:CRetType: qmckl_exit_code
|
|
:FRetType: qmckl_exit_code
|
|
:END:
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_hpc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting_hpc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting_hpc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_doc")
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting_doc &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting_doc
|
|
end interface
|
|
#+end_src
|
|
|
|
#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
|
|
|
#+RESULTS:
|
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
|
interface
|
|
integer(c_int32_t) function qmckl_sm_splitting &
|
|
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
|
bind(C)
|
|
use, intrinsic :: iso_c_binding
|
|
import
|
|
implicit none
|
|
|
|
integer (c_int64_t) , intent(in) , value :: context
|
|
integer (c_int64_t) , intent(in) , value :: LDS
|
|
integer (c_int64_t) , intent(in) , value :: Dim
|
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
|
real (c_double ) , intent(in) , value :: breakdown
|
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
|
real (c_double ) , intent(inout) :: determinant
|
|
|
|
end function qmckl_sm_splitting
|
|
end interface
|
|
#+end_src
|
|
|
|
*** Performance...
|
|
This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high.
|
|
|
|
*** Test
|
|
#+begin_src c :tangle (eval c_test)
|
|
assert(Updates3 != NULL);
|
|
assert(Updates_index3 != NULL);
|
|
assert(Slater_inv3_2 != NULL);
|
|
det = -1.23743195512859e-09;
|
|
rc = qmckl_sm_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
|
|
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
res[i * Dim + j] = 0;
|
|
for (unsigned int k = 0; k < Dim; k++) {
|
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
|
|
}
|
|
}
|
|
}
|
|
rc = QMCKL_SUCCESS;
|
|
for (unsigned int i = 0; i < Dim; i++) {
|
|
for (unsigned int j = 0; j < Dim; j++) {
|
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
|
rc = QMCKL_FAILURE;
|
|
}
|
|
}
|
|
}
|
|
assert(rc == QMCKL_SUCCESS);
|
|
#+end_src
|
|
|
|
|
|
|
|
* End of files
|
|
|
|
#+begin_src c :comments link :tangle (eval c_test)
|
|
assert (qmckl_context_destroy(context) == QMCKL_SUCCESS);
|
|
return 0;
|
|
|
|
}
|
|
#+end_src
|
|
|
|
# -*- mode: org -*-
|
|
# vim: syntax=c
|