1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 10:06:09 +01:00

Added WB2 kernel back.

This commit is contained in:
Francois Coppens 2023-02-27 13:33:53 +01:00
parent db13db8afa
commit 883416d84c

View File

@ -89,15 +89,15 @@ from applying the updates to the original matrix.
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix | | ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
*** Requirements *** Requirements
* ~context~ is not ~QMCKL_NULL_CONTEXT~ - ~context~ is not ~QMCKL_NULL_CONTEXT~
* ~LDS >= 2~ - ~LDS >= 2~
* ~Dim >= 2~ - ~Dim >= 2~
* ~N_updates >= 1~ - ~N_updates >= 1~
* ~Updates~ is allocated with $N_updates \times Dim$ elements - ~Updates~ is allocated with $N_updates \times Dim$ elements
* ~Updates_index~ is allocated with $N_updates$ elements - ~Updates_index~ is allocated with $N_updates$ elements
* ~breakdown~ is a small number such that $0 < breakdown << 1$ - ~breakdown~ is a small number such that $0 < breakdown << 1$
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements - ~Slater_inv~ is allocated with $Dim \times Dim$ elements
* ~determinant > 0~ - ~determinant > 0~
*** Pedagogical kernel source (in Fortran) *** Pedagogical kernel source (in Fortran)
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
@ -251,7 +251,7 @@ integer function qmckl_sm_naive_doc_f(context, &
end function qmckl_sm_naive_doc_f end function qmckl_sm_naive_doc_f
#+end_src #+end_src
**** C interface to the pedagogical kernel (not directly exposed) **** C interface (not directly exposed)
The following Fortran function ~qmckl_sm_naive_doc~ makes sure The following Fortran function ~qmckl_sm_naive_doc~ makes sure
that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h' Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h'
@ -1418,6 +1418,382 @@ This function cannot be used by itself and is used in Sherman-Morrison with upda
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels. with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
* Woodbury 2x2
** ~qmckl_woodbury_2x2~
:PROPERTIES:
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
*** Introduction
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
this algorithm is called the Woodbury Matrix Identity
\[
(S + U V)^{-1} = S^{-1} - C B^{-1} D
\]
where
$S$ is the Slater-matrix
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
$S^{-1}$ is the inverse of the Slater-matrix
$C:= S^{-1}U$, a Dim $\times 2$ matrix
$B := 1 + VC$, the $2 \times 2$ matrix that is going to be inverted
$D := VS^{-1}$, a $2 \times Dim$ matrix
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
*** API
#+NAME: qmckl_woodbury_2x2_args
| Variable | Type | In/Out | Description |
|-----------------+-------------------+-------+-------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
| ~Updates~ | ~double[2*Dim]~ | in | Array containing the updates |
| ~Updates_index~ | ~uint64_t[2]~ | in | Array containing the rank-1 updates |
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
| ~Slater_inv~ | ~double[LDS*Dim]~ | inout | Array containing the inverse of a Slater-matrix |
| ~determinant~ | ~double~ | inout | Determinant of Slater-matrix |
*** Requirements
- ~context~ is not ~qmckl_null_context~
- ~LDS >= 2~
- ~Dim >= 2~
- ~Updates~ is allocated with $2 \times Dim$ elements
- ~Updates_index~ is allocated with $2$ elements
- ~breakdown~ is a small number such that $0 < breakdown << 1$
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
*** Pedagogical kernel source (in Fortran)
**** C interface (not directly exposed)
*** C headers (exposed in qmckl.h)
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_2x2 (
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant );
#+end_src
*** C sources
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2_hpc",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = (S^T)^{-1}U : Dim x 2
double __attribute__((aligned(8))) C[2 * Dim];
for (uint64_t i = 0; i < Dim; i++) {
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < LDS; k++) {
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
}
}
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
const double B3 = C[row2 * 2 + 1] + 1;
// Check if determinant of inverted matrix is not zero
double det = B0 * B3 - B1 * B2;
if (fabs(det) < breakdown) {
return QMCKL_FAILURE;
}
// Update det(S) when passed
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// tmp = B^{-1}D : 2 x LDS
double __attribute__((aligned(8))) tmp[2 * LDS];
double* r1dim = &(Slater_inv[row1 * LDS]);
double* r2dim = &(Slater_inv[row2 * LDS]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint64_t i = 0; i < Dim; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:woodbury_2x2_kernel_template
#+begin_src c
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
const qmckl_context context,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2_{Dim}",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = (S^T)^{-1}U : {Dim} x 2
double __attribute__((aligned(8))) C[2 * {Dim}];
for (uint64_t i = 0; i < {Dim}; i++) {
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < D{Dim}_P; k++) {
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
}
}
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
const double B3 = C[row2 * 2 + 1] + 1;
// Check if determinant of inverted matrix is not zero
double det = B0 * B3 - B1 * B2;
if (fabs(det) < breakdown) {
return QMCKL_FAILURE;
}
// Update det(S) when passed
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// tmp = B^{-1}D : 2 x D{Dim}_P
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
for (uint64_t i = 0; i < {Dim}; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:woodbury_2x2_kernel_generator
#+begin_src python :noweb yes :exports none
text="""
<<woodbury_2x2_kernel_template>>
"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return ''.join(result)
#+end_src
#+NAME:woodbury_2x2_switch-case_generator
#+begin_src python :noweb yes :exports none
text="""
case {Dim}:
return qmckl_woodbury_2x2_{Dim}(context,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return ''.join(result)
#+end_src
#+begin_src c :tangle (eval c) :comments org :noweb yes
<<woodbury_2x2_kernel_generator()>>
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2",
NULL);
}
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
switch (Dim) {
<<woodbury_2x2_switch-case_generator()>>
}
}
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
return qmckl_woodbury_2x2_hpc(context,
LDS,
Dim,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
}
return QMCKL_FAILURE;
}
#+end_src
*** Fortran interfaces (exposed in qmckl_f.F90)
:PROPERTIES:
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_2x2 &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(2*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(2)
real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_2x2
end interface
#+end_src
*** Performance
This function is most efficient when used in cases where there are only 2 rank-1 updates and
it is sure they will not result in a singular matrix.
*** Tests
#+begin_src c :tangle (eval c_test)
assert(Updates2 != NULL);
assert(Updates_index2 != NULL);
assert(Slater_inv2 != NULL);
det = -1.4432116661319376e-11;
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
assert(fabs(det-2.367058141251457e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
}
}
}
rc = QMCKL_SUCCESS;
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
rc = QMCKL_FAILURE;
}
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
rc = QMCKL_FAILURE;
}
}
}
assert(rc == QMCKL_SUCCESS);
#+end_src
* Sherman-Morrison with Slagel Splitting * Sherman-Morrison with Slagel Splitting
** ~qmckl_sm_splitting~ ** ~qmckl_sm_splitting~
:PROPERTIES: :PROPERTIES: