1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 18:16:28 +01:00

Merge pull request #109 from fmgjcoppens/wb2x2_doc

Wb2x2 doc kernel
This commit is contained in:
Anthony Scemama 2023-07-13 09:44:31 +02:00 committed by GitHub
commit 38a7c3ba6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 689 additions and 61 deletions

View File

@ -31,6 +31,9 @@ This is the range that determines the how many high performance kernel instantce
#+begin_src python :noweb yes :exports none #+begin_src python :noweb yes :exports none
range(2, 22) range(2, 22)
#+end_src #+end_src
#+RESULTS: kernel_generator_range
: None
* Naïve Sherman-Morrison * Naïve Sherman-Morrison
@ -89,15 +92,15 @@ from applying the updates to the original matrix.
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix | | ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
*** Requirements *** Requirements
* ~context~ is not ~QMCKL_NULL_CONTEXT~ - ~context~ is not ~QMCKL_NULL_CONTEXT~
* ~LDS >= 2~ - ~LDS >= 2~
* ~Dim >= 2~ - ~Dim >= 2~
* ~N_updates >= 1~ - ~N_updates >= 1~
* ~Updates~ is allocated with $N_updates \times Dim$ elements - ~Updates~ is allocated with $N_updates \times Dim$ elements
* ~Updates_index~ is allocated with $N_updates$ elements - ~Updates_index~ is allocated with $N_updates$ elements
* ~breakdown~ is a small number such that $0 < breakdown << 1$ - ~breakdown~ is a small number such that $0 < breakdown << 1$
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements - ~Slater_inv~ is allocated with $Dim \times Dim$ elements
* ~determinant > 0~ - ~determinant > 0~
*** Pedagogical kernel source (in Fortran) *** Pedagogical kernel source (in Fortran)
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
@ -110,21 +113,21 @@ subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
integer*8 , intent(in) :: lds, dim, nupdates integer*8 , intent(in) :: lds, dim, nupdates
real*8 , intent(in) :: upds(nupdates * lds) real*8 , intent(in) :: upds(nupdates * lds)
real*8 , intent(in) :: s_inv(dim * lds) real*8 , intent(in) :: s_inv(dim * lds)
real*8 , intent(out) , dimension(lds, nupdates) :: Updates real*8 , intent(out) , dimension(dim, nupdates) :: Updates
real*8 , intent(out) , dimension(dim, lds) :: Inverse real*8 , intent(out) , dimension(dim, dim) :: Inverse
integer*8 :: i, j integer*8 :: i, j
! Construct Updates: lds x nupdates ! Construct Updates: lds x nupdates
do i = 1, nupdates do i = 1, nupdates
do j = 1, lds do j = 1, dim
Updates(j, i) = upds((i - 1) * lds + j) Updates(j, i) = upds((i - 1) * lds + j)
end do end do
end do end do
! Construct Inverse: dim x lds ! Construct Inverse: dim x lds
do i = 1, dim do i = 1, dim
do j = 1, lds do j = 1, dim
Inverse(i, j) = s_inv((i - 1) * lds + j) Inverse(i, j) = s_inv((i - 1) * lds + j)
end do end do
end do end do
@ -135,14 +138,14 @@ end subroutine convert
subroutine copy_back_inv(Inverse, s_inv, lds, dim) subroutine copy_back_inv(Inverse, s_inv, lds, dim)
implicit none implicit none
integer*8 , intent(in) :: lds, dim integer*8 , intent(in) :: lds, dim
real*8 , intent(in) , dimension(dim, lds) :: Inverse real*8 , intent(in) , dimension(dim, dim) :: Inverse
real*8 , intent(out) :: s_inv(dim * lds) real*8 , intent(out) :: s_inv(dim * lds)
integer*8 :: i, j integer*8 :: i, j
! Copy updated inverse back to s_inv ! Copy updated inverse back to s_inv
do i = 1, dim do i = 1, dim
do j = 1, lds do j = 1, dim
s_inv((i - 1) * lds + j) = Inverse(i, j) s_inv((i - 1) * lds + j) = Inverse(i, j)
end do end do
end do end do
@ -150,17 +153,17 @@ end subroutine copy_back_inv
#+end_src #+end_src
#+begin_src f90 :tangle (eval f) :comment org :exports none #+begin_src f90 :tangle (eval f) :comment org :exports none
subroutine copy_back_lu(Later_updates, later_upds, lds, nupdates) subroutine copy_back_lu(Later_updates, later_upds, lds, dim, nupdates)
implicit none implicit none
integer*8 , intent(in) :: lds, nupdates integer*8 , intent(in) :: lds, dim, nupdates
real*8 , intent(in) , dimension(lds, nupdates) :: Later_updates real*8 , intent(in) , dimension(dim, nupdates) :: Later_updates
real*8 , intent(out) :: later_upds(nupdates * lds) real*8 , intent(out) :: later_upds(nupdates * lds)
integer*8 :: i, j integer*8 :: i, j
! Copy updated inverse back to s_inv ! Copy updated inverse back to s_inv
do i = 1, nupdates do i = 1, nupdates
do j = 1, lds do j = 1, dim
later_upds((i - 1) * lds + j) = Later_updates(j, i) later_upds((i - 1) * lds + j) = Later_updates(j, i)
end do end do
end do end do
@ -188,10 +191,10 @@ integer function qmckl_sm_naive_doc_f(context, &
real*8 , intent(inout) :: s_inv(dim * lds) real*8 , intent(inout) :: s_inv(dim * lds)
real*8 , intent(inout) :: determinant real*8 , intent(inout) :: determinant
real*8 , dimension(lds, nupdates) :: Updates real*8 , dimension(dim, nupdates) :: Updates
real*8 , dimension(dim, lds) :: Inverse real*8 , dimension(dim, dim) :: Inverse
real*8 , dimension(dim) :: C real*8 , dimension(dim) :: C
real*8 , dimension(lds) :: D real*8 , dimension(dim) :: D
real*8 :: denominator, idenominator, update real*8 :: denominator, idenominator, update
integer*8 :: i, j, l, row integer*8 :: i, j, l, row
@ -251,7 +254,7 @@ integer function qmckl_sm_naive_doc_f(context, &
end function qmckl_sm_naive_doc_f end function qmckl_sm_naive_doc_f
#+end_src #+end_src
**** C interface to the pedagogical kernel (not directly exposed) **** C interface (not directly exposed)
The following Fortran function ~qmckl_sm_naive_doc~ makes sure The following Fortran function ~qmckl_sm_naive_doc~ makes sure
that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h' Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h'
@ -843,16 +846,14 @@ integer function qmckl_sm_splitting_core_doc_f( &
integer*8 , intent(inout) :: Later_index(nupdates) integer*8 , intent(inout) :: Later_index(nupdates)
real*8 , intent(inout) :: later_upds(lds * nupdates) real*8 , intent(inout) :: later_upds(lds * nupdates)
real*8 , dimension(lds, nupdates) :: Updates real*8 , dimension(dim, nupdates) :: Updates
real*8 , dimension(lds, nupdates) :: Later_updates real*8 , dimension(dim, nupdates) :: Later_updates
real*8 , dimension(dim, lds) :: Inverse real*8 , dimension(dim, dim) :: Inverse
real*8 , dimension(dim) :: C real*8 , dimension(dim) :: C
real*8 , dimension(lds) :: D real*8 , dimension(dim) :: D
real*8 :: denominator, idenominator, update real*8 :: denominator, idenominator, update
integer*8 :: i, j, l, row integer*8 :: i, j, l, row
write(*,*) "Entering 'qmckl_sm_splittinig_core_doc_f'"
info = QMCKL_FAILURE info = QMCKL_FAILURE
if (context == QMCKL_NULL_CONTEXT) then if (context == QMCKL_NULL_CONTEXT) then
@ -864,7 +865,6 @@ integer function qmckl_sm_splitting_core_doc_f( &
! matrices 'Updates' and 'Inverse'. ! matrices 'Updates' and 'Inverse'.
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim) call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
l = 1; l = 1;
! For each update do... ! For each update do...
do while (l < nupdates + 1) do while (l < nupdates + 1)
@ -917,12 +917,10 @@ integer function qmckl_sm_splitting_core_doc_f( &
! Copy updated inverse and later updates ! Copy updated inverse and later updates
! back to s_inv and later_upds ! back to s_inv and later_upds
call copy_back_inv(Inverse, s_inv, lds, dim) call copy_back_inv(Inverse, s_inv, lds, dim)
call copy_back_lu(Later_Updates, later_upds, lds, nupdates) call copy_back_lu(Later_Updates, later_upds, lds, dim, nupdates)
info = QMCKL_SUCCESS info = QMCKL_SUCCESS
write(*,*) "Leaving 'qmckl_sm_splittinig_core_doc_f'"
end function qmckl_sm_splitting_core_doc_f end function qmckl_sm_splitting_core_doc_f
#+end_src #+end_src
@ -1383,7 +1381,7 @@ interface
end interface end interface
#+end_src #+end_src
#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core") #+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS: #+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none #+begin_src f90 :tangle (eval fh_func) :comments org :exports none
@ -1418,6 +1416,649 @@ This function cannot be used by itself and is used in Sherman-Morrison with upda
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels. with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
* Woodbury 2x2
** ~qmckl_woodbury_2x2~
:PROPERTIES:
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
*** Introduction
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
this algorithm is called the Woodbury Matrix Id
\[
(S + U V)^{-1} = S^{-1} - C B^{-1} D
\]
where
$S$ is the Slater-matrix
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
$S^{-1}$ is the inverse of the Slater-matrix
$C:= S^{-1}U$, a Dim $\times 2$ matrix
$B := 1 + VC$, the $2 \times 2$ matrix that is going to be inverted
$D := VS^{-1}$, a $2 \times Dim$ matrix
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
*** API
#+NAME: qmckl_woodbury_2x2_args
| Variable | Type | In/Out | Description |
|-----------------+-------------------+-------+-------------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
| ~Updates~ | ~double[2*Dim]~ | in | Array containing the updates |
| ~Updates_index~ | ~uint64_t[2]~ | in | Array containing the rank-1 updates |
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
| ~Slater_inv~ | ~double[LDS*Dim]~ | inout | Array containing the inverse of a Slater-matrix |
| ~determinant~ | ~double~ | inout | Determinant of Slater-matrix |
*** Requirements
- ~context~ is not ~qmckl_null_context~
- ~LDS >= 2~
- ~Dim >= 2~
- ~Updates~ is allocated with $2 \times Dim$ elements
- ~Updates_index~ is allocated with $2$ elements
- ~breakdown~ is a small number such that $0 < breakdown << 1$
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
*** Pedagogical kernel source (in Fortran)
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
not be used in real workloads.
#+begin_src f90 :tangle (eval f)
integer function qmckl_woodbury_2x2_doc_f(&
context, &
lds, dim, &
upds, &
updates_index, &
breakdown, &
s_inv, &
determinant) result(info)
use qmckl
implicit none
integer*8 , intent(in) :: context
integer*8 , intent(in) :: lds, dim
integer*8 , intent(in) :: updates_index(2)
real*8 , intent(in) :: upds(2 * lds)
real*8 , intent(in) :: breakdown
real*8 , intent(inout) :: s_inv(dim * lds)
real*8 , intent(inout) :: determinant
integer*8 , dimension(2, dim) :: V
integer*8 , dimension(2, 2) :: Id
real*8 , dimension(dim, dim) :: Inverse
real*8 , dimension(dim, 2) :: Updates, C
real*8 , dimension(2, 2) :: D, invD
real*8 , dimension(2, dim) :: E, F
real*8 :: detD, idenominator, update
integer*8 :: i, j, k, l
info = QMCKL_FAILURE
if (context == QMCKL_NULL_CONTEXT) then
info = QMCKL_INVALID_CONTEXT
return
endif
! Construct V(2, dim) matrix
V = 0
V(1, updates_index(1)) = 1
V(2, updates_index(2)) = 1
! Construct Id(2, 2) matrix
Id = 0
Id(1, 1) = 1
Id(2, 2) = 1
! Convert 'upds' and 's_inv' into the more easily readable Fortran
! matrices 'Updates' and 'Inverse'.
call convert(upds, s_inv, Updates, Inverse, int(2,8), lds, dim)
! Compute C(dim, 2) = Inverse(dim, dim) x Updates(dim, 2)
C = 0
do i = 1, dim
do j = 1, 2
do k = 1, dim
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
end do
end do
end do
! Construct matrix D(2, 2) := I(2, 2) + V(2, dim) x C(dim, 2)
D = 0
do i = 1, 2
do j = 1, 2
do k = 2, dim
D(i, j) = D(i, j) + V(i, k) * C(k, j)
end do
end do
end do
D = Id + D
! Compute determinant := det(D) explicitly
detD = D(1,1) * D(2,2) - D(1,2) * D(2,1)
! Return early if det(D) is too small
if (abs(detD) < breakdown) return
! Update det(S)
determinant = determinant * detD
! Compute inv(D) explicitly
invD(1,1) = D(2,2)
invD(1,2) = - D(1,2)
invD(2,1) = - D(2,1)
invD(2,2) = D(1,1)
invD = invD / detD
! Compute E(2, dim) := V(2, dim) x Inverse(dim, dim)
E = 0
do i = 1, 2
do j = 1, dim
do k = 1, dim
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
end do
end do
end do
! Compute F(2, dim) := invD(2, 2) x E(2, dim)
F = 0
do i = 1, 2
do j = 1, dim
do k = 1, 2
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
end do
end do
end do
! Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 2) x F(2, dim)
do i = 1, dim
do j = 1, dim
do k = 1, 2
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
end do
end do
end do
! Copy updated inverse and later updates
! back to s_inv and later_upds
call copy_back_inv(Inverse, s_inv, lds, dim)
info = QMCKL_SUCCESS
end function qmckl_woodbury_2x2_doc_f
#+end_src
**** C interface (not directly exposed)
The function ~qmckl_sm_splitting_core_doc~ makes sure that
~qmckl_sm_splitting_core_doc_f~ can be called from C using the
~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be
exposed in ~qmckl.h~ and ~qmckl_f.F90~, but
~qmckl_sm_splitting_core_doc_f~ will not.
#+CALL: generate_c_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_doc")
#+RESULTS:
#+begin_src f90 :tangle (eval f) :comments org :exports none
integer(c_int32_t) function qmckl_woodbury_2x2_doc &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C) result(info)
use, intrinsic :: iso_c_binding
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(2*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(2)
real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
integer(c_int32_t), external :: qmckl_woodbury_2x2_doc_f
info = qmckl_woodbury_2x2_doc_f &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant)
end function qmckl_woodbury_2x2_doc
#+end_src
*** C headers (exposed in qmckl.h)
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_2x2 (
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant );
#+end_src
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_hpc")
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_2x2_hpc (
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant );
#+end_src
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_2x2_doc")
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_2x2_doc (
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant );
#+end_src
*** C sources
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2_hpc",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = (S^T)^{-1}U : Dim x 2
double __attribute__((aligned(8))) C[2 * Dim];
for (uint64_t i = 0; i < Dim; i++) {
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < LDS; k++) {
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
}
}
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
const double B3 = C[row2 * 2 + 1] + 1;
// Check if determinant of inverted matrix is not zero
double det = B0 * B3 - B1 * B2;
if (fabs(det) < breakdown) {
return QMCKL_FAILURE;
}
// Update det(S) when passed
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// tmp = B^{-1}D : 2 x LDS
double __attribute__((aligned(8))) tmp[2 * LDS];
double* r1dim = &(Slater_inv[row1 * LDS]);
double* r2dim = &(Slater_inv[row2 * LDS]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint64_t i = 0; i < Dim; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:woodbury_2x2_kernel_template
#+begin_src c
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
const qmckl_context context,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2_{Dim}",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = (S^T)^{-1}U : {Dim} x 2
double __attribute__((aligned(8))) C[2 * {Dim}];
for (uint64_t i = 0; i < {Dim}; i++) {
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < D{Dim}_P; k++) {
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
}
}
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
const double B3 = C[row2 * 2 + 1] + 1;
// Check if determinant of inverted matrix is not zero
double det = B0 * B3 - B1 * B2;
if (fabs(det) < breakdown) {
return QMCKL_FAILURE;
}
// Update det(S) when passed
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// tmp = B^{-1}D : 2 x D{Dim}_P
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
for (uint64_t i = 0; i < {Dim}; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:woodbury_2x2_kernel_generator
#+begin_src python :noweb yes :exports none
text="""
<<woodbury_2x2_kernel_template>>
"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return ''.join(result)
#+end_src
#+NAME:woodbury_2x2_switch-case_generator
#+begin_src python :noweb yes :exports none
text="""
case {Dim}:
return qmckl_woodbury_2x2_{Dim}(context,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return ''.join(result)
#+end_src
#+begin_src c :tangle (eval c) :comments org :noweb yes
<<woodbury_2x2_kernel_generator()>>
#+end_src
#+begin_src c :tangle (eval c) :comments org :noweb yes
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(
context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2",
NULL);
}
#ifdef HAVE_HPC
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
switch (Dim) {
<<woodbury_2x2_switch-case_generator()>>
}
}
else { // Updating smaller sub-matrix
return qmckl_woodbury_2x2_hpc(
context,
LDS,
Dim,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
}
#else
return qmckl_woodbury_2x2_doc(
context,
LDS,
Dim,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
// return qmckl_woodbury_2x2_hpc(
// context,
// LDS,
// Dim,
// Updates,
// Updates_index,
// breakdown,
// Slater_inv,
// determinant);
#endif
return QMCKL_FAILURE;
}
#+end_src
*** Fortran interfaces (exposed in qmckl_f.F90)
:PROPERTIES:
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_2x2 &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(2*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(2)
real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_2x2
end interface
#+end_src
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_2x2_hpc")
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_2x2_hpc &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(2*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(2)
real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_2x2_hpc
end interface
#+end_src
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_2x2_doc")
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_2x2_doc &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(2*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(2)
real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_2x2_doc
end interface
#+end_src
*** Performance
This function is most efficient when used in cases where there are only 2 rank-1 updates and
it is sure they will not result in a singular matrix.
*** Tests
#+begin_src c :tangle (eval c_test)
assert(Updates2 != NULL);
assert(Updates_index2 != NULL);
assert(Slater_inv2 != NULL);
det = -1.4432116661319376e-11;
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
assert(fabs(det-2.367058141251457e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
}
}
}
rc = QMCKL_SUCCESS;
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
rc = QMCKL_FAILURE;
}
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
rc = QMCKL_FAILURE;
}
}
}
assert(rc == QMCKL_SUCCESS);
#+end_src
* Sherman-Morrison with Slagel Splitting * Sherman-Morrison with Slagel Splitting
** ~qmckl_sm_splitting~ ** ~qmckl_sm_splitting~
:PROPERTIES: :PROPERTIES:
@ -1498,8 +2139,6 @@ integer recursive function qmckl_sm_splitting_doc_f( &
integer*8 , dimension(nupdates) :: Later_index integer*8 , dimension(nupdates) :: Later_index
real*8 , dimension(lds * nupdates) :: Later_updates real*8 , dimension(lds * nupdates) :: Later_updates
write(*,*) "Entering 'qmckl_sm_splitting_doc_f'"
info = QMCKL_FAILURE info = QMCKL_FAILURE
if (context == QMCKL_NULL_CONTEXT) then if (context == QMCKL_NULL_CONTEXT) then
@ -1538,8 +2177,6 @@ integer recursive function qmckl_sm_splitting_doc_f( &
info = QMCKL_SUCCESS info = QMCKL_SUCCESS
write(*,*) "Leaving 'qmckl_sm_splitting_doc_f'"
end function qmckl_sm_splitting_doc_f end function qmckl_sm_splitting_doc_f
#+end_src #+end_src
@ -1565,21 +2202,16 @@ integer(c_int32_t) function qmckl_sm_splitting_doc &
integer (c_int64_t) , intent(in) , value :: LDS integer (c_int64_t) , intent(in) , value :: LDS
integer (c_int64_t) , intent(in) , value :: Dim integer (c_int64_t) , intent(in) , value :: Dim
integer (c_int64_t) , intent(in) , value :: N_updates integer (c_int64_t) , intent(in) , value :: N_updates
real (c_double ) , intent(in) :: Updates(LDS*N_updates) real (c_double ) , intent(in) :: Updates(N_updates*LDS)
integer (c_int64_t) , intent(in) :: Updates_index(N_updates) integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
real (c_double ) , intent(in) , value :: breakdown real (c_double ) , intent(in) , value :: breakdown
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
real (c_double ) , intent(inout) :: determinant real (c_double ) , intent(inout) :: determinant
integer(c_int32_t), external :: qmckl_sm_splitting_doc_f integer(c_int32_t), external :: qmckl_sm_splitting_doc_f
write(*,*) "Entering 'qmckl_sm_splitting_doc'"
info = qmckl_sm_splitting_doc_f & info = qmckl_sm_splitting_doc_f &
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant)
write(*,*) "Leaving 'qmckl_sm_splitting_doc'"
end function qmckl_sm_splitting_doc end function qmckl_sm_splitting_doc
#+end_src #+end_src
@ -1722,8 +2354,6 @@ qmckl_exit_code qmckl_sm_splitting(
const double breakdown, const double breakdown,
double* Slater_inv, double* Slater_inv,
double* determinant) { double* determinant) {
printf("Entering 'qmckl_sm_splitting'\n");
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith( return qmckl_failwith(
@ -1756,8 +2386,6 @@ qmckl_exit_code qmckl_sm_splitting(
determinant); determinant);
#endif #endif
printf("Leaving 'qmckl_sm_splitting'\n");
return QMCKL_SUCCESS; return QMCKL_SUCCESS;
} }
#+end_src #+end_src
@ -1821,7 +2449,7 @@ interface
end interface end interface
#+end_src #+end_src
#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting") #+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS: #+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none #+begin_src f90 :tangle (eval fh_func) :comments org :exports none

File diff suppressed because one or more lines are too long