mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 04:14:49 +01:00
Added woodbury 3x3 HPC and DOC kernels back. They pass the tests.
This commit is contained in:
parent
81be8263a0
commit
44323468e6
@ -2059,6 +2059,698 @@ assert(rc == QMCKL_SUCCESS);
|
|||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
|
* Woodbury 3x3
|
||||||
|
** ~qmckl_woodbury_3x3~
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_woodbury_3x3
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
*** Introduction
|
||||||
|
The Woodbury 3x3 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
||||||
|
this algorithm is called the Woodbury Matrix Id
|
||||||
|
\[
|
||||||
|
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
||||||
|
\]
|
||||||
|
where
|
||||||
|
$S$ is the Slater-matrix
|
||||||
|
$U$ and $V$ are the matrices containing the updates and the canonical basis matrix
|
||||||
|
$S^{-1}$ is the inverse of the Slater-matrix
|
||||||
|
$C:= S^{-1}U$, a Dim $\times 3$ matrix
|
||||||
|
$B := 1 + VC$, the $3 \times 3$ matrix that is going to be inverted
|
||||||
|
$D := VS^{-1}$, a $3 \times Dim$ matrix
|
||||||
|
|
||||||
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||||
|
from applying the updates to the original matrix.
|
||||||
|
|
||||||
|
*** API
|
||||||
|
#+NAME: qmckl_woodbury_3x3_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|-----------------+-------------------+-------+-------------------------------------------------|
|
||||||
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
||||||
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
||||||
|
| ~Updates~ | ~double[3*Dim]~ | in | Array containing the updates |
|
||||||
|
| ~Updates_index~ | ~uint64_t[3]~ | in | Array containing the rank-1 updates |
|
||||||
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
||||||
|
| ~Slater_inv~ | ~double[LDS*Dim]~ | inout | Array containing the inverse of a Slater-matrix |
|
||||||
|
| ~determinant~ | ~double~ | inout | Determinant of Slater-matrix |
|
||||||
|
|
||||||
|
*** Requirements
|
||||||
|
- ~context~ is not ~qmckl_null_context~
|
||||||
|
- ~LDS >= 3~
|
||||||
|
- ~Dim >= 3~
|
||||||
|
- ~Updates~ is allocated with $3 \times Dim$ elements
|
||||||
|
- ~Updates_index~ is allocated with $3$ elements
|
||||||
|
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||||
|
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||||
|
|
||||||
|
*** Pedagogical kernel source (in Fortran)
|
||||||
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
||||||
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
||||||
|
not be used in real workloads.
|
||||||
|
|
||||||
|
#+begin_src f90 :tangle (eval f)
|
||||||
|
integer function qmckl_woodbury_3x3_doc_f(&
|
||||||
|
context, &
|
||||||
|
lds, dim, &
|
||||||
|
upds, &
|
||||||
|
updates_index, &
|
||||||
|
breakdown, &
|
||||||
|
s_inv, &
|
||||||
|
determinant) result(info)
|
||||||
|
|
||||||
|
use qmckl
|
||||||
|
implicit none
|
||||||
|
integer*8 , intent(in) :: context
|
||||||
|
integer*8 , intent(in) :: lds, dim
|
||||||
|
integer*8 , intent(in) :: updates_index(3)
|
||||||
|
real*8 , intent(in) :: upds(3 * lds)
|
||||||
|
real*8 , intent(in) :: breakdown
|
||||||
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
||||||
|
real*8 , intent(inout) :: determinant
|
||||||
|
|
||||||
|
integer*8 , dimension(3, dim) :: V
|
||||||
|
integer*8 , dimension(3, 3) :: Id
|
||||||
|
real*8 , dimension(dim, dim) :: Inverse
|
||||||
|
real*8 , dimension(dim, 3) :: Updates, C
|
||||||
|
real*8 , dimension(3, 3) :: D, invD
|
||||||
|
real*8 , dimension(3, dim) :: E, F
|
||||||
|
|
||||||
|
real*8 :: detD, idetD, idenominator, update
|
||||||
|
integer*8 :: i, j, k, l
|
||||||
|
|
||||||
|
info = QMCKL_FAILURE
|
||||||
|
|
||||||
|
if (context == QMCKL_NULL_CONTEXT) then
|
||||||
|
info = QMCKL_INVALID_CONTEXT
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Construct V(3, dim) matrix
|
||||||
|
V = 0
|
||||||
|
V(1, updates_index(1)) = 1
|
||||||
|
V(2, updates_index(2)) = 1
|
||||||
|
V(3, updates_index(3)) = 1
|
||||||
|
|
||||||
|
! Construct Id(3, 3) matrix
|
||||||
|
Id = 0
|
||||||
|
Id(1, 1) = 1
|
||||||
|
Id(2, 2) = 1
|
||||||
|
Id(3, 3) = 1
|
||||||
|
|
||||||
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
||||||
|
! matrices 'Updates' and 'Inverse'.
|
||||||
|
call convert(upds, s_inv, Updates, Inverse, int(3,8), lds, dim)
|
||||||
|
|
||||||
|
! Compute C(dim, 3) = Inverse(dim, dim) x Updates(dim, 3)
|
||||||
|
C = 0
|
||||||
|
do i = 1, dim
|
||||||
|
do j = 1, 3
|
||||||
|
do k = 1, dim
|
||||||
|
C(i, j) = C(i, j) + Inverse(i, k) * Updates(k, j)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! Construct matrix D(3, 3) := I(3, 3) + V(3, dim) x C(dim, 3)
|
||||||
|
D = 0
|
||||||
|
do i = 1, 3
|
||||||
|
do j = 1, 3
|
||||||
|
do k = 3, dim
|
||||||
|
D(i, j) = D(i, j) + V(i, k) * C(k, j)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
D = Id + D
|
||||||
|
|
||||||
|
! Compute determinant := det(D) explicitly
|
||||||
|
detD = D(1,1) * (D(2,2) * D(3,3) - D(2,3) * D(3,2)) - &
|
||||||
|
D(1,2) * (D(2,1) * D(3,3) - D(2,3) * D(3,1)) + &
|
||||||
|
D(1,3) * (D(2,1) * D(3,2) - D(2,2) * D(3,1))
|
||||||
|
|
||||||
|
! Return early if det(D) is too small
|
||||||
|
if (abs(detD) < breakdown) return
|
||||||
|
|
||||||
|
! Update det(S)
|
||||||
|
determinant = determinant * detD
|
||||||
|
|
||||||
|
idetD = 1.0d0 / detD
|
||||||
|
! Compute inv(D) explicitly
|
||||||
|
invD(1,1) = (D(2,2) * D(3,3) - D(3,2) * D(2,3)) * idetD
|
||||||
|
invD(1,2) = -(D(1,2) * D(3,3) - D(3,2) * D(1,3)) * idetD
|
||||||
|
invD(1,3) = (D(1,2) * D(2,3) - D(2,2) * D(1,3)) * idetD
|
||||||
|
invD(2,1) = -(D(2,1) * D(3,3) - D(3,1) * D(2,3)) * idetD
|
||||||
|
invD(2,2) = (D(1,1) * D(3,3) - D(3,1) * D(1,3)) * idetD
|
||||||
|
invD(2,3) = -(D(1,1) * D(2,3) - D(2,1) * D(1,3)) * idetD
|
||||||
|
invD(3,1) = (D(2,1) * D(3,2) - D(3,1) * D(2,2)) * idetD
|
||||||
|
invD(3,2) = -(D(1,1) * D(3,2) - D(3,1) * D(1,2)) * idetD
|
||||||
|
invD(3,3) = (D(1,1) * D(2,2) - D(2,1) * D(1,2)) * idetD
|
||||||
|
|
||||||
|
! Compute E(3, dim) := V(3, dim) x Inverse(dim, dim)
|
||||||
|
E = 0
|
||||||
|
do i = 1, 3
|
||||||
|
do j = 1, dim
|
||||||
|
do k = 1, dim
|
||||||
|
E(i, j) = E(i, j) + V(i, k) * Inverse(k, j)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! Compute F(3, dim) := invD(3, 3) x E(3, dim)
|
||||||
|
F = 0
|
||||||
|
do i = 1, 3
|
||||||
|
do j = 1, dim
|
||||||
|
do k = 1, 3
|
||||||
|
F(i, j) = F(i, j) + invD(i, k) * E(k, j)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! Compute Inverse(dim, dim) := Inverse(dim, dim) - C(dim, 3) x F(3, dim)
|
||||||
|
do i = 1, dim
|
||||||
|
do j = 1, dim
|
||||||
|
do k = 1, 3
|
||||||
|
Inverse(i, j) = Inverse(i, j) - C(i, k) * F(k, j)
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
|
||||||
|
! Copy updated inverse and later updates
|
||||||
|
! back to s_inv and later_upds
|
||||||
|
call copy_back_inv(Inverse, s_inv, lds, dim)
|
||||||
|
|
||||||
|
info = QMCKL_SUCCESS
|
||||||
|
|
||||||
|
end function qmckl_woodbury_3x3_doc_f
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
**** C interface (not directly exposed)
|
||||||
|
The function ~qmckl_sm_splitting_core_doc~ makes sure that
|
||||||
|
~qmckl_sm_splitting_core_doc_f~ can be called from C using the
|
||||||
|
~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be
|
||||||
|
exposed in ~qmckl.h~ and ~qmckl_f.F90~, but
|
||||||
|
~qmckl_sm_splitting_core_doc_f~ will not.
|
||||||
|
|
||||||
|
#+CALL: generate_c_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_doc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
||||||
|
integer(c_int32_t) function qmckl_woodbury_3x3_doc &
|
||||||
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||||
|
bind(C) result(info)
|
||||||
|
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer (c_int64_t) , intent(in) , value :: context
|
||||||
|
integer (c_int64_t) , intent(in) , value :: LDS
|
||||||
|
integer (c_int64_t) , intent(in) , value :: Dim
|
||||||
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
||||||
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
||||||
|
real (c_double ) , intent(in) , value :: breakdown
|
||||||
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||||
|
real (c_double ) , intent(inout) :: determinant
|
||||||
|
|
||||||
|
integer(c_int32_t), external :: qmckl_woodbury_3x3_doc_f
|
||||||
|
info = qmckl_woodbury_3x3_doc_f &
|
||||||
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
||||||
|
|
||||||
|
end function qmckl_woodbury_3x3_doc
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** C headers (exposed in qmckl.h)
|
||||||
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
qmckl_exit_code qmckl_woodbury_3x3 (
|
||||||
|
const qmckl_context context,
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* determinant );
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_hpc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
qmckl_exit_code qmckl_woodbury_3x3_hpc (
|
||||||
|
const qmckl_context context,
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* determinant );
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname="qmckl_woodbury_3x3_doc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
qmckl_exit_code qmckl_woodbury_3x3_doc (
|
||||||
|
const qmckl_context context,
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* determinant );
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** C sources
|
||||||
|
#+begin_src c :tangle (eval c) :comments org
|
||||||
|
qmckl_exit_code qmckl_woodbury_3x3_hpc(const qmckl_context context,
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const double* __restrict Updates,
|
||||||
|
const uint64_t* __restrict Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* __restrict Slater_inv,
|
||||||
|
double* __restrict determinant) {
|
||||||
|
/*
|
||||||
|
C := S^{-1} * U, dim x 3
|
||||||
|
B := 1 + V * C, 3 x 3
|
||||||
|
D := V * S^{-1}, 3 x dim
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
|
return qmckl_failwith(context,
|
||||||
|
QMCKL_NULL_CONTEXT,
|
||||||
|
"qmckl_woodbury_3x3_hpc",
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint64_t row1 = (Updates_index[0] - 1);
|
||||||
|
const uint64_t row2 = (Updates_index[1] - 1);
|
||||||
|
const uint64_t row3 = (Updates_index[2] - 1);
|
||||||
|
|
||||||
|
// Compute C = (S^T)^{-1}U : Dim x 3
|
||||||
|
double __attribute__((aligned(8))) C[3 * Dim];
|
||||||
|
for (uint64_t i = 0; i < Dim; i++) {
|
||||||
|
C[i * 3] = 0;
|
||||||
|
C[i * 3 + 1] = 0;
|
||||||
|
C[i * 3 + 2] = 0;
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t k = 0; k < LDS; k++) {
|
||||||
|
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
||||||
|
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
||||||
|
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute B = 1 + VC : 3 x 3
|
||||||
|
const double B0 = C[row1 * 3] + 1;
|
||||||
|
const double B1 = C[row1 * 3 + 1];
|
||||||
|
const double B2 = C[row1 * 3 + 2];
|
||||||
|
const double B3 = C[row2 * 3];
|
||||||
|
const double B4 = C[row2 * 3 + 1] + 1;
|
||||||
|
const double B5 = C[row2 * 3 + 2];
|
||||||
|
const double B6 = C[row3 * 3];
|
||||||
|
const double B7 = C[row3 * 3 + 1];
|
||||||
|
const double B8 = C[row3 * 3 + 2] + 1;
|
||||||
|
|
||||||
|
// Check if determinant of inverted matrix is not zero
|
||||||
|
double det;
|
||||||
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
||||||
|
B2 * (B3 * B7 - B4 * B6);
|
||||||
|
if (fabs(det) < breakdown) {
|
||||||
|
return QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update det(S) when passed
|
||||||
|
if (determinant)
|
||||||
|
*determinant *= det;
|
||||||
|
|
||||||
|
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
||||||
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
||||||
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
||||||
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
||||||
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
||||||
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
||||||
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
||||||
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
||||||
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
||||||
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
||||||
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
||||||
|
|
||||||
|
// tmp = B^{-1}D : 2 x LDS
|
||||||
|
double __attribute__((aligned(8))) tmp[3 * LDS];
|
||||||
|
double* r1dim = &(Slater_inv[row1 * LDS]);
|
||||||
|
double* r2dim = &(Slater_inv[row2 * LDS]);
|
||||||
|
double* r3dim = &(Slater_inv[row3 * LDS]);
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < LDS; j++) {
|
||||||
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
||||||
|
tmp[LDS + j] =
|
||||||
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
||||||
|
tmp[2 * LDS + j] =
|
||||||
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
||||||
|
for (uint64_t i = 0; i < Dim; i++) {
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < LDS; j++) {
|
||||||
|
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
||||||
|
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
||||||
|
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:woodbury_3x3_kernel_template
|
||||||
|
#+begin_src c
|
||||||
|
static inline qmckl_exit_code qmckl_woodbury_3x3_{Dim}(
|
||||||
|
const qmckl_context context,
|
||||||
|
const double* __restrict Updates,
|
||||||
|
const uint64_t* __restrict Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* __restrict Slater_inv,
|
||||||
|
double* __restrict determinant) {
|
||||||
|
/*
|
||||||
|
C := S^{-1} * U, dim x 3
|
||||||
|
B := 1 + V * C, 3 x 3
|
||||||
|
D := V * S^{-1}, 3 x dim
|
||||||
|
,*/
|
||||||
|
|
||||||
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
|
return qmckl_failwith(context,
|
||||||
|
QMCKL_NULL_CONTEXT,
|
||||||
|
"qmckl_woodbury_3x3_{Dim}",
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
const uint64_t row1 = (Updates_index[0] - 1);
|
||||||
|
const uint64_t row2 = (Updates_index[1] - 1);
|
||||||
|
const uint64_t row3 = (Updates_index[2] - 1);
|
||||||
|
|
||||||
|
// Compute C = (S^T)^{-1}U : {Dim} x 3
|
||||||
|
double __attribute__((aligned(8))) C[3 * {Dim}];
|
||||||
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||||
|
C[i * 3] = 0;
|
||||||
|
C[i * 3 + 1] = 0;
|
||||||
|
C[i * 3 + 2] = 0;
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
||||||
|
C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
||||||
|
C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
||||||
|
C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute B = 1 + VC : 3 x 3
|
||||||
|
const double B0 = C[row1 * 3] + 1;
|
||||||
|
const double B1 = C[row1 * 3 + 1];
|
||||||
|
const double B2 = C[row1 * 3 + 2];
|
||||||
|
const double B3 = C[row2 * 3];
|
||||||
|
const double B4 = C[row2 * 3 + 1] + 1;
|
||||||
|
const double B5 = C[row2 * 3 + 2];
|
||||||
|
const double B6 = C[row3 * 3];
|
||||||
|
const double B7 = C[row3 * 3 + 1];
|
||||||
|
const double B8 = C[row3 * 3 + 2] + 1;
|
||||||
|
|
||||||
|
// Check if determinant of B is not too close to zero
|
||||||
|
double det;
|
||||||
|
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
||||||
|
B2 * (B3 * B7 - B4 * B6);
|
||||||
|
if (fabs(det) < breakdown) {
|
||||||
|
return QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update det(Slater) if passed
|
||||||
|
if (determinant)
|
||||||
|
*determinant *= det;
|
||||||
|
|
||||||
|
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
||||||
|
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
||||||
|
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
||||||
|
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
||||||
|
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
||||||
|
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
||||||
|
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
||||||
|
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
||||||
|
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
||||||
|
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
||||||
|
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
||||||
|
|
||||||
|
// tmp = B^{-1}D : 3 x D{Dim}_P
|
||||||
|
double __attribute__((aligned(8))) tmp[3 * D{Dim}_P];
|
||||||
|
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
||||||
|
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
||||||
|
double* r3dim = &(Slater_inv[row3 * D{Dim}_P]);
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||||
|
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
||||||
|
tmp[D{Dim}_P + j] =
|
||||||
|
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
||||||
|
tmp[2 * D{Dim}_P + j] =
|
||||||
|
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
||||||
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||||
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
|
||||||
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
|
||||||
|
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:woodbury_3x3_kernel_generator
|
||||||
|
#+begin_src python :noweb yes :exports none
|
||||||
|
text="""
|
||||||
|
<<woodbury_3x3_kernel_template>>
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
for Dim in <<kernel_generator_range>>:
|
||||||
|
Dim=str(Dim)
|
||||||
|
result.append(text.replace("{Dim}",Dim) )
|
||||||
|
|
||||||
|
return ''.join(result)
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:woodbury_3x3_switch-case_generator
|
||||||
|
#+begin_src python :noweb yes :exports none
|
||||||
|
text="""
|
||||||
|
case {Dim}:
|
||||||
|
return qmckl_woodbury_3x3_{Dim}(context,
|
||||||
|
Updates,
|
||||||
|
Updates_index,
|
||||||
|
breakdown,
|
||||||
|
Slater_inv,
|
||||||
|
determinant);"""
|
||||||
|
result = []
|
||||||
|
for Dim in <<kernel_generator_range>>:
|
||||||
|
Dim=str(Dim)
|
||||||
|
result.append(text.replace("{Dim}",Dim) )
|
||||||
|
|
||||||
|
return ''.join(result)
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
|
<<woodbury_3x3_kernel_generator()>>
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
|
qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context,
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* determinant) {
|
||||||
|
|
||||||
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
|
return qmckl_failwith(
|
||||||
|
context,
|
||||||
|
QMCKL_NULL_CONTEXT,
|
||||||
|
"qmckl_woodbury_3x3",
|
||||||
|
NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_HPC
|
||||||
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||||
|
switch (Dim) {
|
||||||
|
<<woodbury_3x3_switch-case_generator()>>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // Updating smaller sub-matrix
|
||||||
|
return qmckl_woodbury_3x3_hpc(
|
||||||
|
context,
|
||||||
|
LDS,
|
||||||
|
Dim,
|
||||||
|
Updates,
|
||||||
|
Updates_index,
|
||||||
|
breakdown,
|
||||||
|
Slater_inv,
|
||||||
|
determinant);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
return qmckl_woodbury_3x3_doc(
|
||||||
|
context,
|
||||||
|
LDS,
|
||||||
|
Dim,
|
||||||
|
Updates,
|
||||||
|
Updates_index,
|
||||||
|
breakdown,
|
||||||
|
Slater_inv,
|
||||||
|
determinant);
|
||||||
|
// return qmckl_woodbury_3x3_hpc(
|
||||||
|
// context,
|
||||||
|
// LDS,
|
||||||
|
// Dim,
|
||||||
|
// Updates,
|
||||||
|
// Updates_index,
|
||||||
|
// breakdown,
|
||||||
|
// Slater_inv,
|
||||||
|
// determinant);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_woodbury_3x3
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||||
|
interface
|
||||||
|
integer(c_int32_t) function qmckl_woodbury_3x3 &
|
||||||
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||||
|
bind(C)
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
import
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer (c_int64_t) , intent(in) , value :: context
|
||||||
|
integer (c_int64_t) , intent(in) , value :: LDS
|
||||||
|
integer (c_int64_t) , intent(in) , value :: Dim
|
||||||
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
||||||
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
||||||
|
real (c_double ) , intent(in) , value :: breakdown
|
||||||
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||||
|
real (c_double ) , intent(inout) :: determinant
|
||||||
|
|
||||||
|
end function qmckl_woodbury_3x3
|
||||||
|
end interface
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_3x3_hpc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||||
|
interface
|
||||||
|
integer(c_int32_t) function qmckl_woodbury_3x3_hpc &
|
||||||
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||||
|
bind(C)
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
import
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer (c_int64_t) , intent(in) , value :: context
|
||||||
|
integer (c_int64_t) , intent(in) , value :: LDS
|
||||||
|
integer (c_int64_t) , intent(in) , value :: Dim
|
||||||
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
||||||
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
||||||
|
real (c_double ) , intent(in) , value :: breakdown
|
||||||
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||||
|
real (c_double ) , intent(inout) :: determinant
|
||||||
|
|
||||||
|
end function qmckl_woodbury_3x3_hpc
|
||||||
|
end interface
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname="qmckl_woodbury_3x3_doc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||||
|
interface
|
||||||
|
integer(c_int32_t) function qmckl_woodbury_3x3_doc &
|
||||||
|
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||||
|
bind(C)
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
import
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer (c_int64_t) , intent(in) , value :: context
|
||||||
|
integer (c_int64_t) , intent(in) , value :: LDS
|
||||||
|
integer (c_int64_t) , intent(in) , value :: Dim
|
||||||
|
real (c_double ) , intent(in) :: Updates(3*Dim)
|
||||||
|
integer (c_int64_t) , intent(in) :: Updates_index(3)
|
||||||
|
real (c_double ) , intent(in) , value :: breakdown
|
||||||
|
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||||
|
real (c_double ) , intent(inout) :: determinant
|
||||||
|
|
||||||
|
end function qmckl_woodbury_3x3_doc
|
||||||
|
end interface
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** Performance
|
||||||
|
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
||||||
|
it is sure they will not result in a singular matrix.
|
||||||
|
|
||||||
|
*** Tests
|
||||||
|
#+begin_src c :tangle (eval c_test)
|
||||||
|
assert(Updates3 != NULL);
|
||||||
|
assert(Updates_index3 != NULL);
|
||||||
|
assert(Slater_inv3_1 != NULL);
|
||||||
|
det = -1.23743195512859e-09;
|
||||||
|
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
||||||
|
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
||||||
|
for (unsigned int i = 0; i < Dim; i++) {
|
||||||
|
for (unsigned int j = 0; j < Dim; j++) {
|
||||||
|
res[i * Dim + j] = 0;
|
||||||
|
for (unsigned int k = 0; k < Dim; k++) {
|
||||||
|
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rc = QMCKL_SUCCESS;
|
||||||
|
for (unsigned int i = 0; i < Dim; i++) {
|
||||||
|
for (unsigned int j = 0; j < Dim; j++) {
|
||||||
|
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
||||||
|
rc = QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
||||||
|
rc = QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
* Sherman-Morrison with Slagel Splitting
|
* Sherman-Morrison with Slagel Splitting
|
||||||
** ~qmckl_sm_splitting~
|
** ~qmckl_sm_splitting~
|
||||||
:PROPERTIES:
|
:PROPERTIES:
|
||||||
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user