mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 18:16:28 +01:00
Improved documentation and Requirements sections.
This commit is contained in:
parent
dcd6428c50
commit
78c574af49
@ -27,7 +27,6 @@ int main() {
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
* Naïve Sherman-Morrison
|
||||
|
||||
** ~qmckl_sherman_morrison~
|
||||
@ -37,13 +36,12 @@ int main() {
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
This is the simplest of the available Sherman-Morrison-Woodbury
|
||||
kernels. It applies rank-1 updates one by one in the order
|
||||
that is given. It only checks if the denominator in the
|
||||
Sherman-Morrison formula is not too close to zero when an update is evaluated.
|
||||
It will exit with an error code of the denominator is too close to zero.
|
||||
|
||||
The formula that is applied is
|
||||
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
||||
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
||||
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
||||
|
||||
The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is
|
||||
\[
|
||||
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
|
||||
\]
|
||||
@ -53,6 +51,17 @@ int main() {
|
||||
$u$ and $v^T$ are the column and row vectors containing the updates,
|
||||
$S^{-1}$ is the inverse of the Slater-matrix.
|
||||
|
||||
Even though the Slater-matrix $S$ with all updates applied at once is invertable, during the course of applying
|
||||
updates to the inverse Slater-matrix $S^{-1}$ one-by-one it can happen that one of the intermediate inverse
|
||||
matrices $S^{-1}$ becomes singular. Therefore a global threshold value $\epsilon$ is defined that is used to
|
||||
evaluate each individual update $u_j$ when it is applied.
|
||||
|
||||
This value sets the lower bound for which the
|
||||
denominator $1+v_j^TS^{-1}u_j$ is considered to be too small and will most probably result in a singular matrix
|
||||
$S$, or at least in an inverse of $S$ of very poor numerical quality. Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$,
|
||||
the update is applied as usual. If $1+v_j^TS^{-1}u_j \leq \epsilon$ the update is rejected and the kernel exits with
|
||||
return code \texttt{QMCKL_FAILURE}.
|
||||
|
||||
#+NAME: qmckl_sherman_morrison_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | Dim | in | Leading dimension of Slater_inv |
|
||||
@ -64,13 +73,13 @@ int main() {
|
||||
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
* ~Dim >= 2~
|
||||
* ~N_updates >= 1~
|
||||
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||
* ~Updates_index~ is allocated with $N_updates$ elements
|
||||
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
|
||||
*** C header
|
||||
|
||||
@ -297,8 +306,8 @@ assert(rc == QMCKL_SUCCESS);
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
The simplest version of the generalised Sherman-Morrison-Woodbury kernels. It is used to apply two
|
||||
rank-1 updates at once. The formula used in this algorithm is called the Woodbury Matrix Identity
|
||||
The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in
|
||||
this algorithm is called the Woodbury Matrix Identity
|
||||
\[
|
||||
(S + U V)^{-1} = S^{-1} - C B^{-1} D
|
||||
\]
|
||||
@ -322,11 +331,11 @@ assert(rc == QMCKL_SUCCESS);
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~qmckl_null_context~
|
||||
- ~dim >= 2~
|
||||
- ~updates~ is allocated with at least $2 \times 2 \times 8$ bytes
|
||||
- ~updates_index~ is allocated with $2 \times 8$ bytes
|
||||
- ~Dim >= 2~
|
||||
- ~Updates~ is allocated with $2 \times Dim$ elements
|
||||
- ~Updates_index~ is allocated with $2$ elements
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~slater_inv~ is allocated with at least $dim \times dim \times 8$ bytes
|
||||
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
|
||||
*** C header
|
||||
|
||||
@ -438,7 +447,6 @@ qmckl_exit_code qmckl_woodbury_2_c(const qmckl_context context,
|
||||
|
||||
#+end_src
|
||||
|
||||
|
||||
*** Performance
|
||||
|
||||
This function is most efficient when used in cases where there are only 2 rank-1 updates.
|
||||
@ -548,11 +556,11 @@ assert(rc == QMCKL_SUCCESS);
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~qmckl_null_context~
|
||||
- ~dim >= 2~
|
||||
- ~updates~ is allocated with at least $3 \times 2 \times 8$ bytes
|
||||
- ~updates_index~ is allocated with $3 \times 8$ bytes
|
||||
- ~Dim >= 2~
|
||||
- ~Updates~ is allocated with $3 \times Dim$ elements
|
||||
- ~Updates_index~ is allocated with $3$ elements
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~slater_inv~ is allocated with at least $dim \times dim \times 8$ bytes
|
||||
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
|
||||
*** C header
|
||||
|
||||
@ -790,13 +798,13 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
* ~Dim >= 2~
|
||||
* ~N_updates >= 1~
|
||||
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||
* ~Updates_index~ is allocated with $N_updates$ elements
|
||||
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
|
||||
*** C header
|
||||
|
||||
@ -951,427 +959,6 @@ assert(rc == QMCKL_SUCCESS);
|
||||
#+end_src
|
||||
|
||||
|
||||
* Woodbury 2x2 with Sherman-Morrison and update splitting
|
||||
|
||||
** ~qmckl_sherman_morrison_smw2s~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison_smw2s
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
The Woodbury 2x2 kernel with Sherman-Morrison and update splitting combines the low-level Woodbury 2x2 kernel
|
||||
and Sherman-Morrison with update splitting. For a given number of updates $N$ it splits the number of updates
|
||||
in blocks of two updates. The blocks of two updates are then applied one by one using Woodbury 2x2. If a block
|
||||
of updates fails, both updates in the block are applied with Sherman-Morrison instead, split if necessary and
|
||||
with their second half put in a queue. After all blocks are processed the remaining one update --in case there
|
||||
was an odd number of updates to begin with-- is also aplpied with Sherman-Morrison and split if necessary.
|
||||
The queue containing the collected second halves of all the processed updates is processed at the very end to
|
||||
avoid having many intermediate queues containing only a few updates that risks an increased probability
|
||||
of artificially created non-singular intermediate matrices, resulting from division up the total number of
|
||||
updates in blocks of three.
|
||||
|
||||
|
||||
#+NAME: qmckl_sherman_morrison_smw2s_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | Dim | in | Leading dimension of Slater_inv |
|
||||
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
||||
| double | Updates[N_updates*Dim] | in | Array containing the updates |
|
||||
| uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates |
|
||||
| double | breakdown | in | Break-down parameter on which to fail or not |
|
||||
| double | Slater_inv[Dim*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
||||
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
|
||||
*** C header
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_smw2s_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_sherman_morrison_smw2s_c (
|
||||
const qmckl_context context,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv );
|
||||
#+end_src
|
||||
|
||||
*** Source Fortran
|
||||
|
||||
#+begin_src f90 :tangle (eval f)
|
||||
integer function qmckl_sherman_morrison_smw2s_f(context, Slater_inv, Dim, N_updates, &
|
||||
Updates, Updates_index) result(info)
|
||||
use qmckl
|
||||
implicit none
|
||||
integer(qmckl_context) , intent(in) :: context
|
||||
integer*8 , intent(in), value :: Dim, N_updates
|
||||
integer*8 , intent(in) :: Updates_index(N_updates)
|
||||
real*8 , intent(in) :: Updates(N_updates*Dim)
|
||||
real*8 , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
info = qmckl_sherman_morrison_smw2s (context, Dim, N_updates, Updates, Updates_index, Slater_inv)
|
||||
end function qmckl_sherman_morrison_smw2s_f
|
||||
#+end_src
|
||||
|
||||
*** Source C
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison_smw2s_c(const qmckl_context context,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double * Slater_inv) {
|
||||
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
|
||||
// std::cerr << "Called qmckl_sherman_morrison_woodbury_2 with " << N_updates
|
||||
// << " updates" << std::endl;
|
||||
// #endif
|
||||
|
||||
qmckl_exit_code rc;
|
||||
|
||||
uint64_t n_of_2blocks = N_updates / 2;
|
||||
uint64_t remainder = N_updates % 2;
|
||||
uint64_t length_2block = 2 * Dim;
|
||||
|
||||
// Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with
|
||||
// Woodbury 2x2 kernel
|
||||
double later_updates[Dim * N_updates];
|
||||
uint64_t later_index[N_updates];
|
||||
uint64_t later = 0;
|
||||
if (n_of_2blocks > 0) {
|
||||
for (uint64_t i = 0; i < n_of_2blocks; i++) {
|
||||
const double *Updates_2block = &Updates[i * length_2block];
|
||||
const uint64_t *Updates_index_2block = &Updates_index[i * 2];
|
||||
rc = qmckl_woodbury_2_c(context, Dim, Updates_2block, Updates_index_2block, breakdown, Slater_inv);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting_c(Dim, 2, Updates_2block, Updates_index_2block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l);
|
||||
later = later + l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (remainder == 1) { // Apply last remaining update with slagel_splitting
|
||||
const double *Updates_1block = &Updates[n_of_2blocks * length_2block];
|
||||
const uint64_t *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting_c(Dim, 1, Updates_1block, Updates_index_1block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l);
|
||||
later = later + l;
|
||||
}
|
||||
|
||||
if (later > 0) {
|
||||
rc = qmckl_sherman_morrison_splitting_c(context, Dim, later, later_updates, later_index, breakdown, Slater_inv);
|
||||
}
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
*** Performance...
|
||||
|
||||
This kernel performs best for the case of two rank-1 update and a low fail-rate.
|
||||
|
||||
** C interface :noexport:
|
||||
|
||||
#+CALL: generate_c_interface(table=qmckl_sherman_morrison_smw2s_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
||||
integer(c_int32_t) function qmckl_sherman_morrison_smw2s &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv) &
|
||||
bind(C) result(info)
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
implicit none
|
||||
|
||||
integer (c_int64_t) , intent(in) , value :: context
|
||||
integer (c_int64_t) , intent(in) , value :: Dim
|
||||
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
||||
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
|
||||
integer(c_int32_t), external :: qmckl_sherman_morrison_smw2s_c
|
||||
info = qmckl_sherman_morrison_smw2s_c &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv)
|
||||
|
||||
end function qmckl_sherman_morrison_smw2s
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_smw2s_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||
interface
|
||||
integer(c_int32_t) function qmckl_sherman_morrison_smw2s &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv) &
|
||||
bind(C)
|
||||
use, intrinsic :: iso_c_binding
|
||||
import
|
||||
implicit none
|
||||
|
||||
integer (c_int64_t) , intent(in) , value :: context
|
||||
integer (c_int64_t) , intent(in) , value :: Dim
|
||||
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
||||
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
|
||||
end function qmckl_sherman_morrison_smw2s
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
*** Test :noexport:
|
||||
|
||||
#+begin_src c :tangle (eval c_test)
|
||||
rc = qmckl_sherman_morrison_smw2s_c(context, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5_1);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5_1[k * Dim + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
rc = QMCKL_SUCCESS;
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
||||
rc = QMCKL_FAILURE;
|
||||
}
|
||||
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
||||
rc = QMCKL_FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(rc == QMCKL_SUCCESS);
|
||||
#+end_src
|
||||
|
||||
|
||||
* Woodbury 3x3 with Sherman-Morrison and update splitting
|
||||
|
||||
** ~qmckl_sherman_morrison_smw3s~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison_smw3s
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
The Woodbury 3x3 kernel with Sherman-Morrison and update splitting combines the low-level Woodbury 3x3 kernel
|
||||
and Sherman-Morrison with update splitting. It works the same as Woodbury 2x2 with Sherman-Morrison and update
|
||||
splitting, except that the updates are divided in blocks of three rank-1 updates instead of blocks of two
|
||||
rank-1 updates.
|
||||
|
||||
#+NAME: qmckl_sherman_morrison_smw3s_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | Dim | in | Leading dimension of Slater_inv |
|
||||
| uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv |
|
||||
| double | Updates[N_updates*Dim] | in | Array containing the updates |
|
||||
| uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates |
|
||||
| double | breakdown | in | Break-down parameter on which to fail or not |
|
||||
| double | Slater_inv[Dim*Dim] | inout | Array containing the inverse of a Slater-matrix |
|
||||
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
|
||||
*** C header
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_smw3s_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_sherman_morrison_smw3s_c (
|
||||
const qmckl_context context,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv );
|
||||
#+end_src
|
||||
|
||||
*** Source Fortran
|
||||
|
||||
#+begin_src f90 :tangle (eval f)
|
||||
integer function qmckl_sherman_morrison_smw3s_f(context, Slater_inv, Dim, N_updates, &
|
||||
Updates, Updates_index) result(info)
|
||||
use qmckl
|
||||
implicit none
|
||||
integer(qmckl_context) , intent(in) :: context
|
||||
integer*8 , intent(in), value :: Dim, N_updates
|
||||
integer*8 , intent(in) :: Updates_index(N_updates)
|
||||
real*8 , intent(in) :: Updates(N_updates*Dim)
|
||||
real*8 , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
!logical, external :: qmckl_sherman_morrison_f
|
||||
info = qmckl_sherman_morrison_smw3s(context, Dim, N_updates, Updates, Updates_index, Slater_inv)
|
||||
end function qmckl_sherman_morrison_smw3s_f
|
||||
#+end_src
|
||||
|
||||
*** Source C
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison_smw3s_c(const qmckl_context context,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double * Slater_inv) {
|
||||
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
|
||||
// std::cerr << "Called qmckl_sherman_morrison_woodbury_3 with " << N_updates
|
||||
// << " updates" << std::endl;
|
||||
// #endif
|
||||
|
||||
qmckl_exit_code rc;
|
||||
|
||||
uint64_t n_of_3blocks = N_updates / 3;
|
||||
uint64_t remainder = N_updates % 3;
|
||||
uint64_t length_3block = 3 * Dim;
|
||||
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
|
||||
// Woodbury 3x3 kernel
|
||||
double later_updates[Dim * N_updates];
|
||||
uint64_t later_index[N_updates];
|
||||
uint64_t later = 0;
|
||||
if (n_of_3blocks > 0) {
|
||||
for (uint64_t i = 0; i < n_of_3blocks; i++) {
|
||||
const double *Updates_3block = &Updates[i * length_3block];
|
||||
const uint64_t *Updates_index_3block = &Updates_index[i * 3];
|
||||
rc = qmckl_woodbury_3_c(context, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting_c(Dim, 3, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l);
|
||||
later = later + l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (remainder != 0) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||
const double *Updates_remainder_block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t *Updates_index_remainder_block = &Updates_index[3 * n_of_3blocks];
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting_c(Dim, remainder, Updates_remainder_block, Updates_index_remainder_block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l);
|
||||
later = later + l;
|
||||
}
|
||||
|
||||
if (later > 0) {
|
||||
rc = qmckl_sherman_morrison_splitting_c(context, Dim, later, later_updates, later_index, breakdown, Slater_inv);
|
||||
}
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
*** Performance...
|
||||
|
||||
This kernel performs best for the case of three rank-1 update and a low fail-rate.
|
||||
|
||||
|
||||
** C interface :noexport:
|
||||
|
||||
#+CALL: generate_c_interface(table=qmckl_sherman_morrison_smw3s_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
||||
integer(c_int32_t) function qmckl_sherman_morrison_smw3s &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv) &
|
||||
bind(C) result(info)
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
implicit none
|
||||
|
||||
integer (c_int64_t) , intent(in) , value :: context
|
||||
integer (c_int64_t) , intent(in) , value :: Dim
|
||||
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
||||
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
|
||||
integer(c_int32_t), external :: qmckl_sherman_morrison_smw3s_c
|
||||
info = qmckl_sherman_morrison_smw3s_c &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv)
|
||||
|
||||
end function qmckl_sherman_morrison_smw3s
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_smw3s_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||
interface
|
||||
integer(c_int32_t) function qmckl_sherman_morrison_smw3s &
|
||||
(context, Dim, N_updates, Updates, Updates_index, Slater_inv) &
|
||||
bind(C)
|
||||
use, intrinsic :: iso_c_binding
|
||||
import
|
||||
implicit none
|
||||
|
||||
integer (c_int64_t) , intent(in) , value :: context
|
||||
integer (c_int64_t) , intent(in) , value :: Dim
|
||||
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||
real (c_double ) , intent(in) :: Updates(N_updates*Dim)
|
||||
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
|
||||
|
||||
end function qmckl_sherman_morrison_smw3s
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
*** Test :noexport:
|
||||
|
||||
#+begin_src c :tangle (eval c_test)
|
||||
rc = qmckl_sherman_morrison_smw3s_c(context, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5_2);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5_2[k * Dim + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
rc = QMCKL_SUCCESS;
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) {
|
||||
rc = QMCKL_FAILURE;
|
||||
}
|
||||
if (i != j && fabs(res[i * Dim + j]) > tolerance) {
|
||||
rc = QMCKL_FAILURE;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(rc == QMCKL_SUCCESS);
|
||||
#+end_src
|
||||
|
||||
|
||||
* Woodbury 3x3 and 2x2 with Sherman-Morrison and update splitting
|
||||
|
||||
** ~qmckl_sherman_morrison_smw32s~
|
||||
@ -1398,13 +985,13 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
*** Requirements
|
||||
|
||||
- ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
* ~Dim >= 2~
|
||||
* ~N_updates >= 1~
|
||||
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||
* ~Updates_index~ is allocated with $N_updates$ elements
|
||||
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
|
||||
*** C header
|
||||
|
||||
@ -1593,8 +1180,8 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
* Helper Functions
|
||||
|
||||
Helper functions that are used by the Sherman-Morrison-Woodbury kernels.
|
||||
These functions can only be used internally by higher level functions.
|
||||
Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels.
|
||||
These functions can only be used internally by the kernels in this module.
|
||||
|
||||
** ~qmckl_slagel_splitting~
|
||||
:PROPERTIES:
|
||||
@ -1603,13 +1190,14 @@ These functions can only be used internally by higher level functions.
|
||||
:FRetType: double precision
|
||||
:END:
|
||||
|
||||
~qmckl_slagel_splitting~ is used internally to apply a list of rank-1 updates while splitting an update if necessary.
|
||||
In case of a split it applies the first half of the update while putting the second half in waiting queue to be applied at the end.
|
||||
~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel.
|
||||
It is used internally to apply a collection of $N$ of rank-1 updates to the inverse Slater-matrix $S^{-1}$ and
|
||||
splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update,
|
||||
while putting the second half in a waiting queue to be applied at the end.
|
||||
|
||||
For a given update $u_j$ we define a threshold value $\epsilon_j$, which is the minimum value of
|
||||
$1+v_j^TS^{-1}u_j$ for a non-singular matrix $S$. If $1+v_j^TS^{-1}u_j \geq \epsilon_j$,
|
||||
the update is applied as usual. Otherwise, $u_j$ will be redefined as $\frac{u_j}{2}$, and the other half
|
||||
(to be applied at the end) will be defined using vectors $\frac{u_{j'}}{2}$ and $v_{j'}^T=v_{j'}^T$.
|
||||
Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined
|
||||
as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors
|
||||
$u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}.
|
||||
|
||||
#+NAME: qmckl_slagel_splitting_args
|
||||
| uint64_t | Dim | in | Leading dimension of Slater_inv |
|
||||
@ -1627,12 +1215,12 @@ These functions can only be used internally by higher level functions.
|
||||
|
||||
- ~Dim >= 2~
|
||||
- ~N_updates >= 1~
|
||||
- ~Updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~Updates_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||
- ~Updates_index~ is allocated with $N_updates$ elements
|
||||
- ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
- ~Slater_inv~ is allocated with at least $Dim \times Dim \times 8$ bytes
|
||||
- ~later_updates~ is allocated with at least $1 \times 2 \times 8$ bytes
|
||||
- ~later_index~ is allocated with at least $1 \times 8$ bytes
|
||||
- ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
- ~later_updates~ is allocated with $later \times Dim$ elements
|
||||
- ~later_index~ is allocated with $N_updates$ elements
|
||||
- ~later >= 0~
|
||||
|
||||
*** C header
|
||||
|
Loading…
Reference in New Issue
Block a user