1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-06-30 00:44:52 +02:00

Added and tested Woodbury 3x3 kernel to QMCkl.

Residual = wb3 14 9.92936e-07 1.90518e-11
ok -- cycle 14

Residual = qmckl_wb3 14 9.92936e-07 1.90518e-11
ok -- cycle 14. #25
This commit is contained in:
Francois Coppens 2021-07-22 11:38:50 +02:00
parent fdb8f5d50a
commit c6d00d5c5b
2 changed files with 323 additions and 104 deletions

View File

@ -7,39 +7,39 @@
static double threshold();
// Naïve Sherman Morrison
bool qmckl_sherman_morrison(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index);
bool qmckl_sherman_morrison(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index);
// Woodbury 2x2 kernel
bool qmckl_woodbury_2(double *Slater_inv, const unsigned int Dim, double *Updates,
const unsigned int *Updates_index);
bool qmckl_woodbury_2(double *Slater_inv, const uint64_t Dim, double *Updates,
const uint64_t *Updates_index);
// Woodbury 3x3 kernel
bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Updates,
const unsigned int *Updates_index);
bool qmckl_woodbury_3(double *Slater_inv, const uint64_t Dim, double *Updates,
const uint64_t *Updates_index);
// Sherman Morrison, with J. Slagel splitting (caller function)
void qmckl_sherman_morrison_splitting(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index);
void qmckl_sherman_morrison_splitting(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index);
// Sherman Morrison, with J. Slagel splitting
// http://hdl.handle.net/10919/52966
static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index,
double *later_updates, unsigned int *later_index,
unsigned int *later);
static void slagel_splitting(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index,
double *later_updates, uint64_t *later_index,
uint64_t *later);
// Mixed Sherman-Morrison-Woodbury kernel using
// Woodbury 2x2 and Sherman-Morrison with update-splitting
void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index);
void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const uint64_t Dim,
const uint64_t N_updates, double *Updates,
uint64_t *Updates_index);
// Mixed Sherman-Morrison-Woodbury kernel using
// Woodbury 3x3, Woodbury 2x2 and Sherman-Morrison with update-splitting
void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index);
void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const uint64_t Dim,
const uint64_t N_updates, double *Updates,
uint64_t *Updates_index);
@ -53,8 +53,8 @@ static double threshold() {
}
// Naïve Sherman Morrison
bool qmckl_sherman_morrison(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index) {
bool qmckl_sherman_morrison(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called qmckl_sherman_morrison with " << N_updates << " updates" << std::endl;
// #endif
@ -62,13 +62,13 @@ bool qmckl_sherman_morrison(double *Slater_inv, unsigned int Dim, unsigned int N
double C[Dim];
double D[Dim];
unsigned int l = 0;
uint64_t l = 0;
// For each update
while (l < N_updates) {
// C = A^{-1} x U_l
for (unsigned int i = 0; i < Dim; i++) {
for (uint64_t i = 0; i < Dim; i++) {
C[i] = 0;
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t j = 0; j < Dim; j++) {
C[i] += Slater_inv[i * Dim + j] * Updates[l * Dim + j];
}
}
@ -81,13 +81,13 @@ bool qmckl_sherman_morrison(double *Slater_inv, unsigned int Dim, unsigned int N
double iden = 1 / den;
// D = v^T x A^{-1}
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t j = 0; j < Dim; j++) {
D[j] = Slater_inv[(Updates_index[l] - 1) * Dim + j];
}
// A^{-1} = A^{-1} - C x D / den
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
double update = C[i] * D[j] * iden;
Slater_inv[i * Dim + j] -= update;
}
@ -99,8 +99,8 @@ bool qmckl_sherman_morrison(double *Slater_inv, unsigned int Dim, unsigned int N
}
// Woodbury 2x2 kernel
bool qmckl_woodbury_2(double *Slater_inv, const unsigned int Dim, double *Updates,
const unsigned int *Updates_index) {
bool qmckl_woodbury_2(double *Slater_inv, const uint64_t Dim, double *Updates,
const uint64_t *Updates_index) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
@ -110,16 +110,16 @@ bool qmckl_woodbury_2(double *Slater_inv, const unsigned int Dim, double *Update
// std::cerr << "Called Woodbury 2x2 kernel" << std::endl;
// #endif
const unsigned int row1 = (Updates_index[0] - 1);
const unsigned int row2 = (Updates_index[1] - 1);
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[2 * Dim];
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < 2; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < 2; j++) {
C[i * 2 + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
for (uint64_t k = 0; k < Dim; k++) {
C[i * 2 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k];
}
}
@ -146,16 +146,16 @@ bool qmckl_woodbury_2(double *Slater_inv, const unsigned int Dim, double *Update
// Compute tmp = B^{-1} x (V.S^{-1})
double tmp[2 * Dim];
for (unsigned int i = 0; i < 2; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < 2; i++) {
for (uint64_t j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 2] * Slater_inv[row1 * Dim + j];
tmp[i * Dim + j] += Binv[i * 2 + 1] * Slater_inv[row2 * Dim + j];
}
}
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
Slater_inv[i * Dim + j] -= C[i * 2] * tmp[j];
Slater_inv[i * Dim + j] -= C[i * 2 + 1] * tmp[Dim + j];
}
@ -165,8 +165,8 @@ bool qmckl_woodbury_2(double *Slater_inv, const unsigned int Dim, double *Update
}
// Woodbury 3x3 kernel
bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Updates,
const unsigned int *Updates_index) {
bool qmckl_woodbury_3(double *Slater_inv, const uint64_t Dim, double *Updates,
const uint64_t *Updates_index) {
/*
C := S^{-1} * U, dim x 3
B := 1 + V * C, 3 x 3
@ -176,17 +176,17 @@ bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Update
// std::cerr << "Called Woodbury 3x3 kernel" << std::endl;
// #endif
const unsigned int row1 = (Updates_index[0] - 1);
const unsigned int row2 = (Updates_index[1] - 1);
const unsigned int row3 = (Updates_index[2] - 1);
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
const uint64_t row3 = (Updates_index[2] - 1);
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[3 * Dim];
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < 3; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < 3; j++) {
C[i * 3 + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
for (uint64_t k = 0; k < Dim; k++) {
C[i * 3 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k];
}
}
@ -226,8 +226,8 @@ bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Update
// Compute tmp = B^{-1} x (V.S^{-1})
double tmp[3 * Dim];
for (unsigned int i = 0; i < 3; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < 3; i++) {
for (uint64_t j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 3] * Slater_inv[row1 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 1] * Slater_inv[row2 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 2] * Slater_inv[row3 * Dim + j];
@ -235,8 +235,8 @@ bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Update
}
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
Slater_inv[i * Dim + j] -= C[i * 3] * tmp[j];
Slater_inv[i * Dim + j] -= C[i * 3 + 1] * tmp[Dim + j];
Slater_inv[i * Dim + j] -= C[i * 3 + 2] * tmp[2 * Dim + j];
@ -248,15 +248,15 @@ bool qmckl_woodbury_3(double *Slater_inv, const unsigned int Dim, double *Update
// Sherman Morrison, with J. Slagel splitting (caller function)
// http://hdl.handle.net/10919/52966
void qmckl_sherman_morrison_splitting(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index) {
void qmckl_sherman_morrison_splitting(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called qmckl_sherman_morrison_splitting with " << N_updates << " updates" << std::endl;
// #endif
double later_updates[Dim * N_updates];
unsigned int later_index[N_updates];
unsigned int later = 0;
uint64_t later_index[N_updates];
uint64_t later = 0;
slagel_splitting(Slater_inv, Dim, N_updates, Updates, Updates_index, later_updates,
later_index, &later);
@ -268,10 +268,10 @@ void qmckl_sherman_morrison_splitting(double *Slater_inv, unsigned int Dim, unsi
// Sherman Morrison, with J. Slagel splitting
// http://hdl.handle.net/10919/52966
static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index,
double *later_updates, unsigned int *later_index,
unsigned int *later) {
static void slagel_splitting(double *Slater_inv, uint64_t Dim, uint64_t N_updates,
double *Updates, uint64_t *Updates_index,
double *later_updates, uint64_t *later_index,
uint64_t *later) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called slagel_splitting with " << N_updates << " updates" << std::endl;
// #endif
@ -279,13 +279,13 @@ static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int
double C[Dim];
double D[Dim];
unsigned int l = 0;
uint64_t l = 0;
// For each update
while (l < N_updates) {
// C = S^{-1} x U_l
for (unsigned int i = 0; i < Dim; i++) {
for (uint64_t i = 0; i < Dim; i++) {
C[i] = 0;
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t j = 0; j < Dim; j++) {
C[i] += Slater_inv[i * Dim + j] * Updates[l * Dim + j];
}
}
@ -295,7 +295,7 @@ static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int
if (fabs(den) < threshold()) {
// U_l = U_l / 2 (do the split)
for (unsigned int i = 0; i < Dim; i++) {
for (uint64_t i = 0; i < Dim; i++) {
later_updates[*later * Dim + i] = Updates[l * Dim + i] / 2.0;
C[i] /= 2.0;
}
@ -307,13 +307,13 @@ static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int
double iden = 1 / den;
// D = v^T x S^{-1}
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t j = 0; j < Dim; j++) {
D[j] = Slater_inv[(Updates_index[l] - 1) * Dim + j];
}
// S^{-1} = S^{-1} - C x D / den
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
double update = C[i] * D[j] * iden;
Slater_inv[i * Dim + j] -= update;
}
@ -324,32 +324,32 @@ static void slagel_splitting(double *Slater_inv, unsigned int Dim, unsigned int
// Sherman-Morrison-Woodbury 2x2 kernel
// qmckl_woodbury_2, slagel_splitting mixing scheme
void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index) {
void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const uint64_t Dim,
const uint64_t N_updates, double *Updates,
uint64_t *Updates_index) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called qmckl_sherman_morrison_woodbury_2 with " << N_updates
// << " updates" << std::endl;
// #endif
unsigned int n_of_2blocks = N_updates / 2;
unsigned int remainder = N_updates % 2;
unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim;
uint64_t n_of_2blocks = N_updates / 2;
uint64_t remainder = N_updates % 2;
uint64_t length_2block = 2 * Dim;
uint64_t length_1block = 1 * Dim;
// Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with
// Woodbury 2x2 kernel
double later_updates[Dim * N_updates];
unsigned int later_index[N_updates];
unsigned int later = 0;
uint64_t later_index[N_updates];
uint64_t later = 0;
if (n_of_2blocks > 0) {
for (unsigned int i = 0; i < n_of_2blocks; i++) {
for (uint64_t i = 0; i < n_of_2blocks; i++) {
double *Updates_2block = &Updates[i * length_2block];
unsigned int *Updates_index_2block = &Updates_index[i * 2];
uint64_t *Updates_index_2block = &Updates_index[i * 2];
bool ok;
ok = qmckl_woodbury_2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to slagel_splitting
unsigned int l = 0;
uint64_t l = 0;
slagel_splitting(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l;
@ -359,8 +359,8 @@ void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const unsigned int Di
if (remainder == 1) { // Apply last remaining update with slagel_splitting
double *Updates_1block = &Updates[n_of_2blocks * length_2block];
unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
unsigned int l = 0;
uint64_t *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
uint64_t l = 0;
slagel_splitting(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l;
@ -373,33 +373,33 @@ void qmckl_sherman_morrison_woodbury_2(double *Slater_inv, const unsigned int Di
// Sherman-Morrison-Woodbury 3x3 kernel
// qmckl_woodbury_2, qmckl_woodbury_3, slagel_splitting mixing scheme
void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index) {
void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const uint64_t Dim,
const uint64_t N_updates, double *Updates,
uint64_t *Updates_index) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called qmckl_sherman_morrison_woodbury_3 with " << N_updates
// << " updates" << std::endl;
// #endif
unsigned int n_of_3blocks = N_updates / 3;
unsigned int remainder = N_updates % 3;
unsigned int length_3block = 3 * Dim;
unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim;
uint64_t n_of_3blocks = N_updates / 3;
uint64_t remainder = N_updates % 3;
uint64_t length_3block = 3 * Dim;
uint64_t length_2block = 2 * Dim;
uint64_t length_1block = 1 * Dim;
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
// Woodbury 3x3 kernel
double later_updates[Dim * N_updates];
unsigned int later_index[N_updates];
unsigned int later = 0;
uint64_t later_index[N_updates];
uint64_t later = 0;
if (n_of_3blocks > 0) {
for (unsigned int i = 0; i < n_of_3blocks; i++) {
for (uint64_t i = 0; i < n_of_3blocks; i++) {
double *Updates_3block = &Updates[i * length_3block];
unsigned int *Updates_index_3block = &Updates_index[i * 3];
uint64_t *Updates_index_3block = &Updates_index[i * 3];
bool ok;
ok = qmckl_woodbury_3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to slagel_splitting
unsigned int l = 0;
uint64_t l = 0;
slagel_splitting(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l;
@ -409,11 +409,11 @@ void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const unsigned int Di
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
uint64_t *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
bool ok;
ok = qmckl_woodbury_2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to slagel_splitting
unsigned int l = 0;
uint64_t l = 0;
slagel_splitting(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l;
@ -421,8 +421,8 @@ void qmckl_sherman_morrison_woodbury_3(double *Slater_inv, const unsigned int Di
}
else if (remainder == 1) { // Apply last remaining update with slagel_splitting
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
unsigned int l = 0;
uint64_t *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
uint64_t l = 0;
slagel_splitting(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l;

View File

@ -29,6 +29,7 @@ int main() {
qmckl_exit_code rc;
#+end_src
* Sherman-Morrison Helper Functions
Helper functions that are used by the Sherman-Morrison-Woodbury
@ -42,7 +43,7 @@ kernels.
:FRetType: double precision
:END:
This function is used to set the threshold value that is used in the kernels to determine if a matrix is invertable or not. In the Sherman-Morrison kernels this is determined by comparing the denominator in the Sherman-Morrison formula to the value set in threshold. If the value is smaller than the threshold value it means the matrix is not invertable. In the Woodbury kernels the threshold value is compared with the value of the determinant of the update matrix.
This function is used to set the threshold value that is used in the kernels to determine if a matrix is invertable or not. In the Sherman-Morrison kernels this is determined by comparing the denominator in the Sherman-Morrison formula to the value set in threshold. If the value is smaller than the threshold value it means the matrix is not invertable. In the Woodbury kernels the threshold value is compared with the value of the determinant of the update matrix.
#+NAME: qmckl_sherman_morrison_threshold_args
| double | thresh | out | Threshold |
@ -66,7 +67,6 @@ This function is used to set the threshold value that is used in the kernels to
double* const thresh );
#+end_src
*** Source Fortran
#+begin_src f90 :tangle (eval f)
@ -96,7 +96,6 @@ return QMCKL_SUCCESS;
}
#+end_src
*** Performance
** C interface :noexport:
@ -338,7 +337,7 @@ assert(rc == QMCKL_SUCCESS);
* Woodbury 2x2
[TODO: FMJC] Add main body intro.
This is the Woodbury 3x3 kernel.
** ~qmckl_woodbury_2~
:PROPERTIES:
@ -543,6 +542,225 @@ assert(rc == QMCKL_SUCCESS);
#+end_src
* Woodbury 3x3
This is the Woodbury 3x3 kernel.
** ~qmckl_woodbury_3~
:PROPERTIES:
:Name: qmckl_woodbury_3
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
This is the simplest of the available Sherman-Morrison-Woodbury
kernels in QMCkl. It applies rank-1 updates one by one in the order
that is given. It only checks if the denominator in the
Sherman-Morrison formula is not too close to zero (and exit with an
error if it does) during the application of an update.
#+NAME: qmckl_woodbury_3_args
| qmckl_context | context | in | Global state |
| uint64_t | Dim | in | Leading dimension of Slater_inv |
| double | Updates[3*Dim] | in | Array containing the updates |
| uint64_t | Updates_index[3] | in | Array containing the rank-1 updates |
| double | Slater_inv[Dim*Dim] | inout | Array containing the inverse of a Slater-matrix |
*** Requirements
Add description of the input variables. (see for e.g. qmckl_distance.org)
*** C header
#+CALL: generate_c_header(table=qmckl_woodbury_3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_3_c (
const qmckl_context context,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
double* Slater_inv );
#+end_src
*** Source Fortran
#+begin_src f90 :tangle (eval f)
integer function qmckl_woodbury_3_f(context, Slater_inv, Dim, &
Updates, Updates_index) result(info)
use qmckl
implicit none
integer(qmckl_context) , intent(in) :: context
integer*8 , intent(in), value :: Dim
integer*8 , intent(in) :: Updates_index(3)
real*8 , intent(in) :: Updates(3*Dim)
real*8 , intent(inout) :: Slater_inv(Dim*Dim)
!logical, external :: qmckl_woodbury_3_f
info = qmckl_woodbury_2(context, Dim, Updates, Updates_index, Slater_inv)
end function qmckl_woodbury_3_f
#+end_src
*** Source C
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include "qmckl.h"
qmckl_exit_code qmckl_woodbury_3_c(const qmckl_context context,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
double * Slater_inv) {
/*
C := S^{-1} * U, dim x 3
B := 1 + V * C, 3 x 3
D := V * S^{-1}, 3 x dim
,*/
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called Woodbury 3x3 kernel" << std::endl;
// #endif
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
const uint64_t row3 = (Updates_index[2] - 1);
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[3 * Dim];
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < 3; j++) {
C[i * 3 + j] = 0;
for (uint64_t k = 0; k < Dim; k++) {
C[i * 3 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k];
}
}
}
// Compute B = 1 + V.C
const double B0 = C[row1 * 3] + 1;
const double B1 = C[row1 * 3 + 1];
const double B2 = C[row1 * 3 + 2];
const double B3 = C[row2 * 3];
const double B4 = C[row2 * 3 + 1] + 1;
const double B5 = C[row2 * 3 + 2];
const double B6 = C[row3 * 3];
const double B7 = C[row3 * 3 + 1];
const double B8 = C[row3 * 3 + 2] + 1;
// Check if determinant of B is not too close to zero
double det;
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
B2 * (B3 * B7 - B4 * B6);
double thresh = 0.0;
qmckl_exit_code rc = qmckl_sherman_morrison_threshold_c(&thresh);
if (fabs(det) < thresh) {
return QMCKL_FAILURE;
}
// Compute B^{-1} with explicit formula for 3x3 inversion
double Binv[9], idet = 1.0 / det;
Binv[0] = (B4 * B8 - B7 * B5) * idet;
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
Binv[2] = (B1 * B5 - B4 * B2) * idet;
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
Binv[4] = (B0 * B8 - B6 * B2) * idet;
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
Binv[6] = (B3 * B7 - B6 * B4) * idet;
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
Binv[8] = (B0 * B4 - B3 * B1) * idet;
// Compute tmp = B^{-1} x (V.S^{-1})
double tmp[3 * Dim];
for (uint64_t i = 0; i < 3; i++) {
for (uint64_t j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 3] * Slater_inv[row1 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 1] * Slater_inv[row2 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 2] * Slater_inv[row3 * Dim + j];
}
}
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
Slater_inv[i * Dim + j] -= C[i * 3] * tmp[j];
Slater_inv[i * Dim + j] -= C[i * 3 + 1] * tmp[Dim + j];
Slater_inv[i * Dim + j] -= C[i * 3 + 2] * tmp[2 * Dim + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
*** Performance...
** C interface :noexport:
#+CALL: generate_c_interface(table=qmckl_woodbury_3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval f) :comments org :exports none
integer(c_int32_t) function qmckl_woodbury_3 &
(context, Dim, Updates, Updates_index, Slater_inv) &
bind(C) result(info)
use, intrinsic :: iso_c_binding
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(3*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(3)
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
integer(c_int32_t), external :: qmckl_woodbury_3_c
info = qmckl_woodbury_3_c &
(context, Dim, Updates, Updates_index, Slater_inv)
end function qmckl_woodbury_3
#+end_src
#+CALL: generate_f_interface(table=qmckl_woodbury_3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_3 &
(context, Dim, Updates, Updates_index, Slater_inv) &
bind(C)
use, intrinsic :: iso_c_binding
import
implicit none
integer (c_int64_t) , intent(in) , value :: context
integer (c_int64_t) , intent(in) , value :: Dim
real (c_double ) , intent(in) :: Updates(3*Dim)
integer (c_int64_t) , intent(in) :: Updates_index(3)
real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim)
end function qmckl_woodbury_3
end interface
#+end_src
*** Test :noexport:
[TODO: FMJC] Write tests for the Sherman-Morrison part.
#+begin_src c :tangle (eval c_test)
const uint64_t woodbury3_Dim = 3;
const uint64_t woodbury3_Updates_index[3] = {1, 1, 1};
const double woodbury3_Updates[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
double woodbury3_Slater_inv[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
// [TODO : FMJC ] add realistic tests
rc = qmckl_woodbury_3_c(context, woodbury3_Dim, woodbury3_Updates, woodbury3_Updates_index, woodbury3_Slater_inv);
assert(rc == QMCKL_SUCCESS);
#+end_src
* End of files
@ -556,3 +774,4 @@ assert(rc == QMCKL_SUCCESS);
# -*- mode: org -*-
# vim: syntax=c