mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-19 20:42:50 +01:00
- Added test data for SIMD_LENGTH values 1, 2, 4, 8.
- Added macros that automatically select the correct test data for a given value of SIMD_LENGTH. - Updated the kernel test code to deal correctly with zero-padded matrices. - Fixed kernels that generated false negatives due to bugs in the kernels that surfaced only after feeding them padded matrices.
This commit is contained in:
parent
c7467465a9
commit
c54521a1f8
@ -151,15 +151,15 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
|
||||
double __attribute__((aligned(8))) C[Dim];
|
||||
double __attribute__((aligned(8))) D[LDS];
|
||||
|
||||
uint32_t l = 0;
|
||||
uint64_t l = 0;
|
||||
// For each update
|
||||
while (l < N_updates) {
|
||||
// C = S^{-1} x u_l
|
||||
for (uint32_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
C[i] = 0.0f;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint32_t j = 0; j < LDS; j++) {
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
||||
}
|
||||
}
|
||||
@ -180,15 +180,15 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
|
||||
// selecting column: v_l^T * S_inv
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint32_t j = 0; j < LDS; j++) {
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
D[j] = Slater_inv[cui * LDS + j];
|
||||
}
|
||||
|
||||
// A^{-1} = A^{-1} - C x D / den
|
||||
for (uint32_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint32_t j = 0; j < LDS; j++) {
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
const double update = C[i] * D[j] * iden;
|
||||
Slater_inv[i * LDS + j] -= update;
|
||||
}
|
||||
@ -413,6 +413,7 @@ The tests for the kernels are executed on datasets that are extracted from a run
|
||||
|
||||
#+begin_src c :tangle (eval c_test)
|
||||
const uint64_t Dim = 21;
|
||||
const uint64_t LDS = (1 + (Dim) / SIMD_LENGTH) * SIMD_LENGTH;
|
||||
const double breakdown = 1e-3;
|
||||
const double tolerance = 1e-3;
|
||||
double res[441];
|
||||
@ -425,7 +426,15 @@ assert(Slater_inv1 != NULL);
|
||||
|
||||
// original determinant of Slater1 (before applying updates)
|
||||
double det = 3.407025646103221e-10;
|
||||
rc = qmckl_sherman_morrison(context, Dim, Dim, N_updates1, Updates1, Updates_index1, breakdown, Slater_inv1, &det);
|
||||
rc = qmckl_sherman_morrison(context,
|
||||
LDS,
|
||||
Dim,
|
||||
N_updates1,
|
||||
Updates1,
|
||||
Updates_index1,
|
||||
breakdown,
|
||||
Slater_inv1,
|
||||
&det);
|
||||
|
||||
// Check that the determinant is updated properly
|
||||
assert(fabs(det + 4.120398385068217e-10) < 1e-15);
|
||||
@ -434,7 +443,7 @@ for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * Dim + j];
|
||||
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -509,28 +518,24 @@ assert(rc == QMCKL_SUCCESS);
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant);
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 2
|
||||
B := 1 + V * C, 2 x 2
|
||||
@ -538,25 +543,29 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
*/
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_2",
|
||||
NULL);
|
||||
}
|
||||
|
||||
const uint64_t row1 = (Updates_index[0] - 1);
|
||||
const uint64_t row2 = (Updates_index[1] - 1);
|
||||
|
||||
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
|
||||
// OF LAYOUT OF 'Updates' !!
|
||||
double C[2 * Dim];
|
||||
// Compute C = (S^T)^{-1}U : Dim x 2
|
||||
double __attribute__((aligned(8))) C[2 * Dim];
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t j = 0; j < 2; j++) {
|
||||
C[i * 2 + j] = 0;
|
||||
for (uint64_t k = 0; k < Dim; k++) {
|
||||
C[i * 2 + j] += Slater_inv[i * LDS + k] * Updates[Dim * j + k];
|
||||
C[i * 2] = 0;
|
||||
C[i * 2 + 1] = 0;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t k = 0; k < LDS; k++) {
|
||||
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
|
||||
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute B = 1 + V * C
|
||||
// Compute B = 1 + VC : 2 x 2
|
||||
const double B0 = C[row1 * 2] + 1;
|
||||
const double B1 = C[row1 * 2 + 1];
|
||||
const double B2 = C[row2 * 2];
|
||||
@ -565,35 +574,39 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
// Check if determinant of inverted matrix is not zero
|
||||
double det = B0 * B3 - B1 * B2;
|
||||
if (fabs(det) < breakdown) {
|
||||
return QMCKL_FAILURE;
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
|
||||
// Update det(S) when passed
|
||||
if (determinant != NULL)
|
||||
*determinant *= det;
|
||||
if (determinant)
|
||||
*determinant *= det;
|
||||
|
||||
// Compute B^{-1} with explicit formula for 2x2 inversion
|
||||
double Binv[4], idet = 1.0 / det;
|
||||
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
||||
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
||||
Binv[0] = idet * B3;
|
||||
Binv[1] = -1.0 * idet * B1;
|
||||
Binv[2] = -1.0 * idet * B2;
|
||||
Binv[3] = idet * B0;
|
||||
|
||||
// Compute tmp = B^{-1} x (V.S^{-1})
|
||||
double tmp[2 * Dim];
|
||||
for (uint64_t i = 0; i < 2; i++) {
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
tmp[i * Dim + j] = Binv[i * 2] * Slater_inv[row1 * LDS + j];
|
||||
tmp[i * Dim + j] += Binv[i * 2 + 1] * Slater_inv[row2 * LDS + j];
|
||||
}
|
||||
// tmp = B^{-1}D : 2 x LDS
|
||||
double __attribute__((aligned(8))) tmp[2 * LDS];
|
||||
double* __restrict r1dim = &(Slater_inv[row1 * LDS]);
|
||||
double* __restrict r2dim = &(Slater_inv[row2 * LDS]);
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
||||
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
||||
}
|
||||
|
||||
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
|
||||
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[Dim + j];
|
||||
}
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
@ -644,13 +657,13 @@ assert(Updates2 != NULL);
|
||||
assert(Updates_index2 != NULL);
|
||||
assert(Slater_inv2 != NULL);
|
||||
det = -1.4432116661319376e-11;
|
||||
rc = qmckl_woodbury_2(context, Dim, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
||||
rc = qmckl_woodbury_2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
||||
assert(fabs(det-2.367058141251457e-10) < 1e-15);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * Dim + j];
|
||||
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -687,8 +700,8 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||
from applying the updates to the original matrix.
|
||||
|
||||
|
||||
#pragma ivdep
|
||||
#pragma vector aligned
|
||||
|
||||
#+NAME: qmckl_woodbury_3_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
@ -720,28 +733,24 @@ assert(rc == QMCKL_SUCCESS);
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant);
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 3
|
||||
B := 1 + V * C, 3 x 3
|
||||
@ -749,26 +758,32 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
,*/
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_3",
|
||||
NULL);
|
||||
}
|
||||
|
||||
const uint64_t row1 = (Updates_index[0] - 1);
|
||||
const uint64_t row2 = (Updates_index[1] - 1);
|
||||
const uint64_t row3 = (Updates_index[2] - 1);
|
||||
|
||||
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
|
||||
// OF LAYOUT OF 'Updates' !!
|
||||
double C[3 * Dim];
|
||||
// Compute C = (S^T)^{-1}U : Dim x 3
|
||||
double __attribute__((aligned(8))) C[3 * Dim];
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t j = 0; j < 3; j++) {
|
||||
C[i * 3 + j] = 0;
|
||||
for (uint64_t k = 0; k < Dim; k++) {
|
||||
C[i * 3 + j] += Slater_inv[i * LDS + k] * Updates[Dim * j + k];
|
||||
C[i * 3] = 0;
|
||||
C[i * 3 + 1] = 0;
|
||||
C[i * 3 + 2] = 0;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t k = 0; k < LDS; k++) {
|
||||
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
|
||||
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
|
||||
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute B = 1 + V.C
|
||||
// Compute B = 1 + VC : 3 x 3
|
||||
const double B0 = C[row1 * 3] + 1;
|
||||
const double B1 = C[row1 * 3 + 1];
|
||||
const double B2 = C[row1 * 3 + 2];
|
||||
@ -784,15 +799,15 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
||||
B2 * (B3 * B7 - B4 * B6);
|
||||
if (fabs(det) < breakdown) {
|
||||
return QMCKL_FAILURE;
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
|
||||
// Update det(Slater) if passed
|
||||
if (determinant != NULL)
|
||||
*determinant *= det;
|
||||
if (determinant)
|
||||
*determinant *= det;
|
||||
|
||||
// Compute B^{-1} with explicit formula for 3x3 inversion
|
||||
double Binv[9], idet = 1.0 / det;
|
||||
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
||||
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
||||
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
||||
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
||||
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
||||
@ -803,23 +818,30 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
||||
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
||||
|
||||
// Compute tmp = B^{-1} x (V.S^{-1})
|
||||
double tmp[3 * Dim];
|
||||
for (uint64_t i = 0; i < 3; i++) {
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
tmp[i * Dim + j] = Binv[i * 3] * Slater_inv[row1 * LDS + j];
|
||||
tmp[i * Dim + j] += Binv[i * 3 + 1] * Slater_inv[row2 * LDS + j];
|
||||
tmp[i * Dim + j] += Binv[i * 3 + 2] * Slater_inv[row3 * LDS + j];
|
||||
}
|
||||
// tmp = B^{-1}D : 3 x LDS
|
||||
double __attribute__((aligned(8))) tmp[3 * LDS];
|
||||
double* __restrict r1dim = &(Slater_inv[row1 * LDS]);
|
||||
double* __restrict r2dim = &(Slater_inv[row2 * LDS]);
|
||||
double* __restrict r3dim = &(Slater_inv[row3 * LDS]);
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
||||
tmp[LDS + j] =
|
||||
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
||||
tmp[2 * LDS + j] =
|
||||
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
||||
}
|
||||
|
||||
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
|
||||
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[Dim + j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * Dim + j];
|
||||
}
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
|
||||
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
@ -870,13 +892,13 @@ assert(Updates3 != NULL);
|
||||
assert(Updates_index3 != NULL);
|
||||
assert(Slater_inv3_1 != NULL);
|
||||
det = -1.23743195512859e-09;
|
||||
rc = qmckl_woodbury_3(context, Dim, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
||||
rc = qmckl_woodbury_3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
||||
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * Dim + j];
|
||||
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -953,43 +975,45 @@ assert(rc == QMCKL_SUCCESS);
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant);
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_sherman_morrison_splitting",
|
||||
NULL);
|
||||
}
|
||||
|
||||
double later_updates[Dim * N_updates];
|
||||
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
||||
uint64_t later_index[N_updates];
|
||||
uint64_t later = 0;
|
||||
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, N_updates, Updates, Updates_index,
|
||||
breakdown, Slater_inv, later_updates, later_index, &later, determinant);
|
||||
qmckl_exit_code rc = qmckl_slagel_splitting(
|
||||
LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
|
||||
later_updates, later_index, &later, determinant);
|
||||
|
||||
if (later > 0) {
|
||||
(void) qmckl_sherman_morrison_splitting(context, LDS, Dim, later,
|
||||
later_updates, later_index, breakdown, Slater_inv, determinant);
|
||||
qmckl_exit_code rc = qmckl_sherman_morrison_splitting(
|
||||
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
||||
Slater_inv, determinant);
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
@ -1041,13 +1065,13 @@ assert(Updates3 != NULL);
|
||||
assert(Updates_index3 != NULL);
|
||||
assert(Slater_inv3_2 != NULL);
|
||||
det = -1.23743195512859e-09;
|
||||
rc = qmckl_sherman_morrison_splitting(context, Dim, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
|
||||
rc = qmckl_sherman_morrison_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
|
||||
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * Dim + j];
|
||||
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1117,83 +1141,129 @@ assert(rc == QMCKL_SUCCESS);
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant);
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_sherman_morrison_smw32s",
|
||||
NULL);
|
||||
}
|
||||
|
||||
qmckl_exit_code rc;
|
||||
|
||||
uint64_t n_of_3blocks = N_updates / 3;
|
||||
uint64_t remainder = N_updates % 3;
|
||||
uint64_t length_3block = 3 * Dim;
|
||||
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
|
||||
// Woodbury 3x3 kernel
|
||||
double later_updates[Dim * N_updates];
|
||||
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
||||
uint64_t later_index[N_updates];
|
||||
uint64_t later = 0;
|
||||
|
||||
// Special case for 4 rank-1 updates: 2+2
|
||||
if (N_updates == 4) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_woodbury_2(context, LDS, Dim, Updates, Updates_index,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates, Updates_index,
|
||||
breakdown, Slater_inv,
|
||||
later_updates + (LDS * later),
|
||||
later_index + later, &l, determinant);
|
||||
later += l;
|
||||
}
|
||||
rc = qmckl_woodbury_2(context, LDS, Dim, &Updates[2 * LDS],
|
||||
&Updates_index[2], breakdown, Slater_inv,
|
||||
determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting(
|
||||
LDS, Dim, 2, &Updates[2 * LDS], &Updates_index[2], breakdown,
|
||||
Slater_inv, later_updates + (LDS * later), later_index + later,
|
||||
&l, determinant);
|
||||
later += l;
|
||||
}
|
||||
if (later > 0) {
|
||||
rc = qmckl_sherman_morrison_splitting(
|
||||
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
||||
Slater_inv, determinant);
|
||||
}
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
// And for the other cases != 4
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates
|
||||
// with Woodbury 3x3 kernel
|
||||
uint64_t n_of_3blocks = N_updates / 3;
|
||||
uint64_t remainder = N_updates % 3;
|
||||
uint64_t length_3block = 3 * LDS;
|
||||
|
||||
if (n_of_3blocks > 0) {
|
||||
for (uint64_t i = 0; i < n_of_3blocks; i++) {
|
||||
const double *Updates_3block = &Updates[i * length_3block];
|
||||
const uint64_t *Updates_index_3block = &Updates_index[i * 3];
|
||||
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
const double* Updates_3block = &Updates[i * length_3block];
|
||||
const uint64_t* Updates_index_3block = &Updates_index[i * 3];
|
||||
qmckl_exit_code rc = qmckl_woodbury_3(
|
||||
context, LDS, Dim, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
|
||||
later = later + l;
|
||||
rc = qmckl_slagel_splitting(
|
||||
LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, later_updates + (LDS * later),
|
||||
later_index + later, &l, determinant);
|
||||
later += l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||
if (remainder == 2) {
|
||||
const double *Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
rc = qmckl_woodbury_2(context, LDS, Dim, Updates_2block, Updates_index_2block, breakdown, Slater_inv, determinant);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
const double* Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
qmckl_exit_code rc = qmckl_woodbury_2(
|
||||
context, LDS, Dim, Updates_2block, Updates_index_2block,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, 2, Updates_2block, Updates_index_2block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
|
||||
later = later + l;
|
||||
rc = qmckl_slagel_splitting(
|
||||
LDS, Dim, 2, Updates_2block, Updates_index_2block, breakdown,
|
||||
Slater_inv, later_updates + (LDS * later), later_index + later,
|
||||
&l, determinant);
|
||||
later += l;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply last remaining update with slagel_splitting
|
||||
else if (remainder == 1) {
|
||||
const double *Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||
if (remainder == 1) {
|
||||
const double* Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t* Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||
uint64_t l = 0;
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, 1, Updates_1block, Updates_index_1block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
|
||||
later = later + l;
|
||||
qmckl_exit_code rc = qmckl_slagel_splitting(
|
||||
LDS, Dim, 1, Updates_1block, Updates_index_1block, breakdown,
|
||||
Slater_inv, later_updates + (LDS * later), later_index + later, &l,
|
||||
determinant);
|
||||
later += l;
|
||||
}
|
||||
|
||||
if (later > 0) {
|
||||
(void) qmckl_sherman_morrison_splitting(context, LDS, Dim, later, later_updates, later_index, breakdown, Slater_inv, determinant);
|
||||
qmckl_exit_code rc = qmckl_sherman_morrison_splitting(
|
||||
context, LDS, Dim, later, later_updates, later_index, breakdown,
|
||||
Slater_inv, determinant);
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1243,14 +1313,14 @@ assert(Updates5 != NULL);
|
||||
assert(Updates_index5 != NULL);
|
||||
assert(Slater_inv5 != NULL);
|
||||
det = -3.186005284713128e-10;
|
||||
rc = qmckl_sherman_morrison_smw32s(context, Dim, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det);
|
||||
rc = qmckl_sherman_morrison_smw32s(context, LDS, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det);
|
||||
assert(fabs(det + 5.260200118412903e-10) < 1e-15);
|
||||
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
res[i * Dim + j] = 0;
|
||||
for (unsigned int k = 0; k < Dim; k++) {
|
||||
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * Dim + j];
|
||||
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * LDS + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1329,80 +1399,83 @@ These functions can only be used internally by the kernels in this module.
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant);
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
#include <math.h>
|
||||
#include "qmckl.h"
|
||||
|
||||
qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS,
|
||||
uint64_t Dim,
|
||||
uint64_t N_updates,
|
||||
const double *Updates,
|
||||
const uint64_t *Updates_index,
|
||||
const double breakdown,
|
||||
double *Slater_inv,
|
||||
double *later_updates,
|
||||
uint64_t *later_index,
|
||||
uint64_t *later,
|
||||
double *determinant) {
|
||||
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
|
||||
// std::cerr << "Called slagel_splitting with " << N_updates << " updates" << std::endl;
|
||||
// #endif
|
||||
uint64_t Dim,
|
||||
uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant) {
|
||||
|
||||
double C[Dim];
|
||||
double D[Dim];
|
||||
double __attribute__((aligned(8))) C[LDS];
|
||||
double __attribute__((aligned(8))) D[LDS];
|
||||
|
||||
uint64_t l = 0;
|
||||
// For each update
|
||||
while (l < N_updates) {
|
||||
// C = S^{-1} x U_l
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
C[i] = 0;
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
C[i] += Slater_inv[i * LDS + j] * Updates[l * Dim + j];
|
||||
C[i] = 0.0f;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
||||
}
|
||||
}
|
||||
|
||||
// Denominator
|
||||
double den = 1 + C[Updates_index[l] - 1];
|
||||
if (fabs(den) < breakdown) { // Here is decided to split the update, or not.
|
||||
|
||||
// U_l = U_l / 2: split the update in 2 equal halves and save the second halve in later_updates
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
later_updates[*later * Dim + i] = Updates[l * Dim + i] / 2.0;
|
||||
C[i] /= 2.0;
|
||||
const int cui = Updates_index[l] - 1;
|
||||
double den = 1.0f + C[cui];
|
||||
if (fabs(den) < breakdown) {
|
||||
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||
// second halve in later_updates
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t i = 0; i < LDS; i++) {
|
||||
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
||||
C[i] *= 0.5f;
|
||||
}
|
||||
later_index[*later] = Updates_index[l];
|
||||
(*later)++;
|
||||
|
||||
den = 1 + C[Updates_index[l] - 1];
|
||||
} // From here onwards we continue with applying the first havel of the update to Slater_inv
|
||||
double iden = 1 / den;
|
||||
den = 1.0f + C[cui];
|
||||
} // From here onwards we continue with applying the first halve of the
|
||||
// update to Slater_inv
|
||||
double iden = 1.0f / den;
|
||||
|
||||
if (determinant != NULL)
|
||||
if (determinant)
|
||||
*determinant *= den;
|
||||
|
||||
// D = v^T x S^{-1}
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
D[j] = Slater_inv[(Updates_index[l] - 1) * LDS + j];
|
||||
// D = v^T x S^{-1} : 1 x LDS
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
D[j] = Slater_inv[cui * LDS + j];
|
||||
}
|
||||
|
||||
// S^{-1} = S^{-1} - C x D / den
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
for (uint64_t j = 0; j < Dim; j++) {
|
||||
double update = C[i] * D[j] * iden;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
const double update = C[i] * D[j] * iden;
|
||||
Slater_inv[i * LDS + j] -= update;
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user