1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-11-19 20:42:50 +01:00

Merge pull request #99 from fmgjcoppens/master

Various SIMD_LENGTH related issues.
This commit is contained in:
Anthony Scemama 2023-01-26 14:07:29 +01:00 committed by GitHub
commit 728a81f96b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 306 additions and 191 deletions

View File

@ -151,15 +151,15 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
double __attribute__((aligned(8))) C[Dim];
double __attribute__((aligned(8))) D[LDS];
uint32_t l = 0;
uint64_t l = 0;
// For each update
while (l < N_updates) {
// C = S^{-1} x u_l
for (uint32_t i = 0; i < Dim; i++) {
for (uint64_t i = 0; i < Dim; i++) {
C[i] = 0.0f;
IVDEP
ALIGNED
for (uint32_t j = 0; j < LDS; j++) {
for (uint64_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
}
}
@ -180,15 +180,15 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
// selecting column: v_l^T * S_inv
IVDEP
ALIGNED
for (uint32_t j = 0; j < LDS; j++) {
for (uint64_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j];
}
// A^{-1} = A^{-1} - C x D / den
for (uint32_t i = 0; i < Dim; i++) {
for (uint64_t i = 0; i < Dim; i++) {
IVDEP
ALIGNED
for (uint32_t j = 0; j < LDS; j++) {
for (uint64_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update;
}
@ -413,6 +413,7 @@ The tests for the kernels are executed on datasets that are extracted from a run
#+begin_src c :tangle (eval c_test)
const uint64_t Dim = 21;
const uint64_t LDS = (1 + (Dim) / SIMD_LENGTH) * SIMD_LENGTH;
const double breakdown = 1e-3;
const double tolerance = 1e-3;
double res[441];
@ -425,7 +426,15 @@ assert(Slater_inv1 != NULL);
// original determinant of Slater1 (before applying updates)
double det = 3.407025646103221e-10;
rc = qmckl_sherman_morrison(context, Dim, Dim, N_updates1, Updates1, Updates_index1, breakdown, Slater_inv1, &det);
rc = qmckl_sherman_morrison(context,
LDS,
Dim,
N_updates1,
Updates1,
Updates_index1,
breakdown,
Slater_inv1,
&det);
// Check that the determinant is updated properly
assert(fabs(det + 4.120398385068217e-10) < 1e-15);
@ -434,7 +443,7 @@ for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * Dim + j];
res[i * Dim + j] += Slater1[i * Dim + k] * Slater_inv1[k * LDS + j];
}
}
}
@ -519,10 +528,6 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include <math.h>
#include "qmckl.h"
qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -538,25 +543,29 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return QMCKL_NULL_CONTEXT;
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[2 * Dim];
// Compute C = (S^T)^{-1}U : Dim x 2
double __attribute__((aligned(8))) C[2 * Dim];
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < 2; j++) {
C[i * 2 + j] = 0;
for (uint64_t k = 0; k < Dim; k++) {
C[i * 2 + j] += Slater_inv[i * LDS + k] * Updates[Dim * j + k];
}
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < LDS; k++) {
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
}
}
// Compute B = 1 + V * C
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
@ -569,30 +578,34 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
}
// Update det(S) when passed
if (determinant != NULL)
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2x2 inversion
double Binv[4], idet = 1.0 / det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// Compute tmp = B^{-1} x (V.S^{-1})
double tmp[2 * Dim];
for (uint64_t i = 0; i < 2; i++) {
for (uint64_t j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 2] * Slater_inv[row1 * LDS + j];
tmp[i * Dim + j] += Binv[i * 2 + 1] * Slater_inv[row2 * LDS + j];
}
// tmp = B^{-1}D : 2 x LDS
double __attribute__((aligned(8))) tmp[2 * LDS];
double* r1dim = &(Slater_inv[row1 * LDS]);
double* r2dim = &(Slater_inv[row2 * LDS]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[Dim + j];
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
}
}
@ -644,13 +657,13 @@ assert(Updates2 != NULL);
assert(Updates_index2 != NULL);
assert(Slater_inv2 != NULL);
det = -1.4432116661319376e-11;
rc = qmckl_woodbury_2(context, Dim, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
rc = qmckl_woodbury_2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
assert(fabs(det-2.367058141251457e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * Dim + j];
res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j];
}
}
}
@ -687,8 +700,8 @@ assert(rc == QMCKL_SUCCESS);
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
#pragma ivdep
#pragma vector aligned
#+NAME: qmckl_woodbury_3_args
| qmckl_context | context | in | Global state |
@ -730,10 +743,6 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include <math.h>
#include "qmckl.h"
qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -749,26 +758,32 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
,*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return QMCKL_NULL_CONTEXT;
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_3",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
const uint64_t row3 = (Updates_index[2] - 1);
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[3 * Dim];
// Compute C = (S^T)^{-1}U : Dim x 3
double __attribute__((aligned(8))) C[3 * Dim];
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < 3; j++) {
C[i * 3 + j] = 0;
for (uint64_t k = 0; k < Dim; k++) {
C[i * 3 + j] += Slater_inv[i * LDS + k] * Updates[Dim * j + k];
}
C[i * 3] = 0;
C[i * 3 + 1] = 0;
C[i * 3 + 2] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < LDS; k++) {
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
}
}
// Compute B = 1 + V.C
// Compute B = 1 + VC : 3 x 3
const double B0 = C[row1 * 3] + 1;
const double B1 = C[row1 * 3 + 1];
const double B2 = C[row1 * 3 + 2];
@ -788,11 +803,11 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
}
// Update det(Slater) if passed
if (determinant != NULL)
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 3x3 inversion
double Binv[9], idet = 1.0 / det;
// Compute B^{-1} with explicit formula for 3 x 3 inversion
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
Binv[0] = (B4 * B8 - B7 * B5) * idet;
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
Binv[2] = (B1 * B5 - B4 * B2) * idet;
@ -803,22 +818,29 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
Binv[8] = (B0 * B4 - B3 * B1) * idet;
// Compute tmp = B^{-1} x (V.S^{-1})
double tmp[3 * Dim];
for (uint64_t i = 0; i < 3; i++) {
for (uint64_t j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 3] * Slater_inv[row1 * LDS + j];
tmp[i * Dim + j] += Binv[i * 3 + 1] * Slater_inv[row2 * LDS + j];
tmp[i * Dim + j] += Binv[i * 3 + 2] * Slater_inv[row3 * LDS + j];
}
// tmp = B^{-1}D : 3 x LDS
double __attribute__((aligned(8))) tmp[3 * LDS];
double* r1dim = &(Slater_inv[row1 * LDS]);
double* r2dim = &(Slater_inv[row2 * LDS]);
double* r3dim = &(Slater_inv[row3 * LDS]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
tmp[LDS + j] =
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
tmp[2 * LDS + j] =
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
}
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[Dim + j];
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * Dim + j];
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
}
}
@ -870,13 +892,13 @@ assert(Updates3 != NULL);
assert(Updates_index3 != NULL);
assert(Slater_inv3_1 != NULL);
det = -1.23743195512859e-09;
rc = qmckl_woodbury_3(context, Dim, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
rc = qmckl_woodbury_3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * Dim + j];
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j];
}
}
}
@ -963,9 +985,6 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include "qmckl.h"
qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -977,19 +996,24 @@ qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context,
double* determinant) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return QMCKL_NULL_CONTEXT;
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_sherman_morrison_splitting",
NULL);
}
double later_updates[Dim * N_updates];
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
uint64_t later_index[N_updates];
uint64_t later = 0;
(void) qmckl_slagel_splitting(LDS, Dim, N_updates, Updates, Updates_index,
breakdown, Slater_inv, later_updates, later_index, &later, determinant);
(void) qmckl_slagel_splitting(
LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
later_updates, later_index, &later, determinant);
if (later > 0) {
(void) qmckl_sherman_morrison_splitting(context, LDS, Dim, later,
later_updates, later_index, breakdown, Slater_inv, determinant);
(void) qmckl_sherman_morrison_splitting(
context, LDS, Dim, later, later_updates, later_index, breakdown,
Slater_inv, determinant);
}
return QMCKL_SUCCESS;
@ -1041,13 +1065,13 @@ assert(Updates3 != NULL);
assert(Updates_index3 != NULL);
assert(Slater_inv3_2 != NULL);
det = -1.23743195512859e-09;
rc = qmckl_sherman_morrison_splitting(context, Dim, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
rc = qmckl_sherman_morrison_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det);
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * Dim + j];
res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j];
}
}
}
@ -1127,9 +1151,6 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include "qmckl.h"
qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -1141,59 +1162,108 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
double* determinant) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return QMCKL_NULL_CONTEXT;
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_sherman_morrison_smw32s",
NULL);
}
qmckl_exit_code rc;
uint64_t n_of_3blocks = N_updates / 3;
uint64_t remainder = N_updates % 3;
uint64_t length_3block = 3 * Dim;
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
// Woodbury 3x3 kernel
double later_updates[Dim * N_updates];
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
uint64_t later_index[N_updates];
uint64_t later = 0;
// Special case for 4 rank-1 updates: 2+2
if (N_updates == 4) {
qmckl_exit_code rc =
qmckl_woodbury_2(context, LDS, Dim, Updates, Updates_index,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
uint64_t l = 0;
rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates, Updates_index,
breakdown, Slater_inv,
later_updates + (LDS * later),
later_index + later, &l, determinant);
later += l;
}
rc = qmckl_woodbury_2(context, LDS, Dim, &Updates[2 * LDS],
&Updates_index[2], breakdown, Slater_inv,
determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
uint64_t l = 0;
rc = qmckl_slagel_splitting(
LDS, Dim, 2, &Updates[2 * LDS], &Updates_index[2], breakdown,
Slater_inv, later_updates + (LDS * later), later_index + later,
&l, determinant);
later += l;
}
if (later > 0) {
rc = qmckl_sherman_morrison_splitting(
context, LDS, Dim, later, later_updates, later_index, breakdown,
Slater_inv, determinant);
}
return QMCKL_SUCCESS;
}
// And for the other cases != 4
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates
// with Woodbury 3x3 kernel
uint64_t n_of_3blocks = N_updates / 3;
uint64_t remainder = N_updates % 3;
uint64_t length_3block = 3 * LDS;
if (n_of_3blocks > 0) {
for (uint64_t i = 0; i < n_of_3blocks; i++) {
const double *Updates_3block = &Updates[i * length_3block];
const uint64_t *Updates_index_3block = &Updates_index[i * 3];
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
if (rc != 0) { // Send the entire block to slagel_splitting
const double* Updates_3block = &Updates[i * length_3block];
const uint64_t* Updates_index_3block = &Updates_index[i * 3];
qmckl_exit_code rc = qmckl_woodbury_3(
context, LDS, Dim, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
uint64_t l = 0;
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
later = later + l;
rc = qmckl_slagel_splitting(
LDS, Dim, 3, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, later_updates + (LDS * later),
later_index + later, &l, determinant);
later += l;
}
}
}
// Apply last remaining block of 2 updates with Woodbury 2x2 kernel
if (remainder == 2) {
const double *Updates_2block = &Updates[n_of_3blocks * length_3block];
const uint64_t *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
rc = qmckl_woodbury_2(context, LDS, Dim, Updates_2block, Updates_index_2block, breakdown, Slater_inv, determinant);
if (rc != 0) { // Send the entire block to slagel_splitting
const double* Updates_2block = &Updates[n_of_3blocks * length_3block];
const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks];
qmckl_exit_code rc = qmckl_woodbury_2(
context, LDS, Dim, Updates_2block, Updates_index_2block,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
uint64_t l = 0;
(void) qmckl_slagel_splitting(LDS, Dim, 2, Updates_2block, Updates_index_2block,
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
later = later + l;
rc = qmckl_slagel_splitting(
LDS, Dim, 2, Updates_2block, Updates_index_2block, breakdown,
Slater_inv, later_updates + (LDS * later), later_index + later,
&l, determinant);
later += l;
}
}
// Apply last remaining update with slagel_splitting
else if (remainder == 1) {
const double *Updates_1block = &Updates[n_of_3blocks * length_3block];
const uint64_t *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
if (remainder == 1) {
const double* Updates_1block = &Updates[n_of_3blocks * length_3block];
const uint64_t* Updates_index_1block = &Updates_index[3 * n_of_3blocks];
uint64_t l = 0;
(void) qmckl_slagel_splitting(LDS, Dim, 1, Updates_1block, Updates_index_1block,
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
later = later + l;
(void) qmckl_slagel_splitting(
LDS, Dim, 1, Updates_1block, Updates_index_1block, breakdown,
Slater_inv, later_updates + (LDS * later), later_index + later, &l,
determinant);
later += l;
}
if (later > 0) {
(void) qmckl_sherman_morrison_splitting(context, LDS, Dim, later, later_updates, later_index, breakdown, Slater_inv, determinant);
(void) qmckl_sherman_morrison_splitting(
context, LDS, Dim, later, later_updates, later_index, breakdown,
Slater_inv, determinant);
}
return QMCKL_SUCCESS;
}
@ -1243,14 +1313,14 @@ assert(Updates5 != NULL);
assert(Updates_index5 != NULL);
assert(Slater_inv5 != NULL);
det = -3.186005284713128e-10;
rc = qmckl_sherman_morrison_smw32s(context, Dim, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det);
rc = qmckl_sherman_morrison_smw32s(context, LDS, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det);
assert(fabs(det + 5.260200118412903e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
res[i * Dim + j] = 0;
for (unsigned int k = 0; k < Dim; k++) {
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * Dim + j];
res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * LDS + j];
}
}
}
@ -1342,67 +1412,70 @@ These functions can only be used internally by the kernels in this module.
*** C source
#+begin_src c :tangle (eval c) :comments org
#include <stdbool.h>
#include <math.h>
#include "qmckl.h"
qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS,
uint64_t Dim,
uint64_t N_updates,
const double *Updates,
const uint64_t *Updates_index,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double *Slater_inv,
double *later_updates,
uint64_t *later_index,
uint64_t *later,
double *determinant) {
// #ifdef DEBUG // Leave commented out since debugging information is not yet implemented in QMCkl.
// std::cerr << "Called slagel_splitting with " << N_updates << " updates" << std::endl;
// #endif
double* Slater_inv,
double* later_updates,
uint64_t* later_index,
uint64_t* later,
double* determinant) {
double C[Dim];
double D[Dim];
double __attribute__((aligned(8))) C[LDS];
double __attribute__((aligned(8))) D[LDS];
uint64_t l = 0;
// For each update
while (l < N_updates) {
// C = S^{-1} x U_l
for (uint64_t i = 0; i < Dim; i++) {
C[i] = 0;
for (uint64_t j = 0; j < Dim; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * Dim + j];
C[i] = 0.0f;
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
}
}
// Denominator
double den = 1 + C[Updates_index[l] - 1];
if (fabs(den) < breakdown) { // Here is decided to split the update, or not.
// U_l = U_l / 2: split the update in 2 equal halves and save the second halve in later_updates
for (uint64_t i = 0; i < Dim; i++) {
later_updates[*later * Dim + i] = Updates[l * Dim + i] / 2.0;
C[i] /= 2.0;
const int cui = Updates_index[l] - 1;
double den = 1.0f + C[cui];
if (fabs(den) < breakdown) {
// U_l = U_l / 2: split the update in 2 equal halves and save the
// second halve in later_updates
IVDEP
ALIGNED
for (uint64_t i = 0; i < LDS; i++) {
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1 + C[Updates_index[l] - 1];
} // From here onwards we continue with applying the first havel of the update to Slater_inv
double iden = 1 / den;
den = 1.0f + C[cui];
} // From here onwards we continue with applying the first halve of the
// update to Slater_inv
double iden = 1.0f / den;
if (determinant != NULL)
if (determinant)
*determinant *= den;
// D = v^T x S^{-1}
for (uint64_t j = 0; j < Dim; j++) {
D[j] = Slater_inv[(Updates_index[l] - 1) * LDS + j];
// D = v^T x S^{-1} : 1 x LDS
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j];
}
// S^{-1} = S^{-1} - C x D / den
for (uint64_t i = 0; i < Dim; i++) {
for (uint64_t j = 0; j < Dim; j++) {
double update = C[i] * D[j] * iden;
IVDEP
ALIGNED
for (uint64_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update;
}
}

File diff suppressed because one or more lines are too long