mirror of
https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2025-01-12 22:18:36 +01:00
Merge pull request #47 from pablooliveira/optimize-wb2
Optimize WB2 by inlining matmuls and simplifying copies
This commit is contained in:
commit
99fcaa60b6
@ -179,7 +179,7 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
|||||||
|
|
||||||
// Sherman-Morrison-Woodbury kernel 4
|
// Sherman-Morrison-Woodbury kernel 4
|
||||||
// WB2, SM2 mixing scheme
|
// WB2, SM2 mixing scheme
|
||||||
void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) {
|
void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_updates, double *Updates, unsigned int *Updates_index) {
|
||||||
#ifdef DEBUG2
|
#ifdef DEBUG2
|
||||||
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl;
|
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl;
|
||||||
showMatrix2(Updates_index, 1, N_updates, "Updates_index");
|
showMatrix2(Updates_index, 1, N_updates, "Updates_index");
|
||||||
|
@ -25,15 +25,8 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
|
|||||||
std::cerr << "Called Woodbury 2x2 kernel" << std::endl;
|
std::cerr << "Called Woodbury 2x2 kernel" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Compute D = V.S^{-1}
|
const unsigned int row1 = (Updates_index[0] - 1);
|
||||||
double D[2 * Dim];
|
const unsigned int row2 = (Updates_index[1] - 1);
|
||||||
unsigned int row1, row2;
|
|
||||||
row1 = Updates_index[0] - 1;
|
|
||||||
row2 = Updates_index[1] - 1;
|
|
||||||
for (unsigned int i = 0; i < Dim; i++) {
|
|
||||||
D[i] = Slater_inv[row1 * Dim + i];
|
|
||||||
D[Dim + i] = Slater_inv[row2 * Dim + i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
|
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
|
||||||
// OF LAYOUT OF 'Updates' !!
|
// OF LAYOUT OF 'Updates' !!
|
||||||
@ -72,17 +65,21 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
|
|||||||
Binv[2] = -1.0 * idet * B[2];
|
Binv[2] = -1.0 * idet * B[2];
|
||||||
Binv[3] = idet * B[0];
|
Binv[3] = idet * B[0];
|
||||||
|
|
||||||
// Compute B^{-1} x D
|
// Compute tmp = B^{-1} x (V.S^{-1})
|
||||||
double tmp[2 * Dim];
|
double tmp[2 * Dim];
|
||||||
matMul2(Binv, D, tmp, 2, 2, Dim);
|
for(unsigned int i = 0; i < 2; i++) {
|
||||||
|
for(unsigned int j = 0; j < Dim; j++) {
|
||||||
|
tmp[i * Dim + j] = Binv[i * 2] * Slater_inv[row1*Dim + j];
|
||||||
|
tmp[i * Dim + j] += Binv[i * 2 + 1] * Slater_inv[row2*Dim + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Compute C x B^{-1} x D
|
// Compute (S + U V)^{-1} = S^{-1} - C x tmp
|
||||||
double tmp2[Dim * Dim];
|
for(unsigned int i = 0; i < Dim; i++) {
|
||||||
matMul2(C, tmp, tmp2, Dim, 2, Dim);
|
for(unsigned int j = 0; j < Dim; j++) {
|
||||||
|
Slater_inv[i * Dim + j] -= C[i * 2 ] * tmp[j];
|
||||||
// Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D
|
Slater_inv[i * Dim + j] -= C[i * 2 + 1] * tmp[Dim + j];
|
||||||
for (unsigned int i = 0; i < Dim * Dim; i++) {
|
}
|
||||||
Slater_inv[i] -= tmp2[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include "Woodbury.hpp"
|
#include "Woodbury.hpp"
|
||||||
#include "SMWB.hpp"
|
#include "SMWB.hpp"
|
||||||
|
|
||||||
// #define PERF
|
#define PERF
|
||||||
|
|
||||||
#ifdef PERF
|
#ifdef PERF
|
||||||
unsigned int repetition_number;
|
unsigned int repetition_number;
|
||||||
@ -186,7 +186,7 @@ int main(int argc, char **argv) {
|
|||||||
if (argc != 6) {
|
if (argc != 6) {
|
||||||
#else
|
#else
|
||||||
if (argc != 5) {
|
if (argc != 5) {
|
||||||
#endif
|
#endif
|
||||||
std::cerr << "Execute from within 'datasets/'" << std::endl;
|
std::cerr << "Execute from within 'datasets/'" << std::endl;
|
||||||
std::cerr
|
std::cerr
|
||||||
<< "usage: test_h5 <version> <start cycle> <stop cycle> <tolerance>"
|
<< "usage: test_h5 <version> <start cycle> <stop cycle> <tolerance>"
|
||||||
|
Loading…
Reference in New Issue
Block a user