Optimize WB2 by inlining matmuls and simplifying copies

This commit is contained in:
Pablo Oliveira 2021-07-08 12:53:50 +02:00
parent 61df6e2827
commit c30967e63d
3 changed files with 18 additions and 21 deletions

View File

@ -179,7 +179,7 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
// Sherman-Morrison-Woodbury kernel 4 // Sherman-Morrison-Woodbury kernel 4
// WB2, SM2 mixing scheme // WB2, SM2 mixing scheme
void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) { void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_updates, double *Updates, unsigned int *Updates_index) {
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl;
showMatrix2(Updates_index, 1, N_updates, "Updates_index"); showMatrix2(Updates_index, 1, N_updates, "Updates_index");

View File

@ -25,15 +25,8 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
std::cerr << "Called Woodbury 2x2 kernel" << std::endl; std::cerr << "Called Woodbury 2x2 kernel" << std::endl;
#endif #endif
// Compute D = V.S^{-1} const unsigned int row1 = (Updates_index[0] - 1);
double D[2 * Dim]; const unsigned int row2 = (Updates_index[1] - 1);
unsigned int row1, row2;
row1 = Updates_index[0] - 1;
row2 = Updates_index[1] - 1;
for (unsigned int i = 0; i < Dim; i++) {
D[i] = Slater_inv[row1 * Dim + i];
D[Dim + i] = Slater_inv[row2 * Dim + i];
}
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !! // OF LAYOUT OF 'Updates' !!
@ -72,17 +65,21 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
Binv[2] = -1.0 * idet * B[2]; Binv[2] = -1.0 * idet * B[2];
Binv[3] = idet * B[0]; Binv[3] = idet * B[0];
// Compute B^{-1} x D // Compute tmp = B^{-1} x (V.S^{-1})
double tmp[2 * Dim]; double tmp[2 * Dim];
matMul2(Binv, D, tmp, 2, 2, Dim); for(unsigned int i = 0; i < 2; i++) {
for(unsigned int j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 2] * Slater_inv[row1*Dim + j];
tmp[i * Dim + j] += Binv[i * 2 + 1] * Slater_inv[row2*Dim + j];
}
}
// Compute C x B^{-1} x D // Compute (S + U V)^{-1} = S^{-1} - C x tmp
double tmp2[Dim * Dim]; for(unsigned int i = 0; i < Dim; i++) {
matMul2(C, tmp, tmp2, Dim, 2, Dim); for(unsigned int j = 0; j < Dim; j++) {
Slater_inv[i * Dim + j] -= C[i * 2 ] * tmp[j];
// Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D Slater_inv[i * Dim + j] -= C[i * 2 + 1] * tmp[Dim + j];
for (unsigned int i = 0; i < Dim * Dim; i++) { }
Slater_inv[i] -= tmp2[i];
} }
return true; return true;

View File

@ -7,7 +7,7 @@
#include "Woodbury.hpp" #include "Woodbury.hpp"
#include "SMWB.hpp" #include "SMWB.hpp"
// #define PERF #define PERF
#ifdef PERF #ifdef PERF
unsigned int repetition_number; unsigned int repetition_number;