- Optimize WB3 by inlining matmuls and simplifying copies

- Occasional code restyling with 'clang-formant --style=LLVM'.
This commit is contained in:
François Coppens 2021-07-12 08:13:58 +02:00
parent bc0cd03f02
commit fa61b50bb0
12 changed files with 240 additions and 217 deletions

View File

@ -98,7 +98,8 @@ template <typename T> void matMul(T *A, T *B, T *C, unsigned int M) {
} }
template <typename T1, typename T2, typename T3> template <typename T1, typename T2, typename T3>
void matMul2(T1 *A, T2 *B, T3 *C, unsigned int M, unsigned int N, unsigned int P) { void matMul2(T1 *A, T2 *B, T3 *C, unsigned int M, unsigned int N,
unsigned int P) {
for (unsigned int i = 0; i < M; i++) { for (unsigned int i = 0; i < M; i++) {
for (unsigned int j = 0; j < P; j++) { for (unsigned int j = 0; j < P; j++) {
C[i * P + j] = 0; C[i * P + j] = 0;
@ -265,7 +266,6 @@ template <typename T> T residual_frobenius2(T *A, unsigned int Dim) {
return res; return res;
} }
template <typename T> T residual2(T *A, unsigned int Dim) { template <typename T> T residual2(T *A, unsigned int Dim) {
double res = 0.0; double res = 0.0;
for (unsigned int i = 0; i < Dim; i++) { for (unsigned int i = 0; i < Dim; i++) {

View File

@ -1,7 +1,8 @@
// Sherman-Morrison-Woodbury kernel 1 // Sherman-Morrison-Woodbury kernel 1
// WB2, WB3, SM2 mixing scheme // WB2, WB3, SM2 mixing scheme
void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SMWB1(double *Slater_inv, const unsigned int Dim,
double *Updates, unsigned int *Updates_index); const unsigned int N_updates, double *Updates,
unsigned int *Updates_index);
// // Sherman-Morrison-Woodbury kernel 2 // // Sherman-Morrison-Woodbury kernel 2
// // WB2, WB3, SM3 mixing scheme // // WB2, WB3, SM3 mixing scheme
@ -15,5 +16,6 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
// Sherman-Morrison-Woodbury kernel 4 // Sherman-Morrison-Woodbury kernel 4
// WB2, SM2 mixing scheme // WB2, SM2 mixing scheme
void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SMWB4(double *Slater_inv, const unsigned int Dim,
double *Updates, unsigned int *Updates_index); const unsigned int N_updates, double *Updates,
unsigned int *Updates_index);

View File

@ -8,7 +8,9 @@ void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index); double *Updates, unsigned int *Updates_index);
void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index, double *later_updates, unsigned int* later_index, unsigned int *later); double *Updates, unsigned int *Updates_index,
double *later_updates, unsigned int *later_index,
unsigned int *later);
// Sherman Morrison, leaving zero denominators for later // Sherman Morrison, leaving zero denominators for later
void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates,

View File

@ -1,7 +1,7 @@
// Woodbury 2x2 kernel // Woodbury 2x2 kernel
bool WB2(double *Slater_inv, unsigned int Dim, bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
double *Updates, unsigned int *Updates_index); unsigned int *Updates_index);
// Woodbury 3x3 kernel // Woodbury 3x3 kernel
bool WB3(double *Slater_inv, unsigned int Dim, bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
double *Updates, unsigned int *Updates_index); unsigned int *Updates_index);

View File

@ -1,16 +1,19 @@
#include "SMWB.hpp" #include "SMWB.hpp"
#include "Helpers.hpp"
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "Woodbury.hpp" #include "Woodbury.hpp"
#include "Helpers.hpp"
// #define DEBUG1 // #define DEBUG1
// #define DEBUG2 // #define DEBUG2
// Sherman-Morrison-Woodbury kernel 1 // Sherman-Morrison-Woodbury kernel 1
// WB2, WB3, SM2 mixing scheme // WB2, WB3, SM2 mixing scheme
void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) { void SMWB1(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index) {
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates
<< " updates" << std::endl;
showMatrix2(Updates_index, 1, N_updates, "Updates_index"); showMatrix2(Updates_index, 1, N_updates, "Updates_index");
showMatrix2(Updates, N_updates, Dim, "Updates"); showMatrix2(Updates, N_updates, Dim, "Updates");
#endif #endif
@ -21,7 +24,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
unsigned int length_2block = 2 * Dim; unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim; unsigned int length_1block = 1 * Dim;
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
// Woodbury 3x3 kernel
double later_updates[Dim * N_updates]; double later_updates[Dim * N_updates];
unsigned int later_index[N_updates]; unsigned int later_index[N_updates];
unsigned int later = 0; unsigned int later = 0;
@ -33,49 +37,57 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to SM2 if (!ok) { // Send the entire block to SM2
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2" << std::endl; std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2"
<< std::endl;
showMatrix2(Updates_3block, 3, Dim, "Updates_3block"); showMatrix2(Updates_3block, 3, Dim, "Updates_3block");
showMatrix2(Updates_index_3block, 1, 3, "Updates_index_3block"); showMatrix2(Updates_index_3block, 1, 3, "Updates_index_3block");
#endif #endif
unsigned int l = 0; unsigned int l = 0;
SM2star(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block, later_updates + (Dim * later), later_index + later, &l); SM2star(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l; later = later + l;
} }
} }
} }
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel if (remainder ==
2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double *Updates_2block = &Updates[n_of_3blocks * length_3block]; double *Updates_2block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
bool ok; bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM2 if (!ok) { // Send the entire block to SM2
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2" << std::endl; std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2"
<< std::endl;
#endif #endif
unsigned int l = 0; unsigned int l = 0;
SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block, later_updates+(Dim * later), later_index + later, &l); SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l; later = later + l;
} }
} else if (remainder == 1) { // Apply last remaining update with SM2 } else if (remainder == 1) { // Apply last remaining update with SM2
double *Updates_1block = &Updates[n_of_3blocks * length_3block]; double *Updates_1block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
unsigned int l = 0; unsigned int l = 0;
SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block, later_updates+(Dim * later), later_index + later, &l); SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l; later = later + l;
} }
if (later > 0) { if (later > 0) {
SM2(Slater_inv, Dim, later, later_updates, later_index); SM2(Slater_inv, Dim, later, later_updates, later_index);
} }
} }
// Sherman-Morrison-Woodbury kernel 4 // Sherman-Morrison-Woodbury kernel 4
// WB2, SM2 mixing scheme // WB2, SM2 mixing scheme
void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_updates, double *Updates, unsigned int *Updates_index) { void SMWB4(double *Slater_inv, const unsigned int Dim,
const unsigned int N_updates, double *Updates,
unsigned int *Updates_index) {
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates
<< " updates" << std::endl;
showMatrix2(Updates_index, 1, N_updates, "Updates_index"); showMatrix2(Updates_index, 1, N_updates, "Updates_index");
showMatrix2(Updates, N_updates, Dim, "Updates"); showMatrix2(Updates, N_updates, Dim, "Updates");
#endif #endif
@ -85,7 +97,8 @@ void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_upda
unsigned int length_2block = 2 * Dim; unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim; unsigned int length_1block = 1 * Dim;
// Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with Woodbury 2x2 kernel // Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with
// Woodbury 2x2 kernel
double later_updates[Dim * N_updates]; double later_updates[Dim * N_updates];
unsigned int later_index[N_updates]; unsigned int later_index[N_updates];
unsigned int later = 0; unsigned int later = 0;
@ -97,14 +110,16 @@ void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_upda
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM2 if (!ok) { // Send the entire block to SM2
#ifdef DEBUG1 #ifdef DEBUG1
std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2star" << std::endl; std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2star"
<< std::endl;
#endif #endif
#ifdef DEBUG2 #ifdef DEBUG2
showMatrix2(Updates_2block, 2, Dim, "Updates_2block"); showMatrix2(Updates_2block, 2, Dim, "Updates_2block");
showMatrix2(Updates_index_2block, 1, 2, "Updates_index_2block"); showMatrix2(Updates_index_2block, 1, 2, "Updates_index_2block");
#endif #endif
unsigned int l = 0; unsigned int l = 0;
SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block, later_updates + (Dim * later), later_index + later, &l); SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l; later = later + l;
} }
} }
@ -114,14 +129,14 @@ void SMWB4(double *Slater_inv, const unsigned int Dim, const unsigned int N_upda
double *Updates_1block = &Updates[n_of_2blocks * length_2block]; double *Updates_1block = &Updates[n_of_2blocks * length_2block];
unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks]; unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
unsigned int l = 0; unsigned int l = 0;
SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block, later_updates + (Dim * later), later_index + later, &l); SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block,
later_updates + (Dim * later), later_index + later, &l);
later = later + l; later = later + l;
} }
if (later > 0) { if (later > 0) {
SM2(Slater_inv, Dim, later, later_updates, later_index); SM2(Slater_inv, Dim, later, later_updates, later_index);
} }
} }
extern "C" { extern "C" {

View File

@ -123,7 +123,9 @@ void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
// Sherman Morrison, with J. Slagel splitting // Sherman Morrison, with J. Slagel splitting
// http://hdl.handle.net/10919/52966 // http://hdl.handle.net/10919/52966
void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index, double *later_updates, unsigned int* later_index, unsigned int *later) { double *Updates, unsigned int *Updates_index,
double *later_updates, unsigned int *later_index,
unsigned int *later) {
#ifdef DEBUG1 #ifdef DEBUG1
std::cerr << "Called SM2* with " << N_updates << " updates" << std::endl; std::cerr << "Called SM2* with " << N_updates << " updates" << std::endl;
#endif #endif
@ -179,7 +181,6 @@ void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
} }
} }
// Sherman Morrison, leaving zero denominators for later // Sherman Morrison, leaving zero denominators for later
void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index) { double *Updates, unsigned int *Updates_index) {

View File

@ -52,7 +52,8 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
double det = B[0] * B[3] - B[1] * B[2]; double det = B[0] * B[3] - B[1] * B[2];
if (std::fabs(det) < threshold()) { if (std::fabs(det) < threshold()) {
#ifdef DEBUG1 #ifdef DEBUG1
std::cerr << "Determinant too close to zero! No inverse found." << std::endl; std::cerr << "Determinant too close to zero! No inverse found."
<< std::endl;
std::cerr << "Determinant = " << det << std::endl; std::cerr << "Determinant = " << det << std::endl;
#endif #endif
return false; return false;
@ -96,23 +97,10 @@ bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
#ifdef DEBUG1 #ifdef DEBUG1
std::cerr << "Called Woodbury 3x3 kernel" << std::endl; std::cerr << "Called Woodbury 3x3 kernel" << std::endl;
#endif #endif
#ifdef DEBUG2
showMatrix2(Slater_inv, Dim, Dim, "Slater_inv BEFORE update");
showMatrix2(Updates, 3, Dim, "Updates");
showMatrix2(Updates_index, 1, 3, "Updates_index");
#endif
// Compute D = V * S^{-1} const unsigned int row1 = (Updates_index[0] - 1);
double D[3 * Dim]; const unsigned int row2 = (Updates_index[1] - 1);
unsigned int row1, row2, row3; const unsigned int row3 = (Updates_index[2] - 1);
row1 = Updates_index[0] - 1;
row2 = Updates_index[1] - 1;
row3 = Updates_index[2] - 1;
for (unsigned int i = 0; i < Dim; i++) {
D[i] = Slater_inv[row1 * Dim + i];
D[Dim + i] = Slater_inv[row2 * Dim + i];
D[2 * Dim + i] = Slater_inv[row3 * Dim + i];
}
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !! // OF LAYOUT OF 'Updates' !!
@ -128,9 +116,6 @@ bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
#ifdef DEBUG2 #ifdef DEBUG2
showMatrix2(C, Dim, 3, "C = S_inv * U"); showMatrix2(C, Dim, 3, "C = S_inv * U");
#endif
#ifdef DEBUG2
showMatrix2(D, 3, Dim, "D = V * S_inv"); showMatrix2(D, 3, Dim, "D = V * S_inv");
#endif #endif
@ -154,14 +139,14 @@ bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
// Check if determinant of B is not too close to zero // Check if determinant of B is not too close to zero
double det; double det;
det = B[0] * (B[4] * B[8] - B[5] * B[7]) - det = B[0] * (B[4] * B[8] - B[5] * B[7]) -
B[1] * (B[3] * B[8] - B[5] * B[6]) + B[1] * (B[3] * B[8] - B[5] * B[6]) + B[2] * (B[3] * B[7] - B[4] * B[6]);
B[2] * (B[3] * B[7] - B[4] * B[6]);
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Determinant of B = " << det << std::endl; std::cerr << "Determinant of B = " << det << std::endl;
#endif #endif
if (std::fabs(det) < threshold()) { if (std::fabs(det) < threshold()) {
#ifdef DEBUG1 #ifdef DEBUG1
std::cerr << "Determinant too close to zero! No inverse found." << std::endl; std::cerr << "Determinant too close to zero! No inverse found."
<< std::endl;
std::cerr << "Determinant = " << det << std::endl; std::cerr << "Determinant = " << det << std::endl;
#endif #endif
return false; return false;
@ -180,29 +165,32 @@ bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
Binv[8] = (B[0] * B[4] - B[3] * B[1]) * idet; Binv[8] = (B[0] * B[4] - B[3] * B[1]) * idet;
#ifdef DEBUG2 #ifdef DEBUG2
std::cerr << "Conditioning number of B = " << condition1(B, Binv, 3) << std::endl; std::cerr << "Conditioning number of B = " << condition1(B, Binv, 3)
<< std::endl;
showMatrix2(Binv, 3, 3, "Binv"); showMatrix2(Binv, 3, 3, "Binv");
#endif #endif
// Compute B^{-1} x D // Compute tmp = B^{-1} x (V.S^{-1})
double tmp[3 * Dim]; double tmp[3 * Dim];
matMul2(Binv, D, tmp, 3, 3, Dim); for (unsigned int i = 0; i < 3; i++) {
for (unsigned int j = 0; j < Dim; j++) {
tmp[i * Dim + j] = Binv[i * 3] * Slater_inv[row1 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 1] * Slater_inv[row2 * Dim + j];
tmp[i * Dim + j] += Binv[i * 3 + 2] * Slater_inv[row3 * Dim + j];
}
}
#ifdef DEBUG2 #ifdef DEBUG2
showMatrix2(tmp, 3, Dim, "tmp = Binv * D"); showMatrix2(tmp, 3, Dim, "tmp = Binv * D");
#endif #endif
// Compute C x B^{-1} x D // Compute (S + U V)^{-1} = S^{-1} - C x tmp
double tmp2[Dim * Dim]; for (unsigned int i = 0; i < Dim; i++) {
matMul2(C, tmp, tmp2, Dim, 3, Dim); for (unsigned int j = 0; j < Dim; j++) {
Slater_inv[i * Dim + j] -= C[i * 3] * tmp[j];
#ifdef DEBUG2 Slater_inv[i * Dim + j] -= C[i * 3 + 1] * tmp[Dim + j];
showMatrix2(tmp2, Dim, Dim, "tmp2 = C * tmp"); Slater_inv[i * Dim + j] -= C[i * 3 + 2] * tmp[2 * Dim + j];
#endif }
// Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D
for (unsigned int i = 0; i < Dim * Dim; i++) {
Slater_inv[i] -= tmp2[i];
} }
#ifdef DEBUG2 #ifdef DEBUG2

View File

@ -2,10 +2,10 @@
#include "hdf5/serial/hdf5.h" #include "hdf5/serial/hdf5.h"
#include "Helpers.hpp" #include "Helpers.hpp"
#include "SMWB.hpp"
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "Woodbury.hpp" #include "Woodbury.hpp"
#include "SMWB.hpp"
#include <fstream> #include <fstream>
#include <vector> #include <vector>
@ -86,48 +86,57 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
double *slater_inverse_nonpersistent = new double[dim * dim]; double *slater_inverse_nonpersistent = new double[dim * dim];
if (version == "sm1") { if (version == "sm1") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
} else if (version == "sm2") { } else if (version == "sm2") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
} else if (version == "sm3") { } else if (version == "sm3") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
} else if (version == "sm4") { } else if (version == "sm4") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
} else if (version == "wb2") { } else if (version == "wb2") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
WB2(slater_inverse_nonpersistent, dim, u, col_update_index); WB2(slater_inverse_nonpersistent, dim, u, col_update_index);
} }
} else if (version == "wb3") { } else if (version == "wb3") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
WB3(slater_inverse_nonpersistent, dim, u, col_update_index); WB3(slater_inverse_nonpersistent, dim, u, col_update_index);
} }
} else if (version == "smwb1") { } else if (version == "smwb1") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
} else if (version == "smwb4") { } else if (version == "smwb4") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} }
#ifdef MKL #ifdef MKL
} else if (version == "lapack") { } else if (version == "lapack") {
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_matrix, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_matrix,
dim * dim * sizeof(double));
inverse(slater_inverse_nonpersistent, dim); inverse(slater_inverse_nonpersistent, dim);
} }
#endif // MKL #endif // MKL
@ -135,7 +144,8 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
std::cerr << "Unknown version " << version << std::endl; std::cerr << "Unknown version " << version << std::endl;
exit(1); exit(1);
} }
std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double)); std::memcpy(slater_inverse, slater_inverse_nonpersistent,
dim * dim * sizeof(double));
delete[] slater_inverse_nonpersistent; delete[] slater_inverse_nonpersistent;
#else // No performance measurements repetition #else // No performance measurements repetition
if (version == "maponia3") { if (version == "maponia3") {
@ -210,8 +220,7 @@ int main(int argc, char **argv) {
#else #else
if (argc != 4) { if (argc != 4) {
std::cerr << "Execute from within 'datasets/'" << std::endl; std::cerr << "Execute from within 'datasets/'" << std::endl;
std::cerr std::cerr << "usage: test_h5 <version> <cycle file> <tolerance>"
<< "usage: test_h5 <version> <cycle file> <tolerance>"
<< std::endl; << std::endl;
return 1; return 1;
} }
@ -221,7 +230,8 @@ int main(int argc, char **argv) {
std::ifstream cyclefile(cyclefile_name); std::ifstream cyclefile(cyclefile_name);
std::vector<int> cycles; std::vector<int> cycles;
unsigned int cycle; unsigned int cycle;
while (cyclefile >> cycle) cycles.push_back(cycle); while (cyclefile >> cycle)
cycles.push_back(cycle);
double tolerance = std::stod(argv[3]); double tolerance = std::stod(argv[3]);
H5File file(FILE_NAME, H5F_ACC_RDONLY); H5File file(FILE_NAME, H5F_ACC_RDONLY);

View File

@ -2,10 +2,10 @@
#include "hdf5/serial/hdf5.h" #include "hdf5/serial/hdf5.h"
#include "Helpers.hpp" #include "Helpers.hpp"
#include "SMWB.hpp"
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "Woodbury.hpp" #include "Woodbury.hpp"
#include "SMWB.hpp"
#define PERF #define PERF
@ -82,11 +82,14 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
std::cout << "# of reps. = " << repetition_number << std::endl; std::cout << "# of reps. = " << repetition_number << std::endl;
double *slater_inverse_nonpersistent = new double[dim * dim]; double *slater_inverse_nonpersistent = new double[dim * dim];
for (unsigned int i = 0; i < repetition_number; i++) { for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); std::memcpy(slater_inverse_nonpersistent, slater_inverse,
dim * dim * sizeof(double));
if (version == "maponia3") { if (version == "maponia3") {
MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u,
col_update_index);
} else if (version == "maponia3s") { } else if (version == "maponia3s") {
MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u,
col_update_index);
} else if (version == "sm1") { } else if (version == "sm1") {
SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "sm2") { } else if (version == "sm2") {
@ -102,14 +105,17 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
} else if (version == "smwb1") { } else if (version == "smwb1") {
SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
// } else if (version == "smwb2") { // } else if (version == "smwb2") {
// SMWB2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); // SMWB2(slater_inverse_nonpersistent, dim, nupdates, u,
// col_update_index);
// } else if (version == "smwb3") { // } else if (version == "smwb3") {
// SMWB3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); // SMWB3(slater_inverse_nonpersistent, dim, nupdates, u,
// col_update_index);
} else if (version == "smwb4") { } else if (version == "smwb4") {
SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
#ifdef MKL #ifdef MKL
} else if (version == "lapack") { } else if (version == "lapack") {
memcpy(slater_inverse_nonpersistent, slater_matrix, dim * dim * sizeof(double)); memcpy(slater_inverse_nonpersistent, slater_matrix,
dim * dim * sizeof(double));
inverse(slater_inverse_nonpersistent, dim); inverse(slater_inverse_nonpersistent, dim);
#endif // MKL #endif // MKL
} else { } else {
@ -117,7 +123,8 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
exit(1); exit(1);
} }
} }
std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double)); std::memcpy(slater_inverse, slater_inverse_nonpersistent,
dim * dim * sizeof(double));
delete[] slater_inverse_nonpersistent; delete[] slater_inverse_nonpersistent;
#else #else
if (version == "maponia3") { if (version == "maponia3") {

View File

@ -174,9 +174,7 @@ int main(int argc, char **argv) {
} else { } else {
std::cerr << "failed -- cycle " << std::to_string(i) << std::endl; std::cerr << "failed -- cycle " << std::to_string(i) << std::endl;
} }
} }
} }
vfc_dump_probes(&probes); vfc_dump_probes(&probes);

View File

@ -11,5 +11,5 @@ fi
for ext in c cc cpp h hpp for ext in c cc cpp h hpp
do do
find $SMROOT -type f -iname "*.${ext}" -exec echo "$FORMATER $STYLE" {} \; find $SMROOT -type f -iname "*.${ext}" -exec $FORMATER $STYLE {} \;
done done