diff --git a/Makefile b/Makefile index aa2d9b6..4715fba 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ ifeq ($(ENV),INTEL) CXX = icpx FC = ifort - ARCH = -march=native + ARCH = -xCORE-AVX2 OPT = -O3 DEBUG = -g -debug full else ifeq ($(ENV),LLVM) @@ -14,8 +14,9 @@ else ifeq ($(ENV),LLVM) else ifeq ($(ENV),GNU) CXX = g++ FC = gfortran - ARCH = -mavx - OPT = -O3 + # ARCH = -mavx + ARCH = + OPT = -O1 DEBUG = -g else $(error No valid compiler environment set in $$ENV. \ diff --git a/include/Helpers.hpp b/include/Helpers.hpp index 41b320a..9ada9a1 100644 --- a/include/Helpers.hpp +++ b/include/Helpers.hpp @@ -8,6 +8,9 @@ #include #endif +#include + + // #define DEBUG #ifndef THRESHOLD #define THRESHOLD 1e-3 @@ -86,7 +89,8 @@ template T *transpose(T *A, unsigned int M) { return B; } -template void matMul(T *A, T *B, T *C, unsigned int M) { +template +void matMul(T *A, T *B, T *C, unsigned int M) { memset(C, 0, M * M * sizeof(T)); for (unsigned int i = 0; i < M; i++) { for (unsigned int j = 0; j < M; j++) { @@ -263,7 +267,7 @@ template T residual_frobenius2(T *A, unsigned int Dim) { res += delta * delta; } } - return res; + return sqrt(res); } template T residual2(T *A, unsigned int Dim) { diff --git a/include/SMWB.hpp b/include/SMWB.hpp index b117aef..90206ab 100644 --- a/include/SMWB.hpp +++ b/include/SMWB.hpp @@ -1,11 +1,17 @@ -// Sherman-Morrison-Woodbury kernel 1 -// WB2, WB3, SM2 mixing scheme -void SMWB1(double *Slater_inv, const unsigned int Dim, - const unsigned int N_updates, double *Updates, - unsigned int *Updates_index); - -// Sherman-Morrison-Woodbury kernel 2 +// Sherman-Morrison-Woodbury kernel WB2s // WB2, SM2 mixing scheme -void SMWB2(double *Slater_inv, const unsigned int Dim, +void WB2s(double *Slater_inv, const unsigned int Dim, const unsigned int N_updates, double *Updates, - unsigned int *Updates_index); + unsigned int *Updates_index, const double breakdown); + +// Sherman-Morrison-Woodbury kernel WB3s +// WB3, SM2 mixing scheme +void WB3s(double *Slater_inv, const unsigned int Dim, + const unsigned int N_updates, double *Updates, + unsigned int *Updates_index, const double breakdown); + +// Sherman-Morrison-Woodbury kernel WB32s +// WB3, WB2, SM2 mixing scheme +void WB32s(double *Slater_inv, const unsigned int Dim, + const unsigned int N_updates, double *Updates, + unsigned int *Updates_index, const double breakdown); diff --git a/include/SM_Standard.hpp b/include/SM_Standard.hpp index c004980..cd18954 100644 --- a/include/SM_Standard.hpp +++ b/include/SM_Standard.hpp @@ -1,22 +1,22 @@ // Naïve Sherman Morrison void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index); + double *Updates, unsigned int *Updates_index, const double breakdown); // Sherman Morrison, with J. Slagel splitting // http://hdl.handle.net/10919/52966 void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index); + double *Updates, unsigned int *Updates_index, const double breakdown); void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index, double *later_updates, unsigned int *later_index, - unsigned int *later); + unsigned int *later, const double breakdown); // Sherman Morrison, leaving zero denominators for later void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index); + double *Updates, unsigned int *Updates_index, const double breakdown); // Sherman Morrison (SM3+SM2), leaving zero denominators for later (SM3), and // when none are left falling back on Splitting (SM2) void SM4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index); + double *Updates, unsigned int *Updates_index, const double breakdown); diff --git a/include/Woodbury.hpp b/include/Woodbury.hpp index 4ba72e6..93720fc 100644 --- a/include/Woodbury.hpp +++ b/include/Woodbury.hpp @@ -1,7 +1,7 @@ // Woodbury 2x2 kernel bool WB2(double *Slater_inv, const unsigned int Dim, double *Updates, - const unsigned int *Updates_index); + const unsigned int *Updates_index, const double breakdown); // Woodbury 3x3 kernel bool WB3(double *Slater_inv, const unsigned int Dim, double *Updates, - const unsigned int *Updates_index); + const unsigned int *Updates_index, const double breakdown); diff --git a/qmckl b/qmckl index 41be86f..a5e58c8 160000 --- a/qmckl +++ b/qmckl @@ -1 +1 @@ -Subproject commit 41be86fe594f561247986cd75dc4557e0bc5e6de +Subproject commit a5e58c80d70978c12d24ff47b2362c9145af325c diff --git a/src/SMWB.cpp b/src/SMWB.cpp index 4006cf8..e224efb 100644 --- a/src/SMWB.cpp +++ b/src/SMWB.cpp @@ -3,88 +3,14 @@ #include "SM_Standard.hpp" #include "Woodbury.hpp" -// #define DEBUG1 +#define DEBUG1 // #define DEBUG2 -// Sherman-Morrison-Woodbury kernel 1 -// WB2, WB3, SM2 mixing scheme -void SMWB1(double *Slater_inv, const unsigned int Dim, - const unsigned int N_updates, double *Updates, - unsigned int *Updates_index) { -#ifdef DEBUG2 - std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates - << " updates" << std::endl; - showMatrix2(Updates_index, 1, N_updates, "Updates_index"); - showMatrix2(Updates, N_updates, Dim, "Updates"); -#endif - - unsigned int n_of_3blocks = N_updates / 3; - unsigned int remainder = N_updates % 3; - unsigned int length_3block = 3 * Dim; - unsigned int length_2block = 2 * Dim; - unsigned int length_1block = 1 * Dim; - - // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with - // Woodbury 3x3 kernel - double later_updates[Dim * N_updates]; - unsigned int later_index[N_updates]; - unsigned int later = 0; - if (n_of_3blocks > 0) { - for (unsigned int i = 0; i < n_of_3blocks; i++) { - double *Updates_3block = &Updates[i * length_3block]; - unsigned int *Updates_index_3block = &Updates_index[i * 3]; - bool ok; - ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); - if (!ok) { // Send the entire block to SM2 -#ifdef DEBUG2 - std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2" - << std::endl; - showMatrix2(Updates_3block, 3, Dim, "Updates_3block"); - showMatrix2(Updates_index_3block, 1, 3, "Updates_index_3block"); -#endif - unsigned int l = 0; - SM2star(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block, - later_updates + (Dim * later), later_index + later, &l); - later = later + l; - } - } - } - - if (remainder == - 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel - double *Updates_2block = &Updates[n_of_3blocks * length_3block]; - unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; - bool ok; - ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); - if (!ok) { // Send the entire block to SM2 -#ifdef DEBUG2 - std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2" - << std::endl; -#endif - unsigned int l = 0; - SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block, - later_updates + (Dim * later), later_index + later, &l); - later = later + l; - } - } else if (remainder == 1) { // Apply last remaining update with SM2 - double *Updates_1block = &Updates[n_of_3blocks * length_3block]; - unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; - unsigned int l = 0; - SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block, - later_updates + (Dim * later), later_index + later, &l); - later = later + l; - } - - if (later > 0) { - SM2(Slater_inv, Dim, later, later_updates, later_index); - } -} - -// Sherman-Morrison-Woodbury kernel 2 +// Sherman-Morrison-Woodbury kernel WB2s // WB2, SM2 mixing scheme -void SMWB2(double *Slater_inv, const unsigned int Dim, +void WB2s(double *Slater_inv, const unsigned int Dim, const unsigned int N_updates, double *Updates, - unsigned int *Updates_index) { + unsigned int *Updates_index, const double breakdown) { #ifdef DEBUG2 std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; @@ -95,7 +21,6 @@ void SMWB2(double *Slater_inv, const unsigned int Dim, unsigned int n_of_2blocks = N_updates / 2; unsigned int remainder = N_updates % 2; unsigned int length_2block = 2 * Dim; - unsigned int length_1block = 1 * Dim; // Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with // Woodbury 2x2 kernel @@ -107,10 +32,10 @@ void SMWB2(double *Slater_inv, const unsigned int Dim, double *Updates_2block = &Updates[i * length_2block]; unsigned int *Updates_index_2block = &Updates_index[i * 2]; bool ok; - ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); + ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block, breakdown); if (!ok) { // Send the entire block to SM2 #ifdef DEBUG1 - std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2star" + std::cerr << "Woodbury 2x2 block failed! Sending to SM w/ US" << std::endl; #endif #ifdef DEBUG2 @@ -119,33 +44,169 @@ void SMWB2(double *Slater_inv, const unsigned int Dim, #endif unsigned int l = 0; SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block, - later_updates + (Dim * later), later_index + later, &l); + later_updates + (Dim * later), later_index + later, &l, breakdown); later = later + l; } } } - if (remainder == 1) { // Apply last remaining update with SM2 + if (remainder != 0) { // Apply last remaining update with SM2 double *Updates_1block = &Updates[n_of_2blocks * length_2block]; unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks]; unsigned int l = 0; - SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block, - later_updates + (Dim * later), later_index + later, &l); + SM2star(Slater_inv, Dim, remainder, Updates_1block, Updates_index_1block, + later_updates + (Dim * later), later_index + later, &l, breakdown); later = later + l; } if (later > 0) { - SM2(Slater_inv, Dim, later, later_updates, later_index); + SM2(Slater_inv, Dim, later, later_updates, later_index, breakdown); } } +// Sherman-Morrison-Woodbury kernel WB3s +// WB3, SM2 mixing scheme +void WB3s(double *Slater_inv, const unsigned int Dim, + const unsigned int N_updates, double *Updates, + unsigned int *Updates_index, const double breakdown) { +#ifdef DEBUG2 + std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates + << " updates" << std::endl; + showMatrix2(Updates_index, 1, N_updates, "Updates_index"); + showMatrix2(Updates, N_updates, Dim, "Updates"); +#endif + + unsigned int n_of_3blocks = N_updates / 3; + unsigned int remainder = N_updates % 3; + unsigned int length_3block = 3 * Dim; + + // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with + // Woodbury 3x3 kernel + double later_updates[Dim * N_updates]; + unsigned int later_index[N_updates]; + unsigned int later = 0; + if (n_of_3blocks > 0) { + for (unsigned int i = 0; i < n_of_3blocks; i++) { + double *Updates_3block = &Updates[i * length_3block]; + unsigned int *Updates_index_3block = &Updates_index[i * 3]; + bool ok; + ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block, breakdown); + if (!ok) { // Send the entire block to SM2 +#ifdef DEBUG1 + std::cerr << "Woodbury 3x3 block failed! Sending to SM w/ US" + << std::endl; +#endif +#ifdef DEBUG2 + showMatrix2(Updates_3block, 3, Dim, "Updates_3block"); + showMatrix2(Updates_index_3block, 1, 3, "Updates_index_3block"); +#endif + unsigned int l = 0; + SM2star(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block, + later_updates + (Dim * later), later_index + later, &l, breakdown); + later = later + l; + } + } + } + + if (remainder != 0) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel + double *Updates_2block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; + unsigned int l = 0; + SM2star(Slater_inv, Dim, remainder, Updates_2block, Updates_index_2block, + later_updates + (Dim * later), later_index + later, &l, breakdown); + later = later + l; + } + + if (later > 0) { + SM2(Slater_inv, Dim, later, later_updates, later_index, breakdown); + } +} + +// Sherman-Morrison-Woodbury kernel WB32s +// WB3, WB2, SM2 mixing scheme +void WB32s(double *Slater_inv, const unsigned int Dim, + const unsigned int N_updates, double *Updates, + unsigned int *Updates_index, const double breakdown) { +#ifdef DEBUG2 + std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates + << " updates" << std::endl; + showMatrix2(Updates_index, 1, N_updates, "Updates_index"); + showMatrix2(Updates, N_updates, Dim, "Updates"); +#endif + + unsigned int n_of_3blocks = N_updates / 3; + unsigned int remainder = N_updates % 3; + unsigned int length_3block = 3 * Dim; + + // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with + // Woodbury 3x3 kernel + double later_updates[Dim * N_updates]; + unsigned int later_index[N_updates]; + unsigned int later = 0; + if (n_of_3blocks > 0) { + for (unsigned int i = 0; i < n_of_3blocks; i++) { + double *Updates_3block = &Updates[i * length_3block]; + unsigned int *Updates_index_3block = &Updates_index[i * 3]; + bool ok; + ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block, breakdown); + if (!ok) { // Send the entire block to SM2 +#ifdef DEBUG1 + std::cerr << "Woodbury 3x3 block failed! Sending to SM w/ US" + << std::endl; +#endif +#ifdef DEBUG2 + showMatrix2(Updates_3block, 3, Dim, "Updates_3block"); + showMatrix2(Updates_index_3block, 1, 3, "Updates_index_3block"); +#endif + unsigned int l = 0; + SM2star(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block, + later_updates + (Dim * later), later_index + later, &l, breakdown); + later = later + l; + } + } + } + + if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel + double *Updates_2block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; + bool ok; + ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block, breakdown); + if (!ok) { // Send the entire block to SM2 +#ifdef DEBUG1 + std::cerr << "Woodbury 2x2 block failed! Sending to SM w/ US" + << std::endl; +#endif + unsigned int l = 0; + SM2star(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block, + later_updates + (Dim * later), later_index + later, &l, breakdown); + later = later + l; + } + } else if (remainder == 1) { // Apply last remaining update with SM2 + double *Updates_1block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; + unsigned int l = 0; + SM2star(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block, + later_updates + (Dim * later), later_index + later, &l, breakdown); + later = later + l; + } + + if (later > 0) { + SM2(Slater_inv, Dim, later, later_updates, later_index, breakdown); + } +} + + extern "C" { -void SMWB1_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SMWB1(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); +void WB2s_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + WB2s(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } -void SMWB2_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SMWB2(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); +void WB3s_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + WB3s(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); +} +void WB32s_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + WB32s(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } } diff --git a/src/SM_Standard.cpp b/src/SM_Standard.cpp index 8545e9a..3b0a027 100644 --- a/src/SM_Standard.cpp +++ b/src/SM_Standard.cpp @@ -6,8 +6,12 @@ // #define DEBUG1 // Naïve Sherman Morrison -void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index) { +void SM1(double *Slater_inv, + unsigned int Dim, + unsigned int N_updates, + double *Updates, + unsigned int *Updates_index, + const double breakdown) { #ifdef DEBUG1 std::cerr << "Called SM1 with " << N_updates << " updates" << std::endl; #endif @@ -28,7 +32,7 @@ void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Denominator double den = 1 + C[Updates_index[l] - 1]; - if (std::fabs(den) < threshold()) { + if (std::fabs(den) < breakdown) { #ifdef DEBUG1 std::cerr << "Breakdown condition triggered at " << Updates_index[l] << std::endl; @@ -51,12 +55,13 @@ void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, l += 1; } + } // Sherman Morrison, with J. Slagel splitting // http://hdl.handle.net/10919/52966 void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index) { + double *Updates, unsigned int *Updates_index, const double breakdown) { #ifdef DEBUG1 std::cerr << "Called SM2 with " << N_updates << " updates" << std::endl; #endif @@ -66,10 +71,10 @@ void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, unsigned int later = 0; SM2star(Slater_inv, Dim, N_updates, Updates, Updates_index, later_updates, - later_index, &later); + later_index, &later, breakdown); if (later > 0) { - SM2(Slater_inv, Dim, later, later_updates, later_index); + SM2(Slater_inv, Dim, later, later_updates, later_index, breakdown); } } @@ -78,7 +83,7 @@ void SM2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index, double *later_updates, unsigned int *later_index, - unsigned int *later) { + unsigned int *later, const double breakdown) { #ifdef DEBUG1 std::cerr << "Called SM2* with " << N_updates << " updates" << std::endl; #endif @@ -99,7 +104,7 @@ void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Denominator double den = 1 + C[Updates_index[l] - 1]; - if (std::fabs(den) < threshold()) { + if (std::fabs(den) < breakdown) { #ifdef DEBUG1 std::cerr << "Breakdown condition triggered at " << Updates_index[l] << std::endl; @@ -136,7 +141,7 @@ void SM2star(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Sherman Morrison, leaving zero denominators for later void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index) { + double *Updates, unsigned int *Updates_index, const double breakdown) { #ifdef DEBUG1 std::cerr << "Called SM3 with " << N_updates << " updates" << std::endl; #endif @@ -161,7 +166,7 @@ void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Denominator double den = 1 + C[Updates_index[l] - 1]; - if (std::fabs(den) < threshold()) { + if (std::fabs(den) < breakdown) { #ifdef DEBUG1 std::cerr << "Breakdown condition triggered at " << Updates_index[l] << std::endl; @@ -201,7 +206,7 @@ void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, } // If some have failed, make a recursive call else if (later > 0) { - SM3(Slater_inv, Dim, later, later_updates, later_index); + SM3(Slater_inv, Dim, later, later_updates, later_index, breakdown); } } @@ -209,7 +214,7 @@ void SM3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Leave zero denominators for later (SM3), and when none are left then split // (SM2) void SM4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, - double *Updates, unsigned int *Updates_index) { + double *Updates, unsigned int *Updates_index, const double breakdown) { #ifdef DEBUG1 std::cerr << "Called SM4 with " << N_updates << " updates" << std::endl; #endif @@ -234,7 +239,7 @@ void SM4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // Denominator double den = 1 + C[Updates_index[l] - 1]; - if (std::fabs(den) < threshold()) { + if (std::fabs(den) < breakdown) { #ifdef DEBUG1 std::cerr << "Breakdown condition triggered at " << Updates_index[l] << std::endl; @@ -266,32 +271,32 @@ void SM4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, // If all the updates have failed, fall back on splitting (SM2) if (later == N_updates) { - SM2(Slater_inv, Dim, later, later_updates, later_index); + SM2(Slater_inv, Dim, later, later_updates, later_index, breakdown); } // If some have failed, make a recursive call else if (later > 0) { - SM4(Slater_inv, Dim, later, later_updates, later_index); + SM4(Slater_inv, Dim, later, later_updates, later_index, breakdown); } } extern "C" { void SM1_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SM1(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + SM1(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } void SM2_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SM2(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + SM2(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } void SM3_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SM3(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + SM3(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } void SM4_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, - double **linUpdates, unsigned int **Updates_index) { - SM4(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); + double **linUpdates, unsigned int **Updates_index, const double breakdown) { + SM4(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index, breakdown); } } diff --git a/src/Woodbury.cpp b/src/Woodbury.cpp index 9c97b96..7f878f9 100644 --- a/src/Woodbury.cpp +++ b/src/Woodbury.cpp @@ -15,7 +15,7 @@ // Woodbury 2x2 kernel bool WB2(double *Slater_inv, const unsigned int Dim, double *Updates, - const unsigned int *Updates_index) { + const unsigned int *Updates_index, const double breakdown) { /* C := S^{-1} * U, dim x 2 B := 1 + V * C, 2 x 2 @@ -48,7 +48,7 @@ bool WB2(double *Slater_inv, const unsigned int Dim, double *Updates, // Check if determinant of inverted matrix is not zero double det = B0 * B3 - B1 * B2; - if (std::fabs(det) < threshold()) { + if (std::fabs(det) < breakdown) { #ifdef DEBUG1 std::cerr << "Determinant too close to zero! No inverse found." << std::endl; @@ -86,7 +86,7 @@ bool WB2(double *Slater_inv, const unsigned int Dim, double *Updates, // Woodbury 3x3 kernel bool WB3(double *Slater_inv, const unsigned int Dim, double *Updates, - const unsigned int *Updates_index) { + const unsigned int *Updates_index, const double breakdown) { /* C := S^{-1} * U, dim x 3 B := 1 + V * C, 3 x 3 @@ -139,7 +139,7 @@ bool WB3(double *Slater_inv, const unsigned int Dim, double *Updates, #ifdef DEBUG2 std::cerr << "Determinant of B = " << det << std::endl; #endif - if (std::fabs(det) < threshold()) { + if (std::fabs(det) < breakdown) { #ifdef DEBUG1 std::cerr << "Determinant too close to zero! No inverse found." << std::endl; @@ -197,15 +197,15 @@ bool WB3(double *Slater_inv, const unsigned int Dim, double *Updates, extern "C" { bool WB2_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates, - unsigned int **Updates_index) { + unsigned int **Updates_index, const double breakdown) { bool ok; - ok = WB2(*linSlater_inv, *Dim, *linUpdates, *Updates_index); + ok = WB2(*linSlater_inv, *Dim, *linUpdates, *Updates_index, breakdown); return ok; } bool WB3_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates, - unsigned int **Updates_index) { + unsigned int **Updates_index, const double breakdown) { bool ok; - ok = WB3(*linSlater_inv, *Dim, *linUpdates, *Updates_index); + ok = WB3(*linSlater_inv, *Dim, *linUpdates, *Updates_index, breakdown); return ok; } } diff --git a/tests/qmckl_test_h5.cpp b/tests/qmckl_test_h5.cpp index 49a511c..3ae3914 100644 --- a/tests/qmckl_test_h5.cpp +++ b/tests/qmckl_test_h5.cpp @@ -3,7 +3,6 @@ #include "Helpers.hpp" #include "qmckl.h" - #include #include #include @@ -16,9 +15,9 @@ unsigned int repetition_number; const H5std_string FILE_NAME("dataset.hdf5"); -void read_int(H5::H5File file, std::string key, uint64_t *data) { +void read_int(H5::H5File file, std::string key, unsigned int *data) { H5::DataSet ds = file.openDataSet(key); - ds.read(data, H5::PredType::STD_U64LE); + ds.read(data, H5::PredType::STD_U32LE); ds.close(); } @@ -35,10 +34,12 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown std::string group = "cycle_" + std::to_string(cycle); unsigned int col, i, j; + unsigned int dim_32, nupdates_32; uint64_t dim, nupdates; - read_int(file, group + "/slater_matrix_dim", &dim); - read_int(file, group + "/nupdates", &nupdates); + read_int(file, group + "/slater_matrix_dim", &dim_32); + read_int(file, group + "/nupdates", &nupdates_32); + dim = dim_32; nupdates = nupdates_32; double *slater_matrix = new double[dim * dim]; read_double(file, group + "/slater_matrix", slater_matrix); @@ -46,8 +47,12 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown double *slater_inverse = new double[dim * dim]; read_double(file, group + "/slater_inverse", slater_inverse); + unsigned int *temp = new unsigned int[nupdates]; uint64_t *col_update_index = new uint64_t[nupdates]; - read_int(file, group + "/col_update_index", col_update_index); + read_int(file, group + "/col_update_index", temp); + for (i = 0; i < nupdates; i++) { + col_update_index[i] = temp[i]; + } double *updates = new double[nupdates * dim]; read_double(file, group + "/updates", updates); @@ -74,7 +79,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown std::cout << "# of reps. = " << repetition_number << std::endl; double *slater_inverse_nonpersistent = new double[dim * dim]; - if (version == "qmckl_sm1") { + if (version == "sm1") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -82,7 +87,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_wb2") { + else if (version == "wb2") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -90,7 +95,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_wb3") { + else if (version == "wb3") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -98,7 +103,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_sm2") { + else if (version == "sm2") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -106,7 +111,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_wb2s") { + else if (version == "wb2s") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -114,7 +119,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_wb3s") { + else if (version == "wb3s") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -122,7 +127,7 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown u, col_update_index, breakdown, slater_inverse_nonpersistent); } } - else if (version == "qmckl_wb32s") { + else if (version == "wb32s") { for (unsigned int i = 0; i < repetition_number; i++) { memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); @@ -138,21 +143,21 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown dim * dim * sizeof(double)); delete[] slater_inverse_nonpersistent; #else // No performance measurements repetition - if (version == "qmckl_sm1") { + if (version == "sm1") { qmckl_context context; context = qmckl_context_create(); qmckl_exit_code rc; rc = qmckl_sherman_morrison_c(context, dim, nupdates, u, col_update_index, breakdown, slater_inverse); } - else if (version == "qmckl_wb2") { + else if (version == "wb2") { qmckl_context context; context = qmckl_context_create(); qmckl_exit_code rc; rc = qmckl_woodbury_2_c(context, dim, u, col_update_index, breakdown, slater_inverse); } - else if (version == "qmckl_wb3") { + else if (version == "wb3") { qmckl_context context; context = qmckl_context_create(); qmckl_exit_code rc; @@ -166,8 +171,68 @@ int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown #endif // PERF delete[] u, col_update_index; + showMatrix(slater_matrix, dim, "Slater Matrix"); + showMatrix(slater_inverse, dim, "Slater Inverse"); double *res = new double[dim * dim]{0}; - matMul(slater_matrix, slater_inverse, res, dim); + { + for (unsigned int i = 0; i < dim; i++) { + for (unsigned int j = 0; j < dim; j++) { + for (unsigned int k = 0; k < dim; k++) { + res[i * dim + j] += slater_matrix[i * dim + k] * slater_inverse[k * dim + j]; + } + } + } + } + + //matMul2(slater_matrix, slater_inverse, res, dim_32, dim_32, dim_32); + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (slater_matrix[i * dim + j] >= 0) { + printf(" %17.10e,", slater_matrix[i * dim + j]); + } else { + printf(" %17.10e,", slater_matrix[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (slater_inverse[i * dim + j] >= 0) { + printf(" %17.10e,", slater_inverse[i * dim + j]); + } else { + printf(" %17.10e,", slater_inverse[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (res[i * dim + j] >= 0) { + printf(" %17.10e,", res[i * dim + j]); + } else { + printf(" %17.10e,", res[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // bool ok = is_identity(res, dim, tolerance); double res_max = residual_max(res, dim); double res2 = residual_frobenius2(res, dim); diff --git a/tests/test_h5.cpp b/tests/test_h5.cpp index d0a151f..0774f2c 100644 --- a/tests/test_h5.cpp +++ b/tests/test_h5.cpp @@ -13,31 +13,29 @@ unsigned int repetition_number; #endif -using namespace H5; - -// #define DEBUG - const H5std_string FILE_NAME("dataset.hdf5"); -void read_int(H5File file, std::string key, unsigned int *data) { - DataSet ds = file.openDataSet(key); - ds.read(data, PredType::STD_U32LE); +void read_int(H5::H5File file, std::string key, unsigned int *data) { + H5::DataSet ds = file.openDataSet(key); + ds.read(data, H5::PredType::STD_U32LE); ds.close(); } -void read_double(H5File file, std::string key, double *data) { - DataSet ds = file.openDataSet(key); - ds.read(data, PredType::IEEE_F64LE); +void read_double(H5::H5File file, std::string key, double *data) { + H5::DataSet ds = file.openDataSet(key); + ds.read(data, H5::PredType::IEEE_F64LE); ds.close(); } -int test_cycle(H5File file, int cycle, std::string version, double tolerance) { +int test_cycle(H5::H5File file, int cycle, std::string version, double breakdown, double tolerance) { /* Read the data */ std::string group = "cycle_" + std::to_string(cycle); unsigned int dim, nupdates, col, i, j; + + read_int(file, group + "/slater_matrix_dim", &dim); read_int(file, group + "/nupdates", &nupdates); @@ -56,9 +54,6 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { double *u = new double[nupdates * dim]; /* Test */ -#ifdef DEBUG2 - showMatrix(slater_inverse, dim, "OLD Inverse"); -#endif // Transform replacement updates in 'updates[]' into additive updates in 'u[]' for (j = 0; j < nupdates; j++) { @@ -69,54 +64,79 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { slater_matrix[i * dim + (col - 1)] = updates[i + j * dim]; } } - -#ifdef DEBUG2 - showMatrix(slater_matrix, dim, "OLD Slater"); -#endif - -#ifdef DEBUG2 - showMatrix(u, dim, "Updates"); -#endif + delete[] updates; #ifdef PERF std::cout << "# of reps. = " << repetition_number << std::endl; double *slater_inverse_nonpersistent = new double[dim * dim]; - for (unsigned int i = 0; i < repetition_number; i++) { - std::memcpy(slater_inverse_nonpersistent, slater_inverse, - dim * dim * sizeof(double)); - if (version == "maponia3") { - MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u, - col_update_index); - } else if (version == "maponia3s") { - MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u, - col_update_index); - } else if (version == "sm1") { - SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); - } else if (version == "sm2") { - SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); - } else if (version == "sm3") { - SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); - } else if (version == "sm4") { - SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); - } else if (version == "wb2") { - WB2(slater_inverse_nonpersistent, dim, u, col_update_index); - } else if (version == "wb3") { - WB3(slater_inverse_nonpersistent, dim, u, col_update_index); - } else if (version == "smwb1") { - SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); - } else if (version == "smwb2") { - SMWB2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); -#ifdef MKL - } else if (version == "lapack") { - memcpy(slater_inverse_nonpersistent, slater_matrix, - dim * dim * sizeof(double)); - inverse(slater_inverse_nonpersistent, dim); -#endif // MKL - } else { - std::cerr << "Unknown version " << version << std::endl; - exit(1); + + if (version == "sm1") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + SM1(slater_inverse_nonpersistent, dim, nupdates, + u, col_update_index, breakdown); } } + else if (version == "wb2") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + WB2(slater_inverse_nonpersistent, dim, + u, col_update_index, breakdown); + } + } + else if (version == "wb3") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + WB3(slater_inverse_nonpersistent, dim, + u, col_update_index, breakdown); + } + } + else if (version == "sm2") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + SM2(slater_inverse_nonpersistent, dim, nupdates, + u, col_update_index, breakdown); + } + } + else if (version == "wb2s") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + WB2s(slater_inverse_nonpersistent, dim, nupdates, + u, col_update_index, breakdown); + } + } + else if (version == "wb3s") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + WB3s(slater_inverse_nonpersistent, dim, nupdates, + u, col_update_index, breakdown); + } + } + else if (version == "wb32s") { + for (unsigned int i = 0; i < repetition_number; i++) { + memcpy(slater_inverse_nonpersistent, slater_inverse, + dim * dim * sizeof(double)); + WB32s(slater_inverse_nonpersistent, dim, nupdates, + u, col_update_index, breakdown); + } + } +#ifdef MKL + else if (version == "lapack") { + memcpy(slater_inverse_nonpersistent, slater_matrix, + dim * dim * sizeof(double)); + inverse(slater_inverse_nonpersistent, dim); + } +#endif // MKL + else { + std::cerr << "Unknown version " << version << std::endl; + exit(1); + } std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double)); delete[] slater_inverse_nonpersistent; @@ -137,10 +157,12 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { WB2(slater_inverse, dim, u, col_update_index); } else if (version == "wb3") { WB3(slater_inverse, dim, u, col_update_index); - } else if (version == "smwb1") { - SMWB1(slater_inverse, dim, nupdates, u, col_update_index); - } else if (version == "smwb4") { - SMWB4(slater_inverse, dim, nupdates, u, col_update_index); + } else if (version == "wb2s") { + WB2s(slater_inverse, dim, nupdates, u, col_update_index); + } else if (version == "wb3s") { + WB3s(slater_inverse, dim, nupdates, u, col_update_index); + } else if (version == "wb32s") { + WB32s(slater_inverse, dim, nupdates, u, col_update_index); #ifdef MKL } else if (version == "lapack") { memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double)); @@ -151,17 +173,70 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { exit(1); } #endif // PERF + delete[] u, col_update_index; -#ifdef DEBUG2 - showMatrix(slater_matrix, dim, "NEW Slater"); -#endif - -#ifdef DEBUG2 - showMatrix(slater_inverse, dim, "NEW Inverse"); -#endif - + showMatrix(slater_matrix, dim, "Slater Matrix"); + showMatrix(slater_inverse, dim, "Slater Inverse"); double *res = new double[dim * dim]{0}; - matMul(slater_matrix, slater_inverse, res, dim); + { + for (unsigned int i = 0; i < dim; i++) { + for (unsigned int j = 0; j < dim; j++) { + for (unsigned int k = 0; k < dim; k++) { + res[i * dim + j] += slater_matrix[i * dim + k] * slater_inverse[k * dim + j]; + } + } + } + } + + //matMul2(slater_matrix, slater_inverse, res, dim, dim, dim); + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (slater_matrix[i * dim + j] >= 0) { + printf(" %17.10e,", slater_matrix[i * dim + j]); + } else { + printf(" %17.10e,", slater_matrix[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (slater_inverse[i * dim + j] >= 0) { + printf(" %17.10e,", slater_inverse[i * dim + j]); + } else { + printf(" %17.10e,", slater_inverse[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // + // + // + for (unsigned int i = 0; i < dim; i++) { + printf("["); + for (unsigned int j = 0; j < dim; j++) { + if (res[i * dim + j] >= 0) { + printf(" %17.10e,", res[i * dim + j]); + } else { + printf(" %17.10e,", res[i * dim + j]); + } + } + printf(" ],\n"); + } + printf("\n\n"); + // + // bool ok = is_identity(res, dim, tolerance); double res_max = residual_max(res, dim); double res2 = residual_frobenius2(res, dim); @@ -173,25 +248,25 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { showMatrix(res, dim, "Result"); #endif - delete[] res, updates, u, col_update_index, slater_matrix, slater_inverse; + delete[] res, slater_matrix, slater_inverse; return ok; } int main(int argc, char **argv) { #ifdef PERF - if (argc != 6) { + if (argc != 7) { std::cerr << "Execute from within 'datasets/'" << std::endl; std::cerr - << "usage: test_h5 " + << "usage: test_h5 " << std::endl; return 1; } #else - if (argc != 5) { + if (argc != 6) { std::cerr << "Execute from within 'datasets/'" << std::endl; std::cerr - << "usage: test_h5 " + << "usage: test_h5 " << std::endl; return 1; } @@ -200,16 +275,17 @@ int main(int argc, char **argv) { std::string version(argv[1]); int start_cycle = std::stoi(argv[2]); int stop_cycle = std::stoi(argv[3]); - double tolerance = std::stod(argv[4]); - H5File file(FILE_NAME, H5F_ACC_RDONLY); + double breakdown = std::stod(argv[4]); + double tolerance = std::stod(argv[5]); + H5::H5File file(FILE_NAME, H5F_ACC_RDONLY); #ifdef PERF - repetition_number = std::stoi(argv[5]); + repetition_number = std::stoi(argv[6]); #endif bool ok; for (int cycle = start_cycle; cycle < stop_cycle + 1; cycle++) { - ok = test_cycle(file, cycle, version, tolerance); + ok = test_cycle(file, cycle, version, breakdown, tolerance); if (ok) { std::cerr << "ok -- cycle " << std::to_string(cycle) << std::endl; } else {