diff --git a/Makefile b/Makefile index 5579147..e4a1eb8 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ ifeq ($(ENV),INTEL) FC = ifort ARCH = -xCORE-AVX2 OPT = -O2 - DEBUG = -debug full + DEBUG = -g -debug full else ifeq ($(ENV),LLVM) CXX = clang++ FC = flang diff --git a/src/SMWB.cpp b/src/SMWB.cpp index c353259..b354a2d 100644 --- a/src/SMWB.cpp +++ b/src/SMWB.cpp @@ -21,14 +21,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel if (n_of_3blocks > 0) { for (unsigned int i = 0; i < n_of_3blocks; i++) { - double Updates_3block[length_3block]; - unsigned int Updates_index_3block[3]; - Updates_index_3block[0] = Updates_index[3 * i + 0]; - Updates_index_3block[1] = Updates_index[3 * i + 1]; - Updates_index_3block[2] = Updates_index[3 * i + 2]; - for (unsigned int j = 0; j < length_3block; j++) { - Updates_3block[j] = Updates[i * length_3block + j]; - } + double *Updates_3block = &Updates[i * length_3block]; + unsigned int *Updates_index_3block = &Updates_index[i * 3]; bool ok; ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); if (!ok) { // Send the entire block to SM2 @@ -41,16 +35,10 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double } } } -/// Convert asrray copies to pointers if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel - double Updates_2block[length_2block]; - unsigned int Updates_index_2block[2]; - Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0]; - Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1]; - for (unsigned int i = 0; i < length_2block; i++) { - Updates_2block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_2block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; bool ok; ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); if (!ok) { // Send the entire block to SM2 @@ -60,12 +48,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block); } } else if (remainder == 1) { // Apply last remaining update with SM2 - double Updates_1block[length_1block]; - unsigned int Updates_index_1block[1]; - Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0]; - for (unsigned int i = 0; i < length_1block; i++) { - Updates_1block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_1block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); } else { // remainder == 0 // Nothing left to do. @@ -93,14 +77,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel if (n_of_3blocks > 0) { for (unsigned int i = 0; i < n_of_3blocks; i++) { - double Updates_3block[length_3block]; - unsigned int Updates_index_3block[3]; - Updates_index_3block[0] = Updates_index[3 * i + 0]; - Updates_index_3block[1] = Updates_index[3 * i + 1]; - Updates_index_3block[2] = Updates_index[3 * i + 2]; - for (unsigned int j = 0; j < length_3block; j++) { - Updates_3block[j] = Updates[i * length_3block + j]; - } + double *Updates_3block = &Updates[i * length_3block]; + unsigned int *Updates_index_3block = &Updates_index[i * 3]; bool ok; ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); if (!ok) { // Send the entire block to SM3 @@ -115,13 +93,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double } if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel - double Updates_2block[length_2block]; - unsigned int Updates_index_2block[2]; - Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0]; - Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1]; - for (unsigned int i = 0; i < length_2block; i++) { - Updates_2block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_2block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; bool ok; ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); if (!ok) { // Send the entire block to SM3 @@ -129,12 +102,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double SM3(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block); } } else if (remainder == 1) { // Apply last remaining update with SM3 - double Updates_1block[length_1block]; - unsigned int Updates_index_1block[1]; - Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0]; - for (unsigned int i = 0; i < length_1block; i++) { - Updates_1block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_1block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; SM3(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); } else { // remainder == 0 // Nothing left to do. @@ -160,14 +129,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel if (n_of_3blocks > 0) { for (unsigned int i = 0; i < n_of_3blocks; i++) { - double Updates_3block[length_3block]; - unsigned int Updates_index_3block[3]; - Updates_index_3block[0] = Updates_index[3 * i + 0]; - Updates_index_3block[1] = Updates_index[3 * i + 1]; - Updates_index_3block[2] = Updates_index[3 * i + 2]; - for (unsigned int j = 0; j < length_3block; j++) { - Updates_3block[j] = Updates[i * length_3block + j]; - } + double *Updates_3block = &Updates[i * length_3block]; + unsigned int *Updates_index_3block = &Updates_index[i * 3]; bool ok; ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); if (!ok) { // Send the entire block to SM4 @@ -182,13 +145,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double } if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel - double Updates_2block[length_2block]; - unsigned int Updates_index_2block[2]; - Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0]; - Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1]; - for (unsigned int i = 0; i < length_2block; i++) { - Updates_2block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_2block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks]; bool ok; ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); if (!ok) { // Send the entire block to SM4 @@ -196,12 +154,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double SM4(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block); } } else if (remainder == 1) { // Apply last remaining update with SM4 - double Updates_1block[length_1block]; - unsigned int Updates_index_1block[1]; - Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0]; - for (unsigned int i = 0; i < length_1block; i++) { - Updates_1block[i] = Updates[n_of_3blocks * length_3block + i]; - } + double *Updates_1block = &Updates[n_of_3blocks * length_3block]; + unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks]; SM4(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); } else { // remainder == 0 // Nothing left to do. @@ -222,16 +176,11 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double unsigned int length_2block = 2 * Dim; unsigned int length_1block = 1 * Dim; - // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel + // Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with Woodbury 2x2 kernel if (n_of_2blocks > 0) { for (unsigned int i = 0; i < n_of_2blocks; i++) { - double Updates_2block[length_2block]; - unsigned int Updates_index_2block[2]; - Updates_index_2block[0] = Updates_index[2 * i + 0]; - Updates_index_2block[1] = Updates_index[2 * i + 1]; - for (unsigned int j = 0; j < length_2block; j++) { - Updates_2block[j] = Updates[i * length_2block + j]; - } + double *Updates_2block = &Updates[i * length_2block]; + unsigned int *Updates_index_2block = &Updates_index[i * 2]; bool ok; ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); if (!ok) { // Send the entire block to SM2 @@ -246,12 +195,8 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double } if (remainder == 1) { // Apply last remaining update with SM4 - double Updates_1block[length_1block]; - unsigned int Updates_index_1block[1]; - Updates_index_1block[0] = Updates_index[2 * n_of_2blocks + 0]; - for (unsigned int i = 0; i < length_1block; i++) { - Updates_1block[i] = Updates[n_of_2blocks * length_2block + i]; - } + double *Updates_1block = &Updates[n_of_2blocks * length_2block]; + unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks]; SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); } else { // remainder == 0 // Nothing left to do. diff --git a/tests/test_h5.cpp b/tests/test_h5.cpp index f3509c6..55cd6fd 100644 --- a/tests/test_h5.cpp +++ b/tests/test_h5.cpp @@ -4,8 +4,15 @@ #include "Helpers.hpp" #include "SM_Maponi.hpp" #include "SM_Standard.hpp" +#include "Woodbury.hpp" #include "SMWB.hpp" +// #define PERF + +#ifdef PERF +unsigned int repetition_number; +#endif + using namespace H5; // #define DEBUG @@ -71,6 +78,48 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { showMatrix(u, dim, "Updates"); #endif +#ifdef PERF + std::cout << "# of reps. = " << repetition_number << std::endl; + double *slater_inverse_nonpersistent = new double[dim * dim]; + for (unsigned int i = 0; i < repetition_number; i++) { + std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double)); + if (version == "maponia3") { + MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "maponia3s") { + MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "sm1") { + SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "sm2") { + SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "sm3") { + SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "sm4") { + SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "wb2") { + WB2(slater_inverse_nonpersistent, dim, u, col_update_index); + } else if (version == "wb3") { + WB3(slater_inverse_nonpersistent, dim, u, col_update_index); + } else if (version == "smwb1") { + SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "smwb2") { + SMWB2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "smwb3") { + SMWB3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); + } else if (version == "smwb4") { + SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index); +#ifdef MKL + } else if (version == "lapack") { + memcpy(slater_inverse_nonpersistent, slater_matrix, dim * dim * sizeof(double)); + inverse(slater_inverse_nonpersistent, dim); +#endif // MKL + } else { + std::cerr << "Unknown version " << version << std::endl; + exit(1); + } + } + std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double)); + delete[] slater_inverse_nonpersistent; +#else if (version == "maponia3") { MaponiA3(slater_inverse, dim, nupdates, u, col_update_index); } else if (version == "maponia3s") { @@ -83,6 +132,10 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { SM3(slater_inverse, dim, nupdates, u, col_update_index); } else if (version == "sm4") { SM4(slater_inverse, dim, nupdates, u, col_update_index); + } else if (version == "wb2") { + WB2(slater_inverse, dim, u, col_update_index); + } else if (version == "wb3") { + WB3(slater_inverse, dim, u, col_update_index); } else if (version == "smwb1") { SMWB1(slater_inverse, dim, nupdates, u, col_update_index); } else if (version == "smwb2") { @@ -95,11 +148,12 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { } else if (version == "lapack") { memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double)); inverse(slater_inverse, dim); -#endif +#endif // MKL } else { std::cerr << "Unknown version " << version << std::endl; exit(1); } +#endif // PERF #ifdef DEBUG2 showMatrix(slater_matrix, dim, "NEW Slater"); @@ -112,22 +166,9 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { double *res = new double[dim * dim]{0}; matMul(slater_matrix, slater_inverse, res, dim); bool ok = is_identity(res, dim, tolerance); - double res_max = residual_max(res, dim); double res2 = residual_frobenius2(res, dim); - // double det; - // double **tmp = new double *[dim]; - // for (int i = 0; i < dim; i++) { - // tmp[i] = new double[dim]; - // for (int j = 0; j < dim; j++) { - // tmp[i][j] = res[i * dim + j]; - // } - // } - // det = determinant(tmp, dim); - // delete[] tmp; - // std::cout << "Residual = " << version << " " << cycle << " " << res_max << - // " " - // << res2 << " " << det << std::endl; + std::cout << "Residual = " << version << " " << cycle << " " << res_max << " " << res2 << std::endl; @@ -141,7 +182,11 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { } int main(int argc, char **argv) { +#ifdef PERF + if (argc != 6) { +#else if (argc != 5) { +#endif std::cerr << "Execute from within 'datasets/'" << std::endl; std::cerr << "usage: test_h5 " @@ -154,6 +199,10 @@ int main(int argc, char **argv) { double tolerance = std::stod(argv[4]); H5File file(FILE_NAME, H5F_ACC_RDONLY); +#ifdef PERF + repetition_number = std::stoi(argv[5]); +#endif + bool ok; for (int cycle = start_cycle; cycle < stop_cycle + 1; cycle++) { ok = test_cycle(file, cycle, version, tolerance);