mirror of
https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2025-01-12 22:18:36 +01:00
- Replaced the copy-arrays in the combined SMWB kernels with pointers
- Added loop in test_cycle() to repeat a single update cycle n-times for performance testing.
This commit is contained in:
parent
a4397f1496
commit
7b9b176545
2
Makefile
2
Makefile
@ -4,7 +4,7 @@ ifeq ($(ENV),INTEL)
|
||||
FC = ifort
|
||||
ARCH = -xCORE-AVX2
|
||||
OPT = -O2
|
||||
DEBUG = -debug full
|
||||
DEBUG = -g -debug full
|
||||
else ifeq ($(ENV),LLVM)
|
||||
CXX = clang++
|
||||
FC = flang
|
||||
|
101
src/SMWB.cpp
101
src/SMWB.cpp
@ -21,14 +21,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
|
||||
if (n_of_3blocks > 0) {
|
||||
for (unsigned int i = 0; i < n_of_3blocks; i++) {
|
||||
double Updates_3block[length_3block];
|
||||
unsigned int Updates_index_3block[3];
|
||||
Updates_index_3block[0] = Updates_index[3 * i + 0];
|
||||
Updates_index_3block[1] = Updates_index[3 * i + 1];
|
||||
Updates_index_3block[2] = Updates_index[3 * i + 2];
|
||||
for (unsigned int j = 0; j < length_3block; j++) {
|
||||
Updates_3block[j] = Updates[i * length_3block + j];
|
||||
}
|
||||
double *Updates_3block = &Updates[i * length_3block];
|
||||
unsigned int *Updates_index_3block = &Updates_index[i * 3];
|
||||
bool ok;
|
||||
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
|
||||
if (!ok) { // Send the entire block to SM2
|
||||
@ -41,16 +35,10 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Convert asrray copies to pointers
|
||||
|
||||
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||
double Updates_2block[length_2block];
|
||||
unsigned int Updates_index_2block[2];
|
||||
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
|
||||
for (unsigned int i = 0; i < length_2block; i++) {
|
||||
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
bool ok;
|
||||
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
|
||||
if (!ok) { // Send the entire block to SM2
|
||||
@ -60,12 +48,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
|
||||
}
|
||||
} else if (remainder == 1) { // Apply last remaining update with SM2
|
||||
double Updates_1block[length_1block];
|
||||
unsigned int Updates_index_1block[1];
|
||||
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
for (unsigned int i = 0; i < length_1block; i++) {
|
||||
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||
SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
|
||||
} else { // remainder == 0
|
||||
// Nothing left to do.
|
||||
@ -93,14 +77,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
|
||||
if (n_of_3blocks > 0) {
|
||||
for (unsigned int i = 0; i < n_of_3blocks; i++) {
|
||||
double Updates_3block[length_3block];
|
||||
unsigned int Updates_index_3block[3];
|
||||
Updates_index_3block[0] = Updates_index[3 * i + 0];
|
||||
Updates_index_3block[1] = Updates_index[3 * i + 1];
|
||||
Updates_index_3block[2] = Updates_index[3 * i + 2];
|
||||
for (unsigned int j = 0; j < length_3block; j++) {
|
||||
Updates_3block[j] = Updates[i * length_3block + j];
|
||||
}
|
||||
double *Updates_3block = &Updates[i * length_3block];
|
||||
unsigned int *Updates_index_3block = &Updates_index[i * 3];
|
||||
bool ok;
|
||||
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
|
||||
if (!ok) { // Send the entire block to SM3
|
||||
@ -115,13 +93,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
}
|
||||
|
||||
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||
double Updates_2block[length_2block];
|
||||
unsigned int Updates_index_2block[2];
|
||||
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
|
||||
for (unsigned int i = 0; i < length_2block; i++) {
|
||||
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
bool ok;
|
||||
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
|
||||
if (!ok) { // Send the entire block to SM3
|
||||
@ -129,12 +102,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
SM3(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
|
||||
}
|
||||
} else if (remainder == 1) { // Apply last remaining update with SM3
|
||||
double Updates_1block[length_1block];
|
||||
unsigned int Updates_index_1block[1];
|
||||
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
for (unsigned int i = 0; i < length_1block; i++) {
|
||||
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||
SM3(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
|
||||
} else { // remainder == 0
|
||||
// Nothing left to do.
|
||||
@ -160,14 +129,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
|
||||
if (n_of_3blocks > 0) {
|
||||
for (unsigned int i = 0; i < n_of_3blocks; i++) {
|
||||
double Updates_3block[length_3block];
|
||||
unsigned int Updates_index_3block[3];
|
||||
Updates_index_3block[0] = Updates_index[3 * i + 0];
|
||||
Updates_index_3block[1] = Updates_index[3 * i + 1];
|
||||
Updates_index_3block[2] = Updates_index[3 * i + 2];
|
||||
for (unsigned int j = 0; j < length_3block; j++) {
|
||||
Updates_3block[j] = Updates[i * length_3block + j];
|
||||
}
|
||||
double *Updates_3block = &Updates[i * length_3block];
|
||||
unsigned int *Updates_index_3block = &Updates_index[i * 3];
|
||||
bool ok;
|
||||
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
|
||||
if (!ok) { // Send the entire block to SM4
|
||||
@ -182,13 +145,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
}
|
||||
|
||||
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||
double Updates_2block[length_2block];
|
||||
unsigned int Updates_index_2block[2];
|
||||
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
|
||||
for (unsigned int i = 0; i < length_2block; i++) {
|
||||
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
bool ok;
|
||||
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
|
||||
if (!ok) { // Send the entire block to SM4
|
||||
@ -196,12 +154,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
SM4(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
|
||||
}
|
||||
} else if (remainder == 1) { // Apply last remaining update with SM4
|
||||
double Updates_1block[length_1block];
|
||||
unsigned int Updates_index_1block[1];
|
||||
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
|
||||
for (unsigned int i = 0; i < length_1block; i++) {
|
||||
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
|
||||
}
|
||||
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||
SM4(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
|
||||
} else { // remainder == 0
|
||||
// Nothing left to do.
|
||||
@ -222,16 +176,11 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
unsigned int length_2block = 2 * Dim;
|
||||
unsigned int length_1block = 1 * Dim;
|
||||
|
||||
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
|
||||
// Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with Woodbury 2x2 kernel
|
||||
if (n_of_2blocks > 0) {
|
||||
for (unsigned int i = 0; i < n_of_2blocks; i++) {
|
||||
double Updates_2block[length_2block];
|
||||
unsigned int Updates_index_2block[2];
|
||||
Updates_index_2block[0] = Updates_index[2 * i + 0];
|
||||
Updates_index_2block[1] = Updates_index[2 * i + 1];
|
||||
for (unsigned int j = 0; j < length_2block; j++) {
|
||||
Updates_2block[j] = Updates[i * length_2block + j];
|
||||
}
|
||||
double *Updates_2block = &Updates[i * length_2block];
|
||||
unsigned int *Updates_index_2block = &Updates_index[i * 2];
|
||||
bool ok;
|
||||
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
|
||||
if (!ok) { // Send the entire block to SM2
|
||||
@ -246,12 +195,8 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
|
||||
}
|
||||
|
||||
if (remainder == 1) { // Apply last remaining update with SM4
|
||||
double Updates_1block[length_1block];
|
||||
unsigned int Updates_index_1block[1];
|
||||
Updates_index_1block[0] = Updates_index[2 * n_of_2blocks + 0];
|
||||
for (unsigned int i = 0; i < length_1block; i++) {
|
||||
Updates_1block[i] = Updates[n_of_2blocks * length_2block + i];
|
||||
}
|
||||
double *Updates_1block = &Updates[n_of_2blocks * length_2block];
|
||||
unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
|
||||
SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
|
||||
} else { // remainder == 0
|
||||
// Nothing left to do.
|
||||
|
@ -4,8 +4,15 @@
|
||||
#include "Helpers.hpp"
|
||||
#include "SM_Maponi.hpp"
|
||||
#include "SM_Standard.hpp"
|
||||
#include "Woodbury.hpp"
|
||||
#include "SMWB.hpp"
|
||||
|
||||
// #define PERF
|
||||
|
||||
#ifdef PERF
|
||||
unsigned int repetition_number;
|
||||
#endif
|
||||
|
||||
using namespace H5;
|
||||
|
||||
// #define DEBUG
|
||||
@ -71,6 +78,48 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
|
||||
showMatrix(u, dim, "Updates");
|
||||
#endif
|
||||
|
||||
#ifdef PERF
|
||||
std::cout << "# of reps. = " << repetition_number << std::endl;
|
||||
double *slater_inverse_nonpersistent = new double[dim * dim];
|
||||
for (unsigned int i = 0; i < repetition_number; i++) {
|
||||
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double));
|
||||
if (version == "maponia3") {
|
||||
MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "maponia3s") {
|
||||
MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "sm1") {
|
||||
SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "sm2") {
|
||||
SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "sm3") {
|
||||
SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "sm4") {
|
||||
SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "wb2") {
|
||||
WB2(slater_inverse_nonpersistent, dim, u, col_update_index);
|
||||
} else if (version == "wb3") {
|
||||
WB3(slater_inverse_nonpersistent, dim, u, col_update_index);
|
||||
} else if (version == "smwb1") {
|
||||
SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "smwb2") {
|
||||
SMWB2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "smwb3") {
|
||||
SMWB3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "smwb4") {
|
||||
SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
|
||||
#ifdef MKL
|
||||
} else if (version == "lapack") {
|
||||
memcpy(slater_inverse_nonpersistent, slater_matrix, dim * dim * sizeof(double));
|
||||
inverse(slater_inverse_nonpersistent, dim);
|
||||
#endif // MKL
|
||||
} else {
|
||||
std::cerr << "Unknown version " << version << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double));
|
||||
delete[] slater_inverse_nonpersistent;
|
||||
#else
|
||||
if (version == "maponia3") {
|
||||
MaponiA3(slater_inverse, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "maponia3s") {
|
||||
@ -83,6 +132,10 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
|
||||
SM3(slater_inverse, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "sm4") {
|
||||
SM4(slater_inverse, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "wb2") {
|
||||
WB2(slater_inverse, dim, u, col_update_index);
|
||||
} else if (version == "wb3") {
|
||||
WB3(slater_inverse, dim, u, col_update_index);
|
||||
} else if (version == "smwb1") {
|
||||
SMWB1(slater_inverse, dim, nupdates, u, col_update_index);
|
||||
} else if (version == "smwb2") {
|
||||
@ -95,11 +148,12 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
|
||||
} else if (version == "lapack") {
|
||||
memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double));
|
||||
inverse(slater_inverse, dim);
|
||||
#endif
|
||||
#endif // MKL
|
||||
} else {
|
||||
std::cerr << "Unknown version " << version << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
#endif // PERF
|
||||
|
||||
#ifdef DEBUG2
|
||||
showMatrix(slater_matrix, dim, "NEW Slater");
|
||||
@ -112,22 +166,9 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
|
||||
double *res = new double[dim * dim]{0};
|
||||
matMul(slater_matrix, slater_inverse, res, dim);
|
||||
bool ok = is_identity(res, dim, tolerance);
|
||||
|
||||
double res_max = residual_max(res, dim);
|
||||
double res2 = residual_frobenius2(res, dim);
|
||||
// double det;
|
||||
// double **tmp = new double *[dim];
|
||||
// for (int i = 0; i < dim; i++) {
|
||||
// tmp[i] = new double[dim];
|
||||
// for (int j = 0; j < dim; j++) {
|
||||
// tmp[i][j] = res[i * dim + j];
|
||||
// }
|
||||
// }
|
||||
// det = determinant(tmp, dim);
|
||||
// delete[] tmp;
|
||||
// std::cout << "Residual = " << version << " " << cycle << " " << res_max <<
|
||||
// " "
|
||||
// << res2 << " " << det << std::endl;
|
||||
|
||||
std::cout << "Residual = " << version << " " << cycle << " " << res_max << " "
|
||||
<< res2 << std::endl;
|
||||
|
||||
@ -141,7 +182,11 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
#ifdef PERF
|
||||
if (argc != 6) {
|
||||
#else
|
||||
if (argc != 5) {
|
||||
#endif
|
||||
std::cerr << "Execute from within 'datasets/'" << std::endl;
|
||||
std::cerr
|
||||
<< "usage: test_h5 <version> <start cycle> <stop cycle> <tolerance>"
|
||||
@ -154,6 +199,10 @@ int main(int argc, char **argv) {
|
||||
double tolerance = std::stod(argv[4]);
|
||||
H5File file(FILE_NAME, H5F_ACC_RDONLY);
|
||||
|
||||
#ifdef PERF
|
||||
repetition_number = std::stoi(argv[5]);
|
||||
#endif
|
||||
|
||||
bool ok;
|
||||
for (int cycle = start_cycle; cycle < stop_cycle + 1; cycle++) {
|
||||
ok = test_cycle(file, cycle, version, tolerance);
|
||||
|
Loading…
Reference in New Issue
Block a user