- Replaced the copy-arrays in the combined SMWB kernels with pointers

- Added loop in test_cycle() to repeat a single update cycle n-times for performance testing.
This commit is contained in:
François Coppens 2021-06-23 17:28:29 +02:00
parent a4397f1496
commit 7b9b176545
3 changed files with 88 additions and 94 deletions

View File

@ -4,7 +4,7 @@ ifeq ($(ENV),INTEL)
FC = ifort
ARCH = -xCORE-AVX2
OPT = -O2
DEBUG = -debug full
DEBUG = -g -debug full
else ifeq ($(ENV),LLVM)
CXX = clang++
FC = flang

View File

@ -21,14 +21,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
if (n_of_3blocks > 0) {
for (unsigned int i = 0; i < n_of_3blocks; i++) {
double Updates_3block[length_3block];
unsigned int Updates_index_3block[3];
Updates_index_3block[0] = Updates_index[3 * i + 0];
Updates_index_3block[1] = Updates_index[3 * i + 1];
Updates_index_3block[2] = Updates_index[3 * i + 2];
for (unsigned int j = 0; j < length_3block; j++) {
Updates_3block[j] = Updates[i * length_3block + j];
}
double *Updates_3block = &Updates[i * length_3block];
unsigned int *Updates_index_3block = &Updates_index[i * 3];
bool ok;
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to SM2
@ -41,16 +35,10 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
}
}
}
/// Convert asrray copies to pointers
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double Updates_2block[length_2block];
unsigned int Updates_index_2block[2];
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
for (unsigned int i = 0; i < length_2block; i++) {
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM2
@ -60,12 +48,8 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
}
} else if (remainder == 1) { // Apply last remaining update with SM2
double Updates_1block[length_1block];
unsigned int Updates_index_1block[1];
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
for (unsigned int i = 0; i < length_1block; i++) {
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
} else { // remainder == 0
// Nothing left to do.
@ -93,14 +77,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
if (n_of_3blocks > 0) {
for (unsigned int i = 0; i < n_of_3blocks; i++) {
double Updates_3block[length_3block];
unsigned int Updates_index_3block[3];
Updates_index_3block[0] = Updates_index[3 * i + 0];
Updates_index_3block[1] = Updates_index[3 * i + 1];
Updates_index_3block[2] = Updates_index[3 * i + 2];
for (unsigned int j = 0; j < length_3block; j++) {
Updates_3block[j] = Updates[i * length_3block + j];
}
double *Updates_3block = &Updates[i * length_3block];
unsigned int *Updates_index_3block = &Updates_index[i * 3];
bool ok;
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to SM3
@ -115,13 +93,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
}
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double Updates_2block[length_2block];
unsigned int Updates_index_2block[2];
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
for (unsigned int i = 0; i < length_2block; i++) {
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM3
@ -129,12 +102,8 @@ void SMWB2(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
SM3(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
}
} else if (remainder == 1) { // Apply last remaining update with SM3
double Updates_1block[length_1block];
unsigned int Updates_index_1block[1];
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
for (unsigned int i = 0; i < length_1block; i++) {
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
SM3(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
} else { // remainder == 0
// Nothing left to do.
@ -160,14 +129,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
if (n_of_3blocks > 0) {
for (unsigned int i = 0; i < n_of_3blocks; i++) {
double Updates_3block[length_3block];
unsigned int Updates_index_3block[3];
Updates_index_3block[0] = Updates_index[3 * i + 0];
Updates_index_3block[1] = Updates_index[3 * i + 1];
Updates_index_3block[2] = Updates_index[3 * i + 2];
for (unsigned int j = 0; j < length_3block; j++) {
Updates_3block[j] = Updates[i * length_3block + j];
}
double *Updates_3block = &Updates[i * length_3block];
unsigned int *Updates_index_3block = &Updates_index[i * 3];
bool ok;
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to SM4
@ -182,13 +145,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
}
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double Updates_2block[length_2block];
unsigned int Updates_index_2block[2];
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
for (unsigned int i = 0; i < length_2block; i++) {
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_2block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM4
@ -196,12 +154,8 @@ void SMWB3(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
SM4(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
}
} else if (remainder == 1) { // Apply last remaining update with SM4
double Updates_1block[length_1block];
unsigned int Updates_index_1block[1];
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
for (unsigned int i = 0; i < length_1block; i++) {
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
}
double *Updates_1block = &Updates[n_of_3blocks * length_3block];
unsigned int *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
SM4(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
} else { // remainder == 0
// Nothing left to do.
@ -222,16 +176,11 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim;
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
// Apply first 2*n_of_2blocks updates in n_of_2blocks blocks of 2 updates with Woodbury 2x2 kernel
if (n_of_2blocks > 0) {
for (unsigned int i = 0; i < n_of_2blocks; i++) {
double Updates_2block[length_2block];
unsigned int Updates_index_2block[2];
Updates_index_2block[0] = Updates_index[2 * i + 0];
Updates_index_2block[1] = Updates_index[2 * i + 1];
for (unsigned int j = 0; j < length_2block; j++) {
Updates_2block[j] = Updates[i * length_2block + j];
}
double *Updates_2block = &Updates[i * length_2block];
unsigned int *Updates_index_2block = &Updates_index[i * 2];
bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM2
@ -246,12 +195,8 @@ void SMWB4(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double
}
if (remainder == 1) { // Apply last remaining update with SM4
double Updates_1block[length_1block];
unsigned int Updates_index_1block[1];
Updates_index_1block[0] = Updates_index[2 * n_of_2blocks + 0];
for (unsigned int i = 0; i < length_1block; i++) {
Updates_1block[i] = Updates[n_of_2blocks * length_2block + i];
}
double *Updates_1block = &Updates[n_of_2blocks * length_2block];
unsigned int *Updates_index_1block = &Updates_index[2 * n_of_2blocks];
SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
} else { // remainder == 0
// Nothing left to do.

View File

@ -4,8 +4,15 @@
#include "Helpers.hpp"
#include "SM_Maponi.hpp"
#include "SM_Standard.hpp"
#include "Woodbury.hpp"
#include "SMWB.hpp"
// #define PERF
#ifdef PERF
unsigned int repetition_number;
#endif
using namespace H5;
// #define DEBUG
@ -71,6 +78,48 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
showMatrix(u, dim, "Updates");
#endif
#ifdef PERF
std::cout << "# of reps. = " << repetition_number << std::endl;
double *slater_inverse_nonpersistent = new double[dim * dim];
for (unsigned int i = 0; i < repetition_number; i++) {
std::memcpy(slater_inverse_nonpersistent, slater_inverse, dim * dim * sizeof(double));
if (version == "maponia3") {
MaponiA3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "maponia3s") {
MaponiA3S(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "sm1") {
SM1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "sm2") {
SM2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "sm3") {
SM3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "sm4") {
SM4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "wb2") {
WB2(slater_inverse_nonpersistent, dim, u, col_update_index);
} else if (version == "wb3") {
WB3(slater_inverse_nonpersistent, dim, u, col_update_index);
} else if (version == "smwb1") {
SMWB1(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "smwb2") {
SMWB2(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "smwb3") {
SMWB3(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
} else if (version == "smwb4") {
SMWB4(slater_inverse_nonpersistent, dim, nupdates, u, col_update_index);
#ifdef MKL
} else if (version == "lapack") {
memcpy(slater_inverse_nonpersistent, slater_matrix, dim * dim * sizeof(double));
inverse(slater_inverse_nonpersistent, dim);
#endif // MKL
} else {
std::cerr << "Unknown version " << version << std::endl;
exit(1);
}
}
std::memcpy(slater_inverse, slater_inverse_nonpersistent, dim * dim * sizeof(double));
delete[] slater_inverse_nonpersistent;
#else
if (version == "maponia3") {
MaponiA3(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "maponia3s") {
@ -83,6 +132,10 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
SM3(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "sm4") {
SM4(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "wb2") {
WB2(slater_inverse, dim, u, col_update_index);
} else if (version == "wb3") {
WB3(slater_inverse, dim, u, col_update_index);
} else if (version == "smwb1") {
SMWB1(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "smwb2") {
@ -95,11 +148,12 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
} else if (version == "lapack") {
memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double));
inverse(slater_inverse, dim);
#endif
#endif // MKL
} else {
std::cerr << "Unknown version " << version << std::endl;
exit(1);
}
#endif // PERF
#ifdef DEBUG2
showMatrix(slater_matrix, dim, "NEW Slater");
@ -112,22 +166,9 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
double *res = new double[dim * dim]{0};
matMul(slater_matrix, slater_inverse, res, dim);
bool ok = is_identity(res, dim, tolerance);
double res_max = residual_max(res, dim);
double res2 = residual_frobenius2(res, dim);
// double det;
// double **tmp = new double *[dim];
// for (int i = 0; i < dim; i++) {
// tmp[i] = new double[dim];
// for (int j = 0; j < dim; j++) {
// tmp[i][j] = res[i * dim + j];
// }
// }
// det = determinant(tmp, dim);
// delete[] tmp;
// std::cout << "Residual = " << version << " " << cycle << " " << res_max <<
// " "
// << res2 << " " << det << std::endl;
std::cout << "Residual = " << version << " " << cycle << " " << res_max << " "
<< res2 << std::endl;
@ -141,7 +182,11 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
}
int main(int argc, char **argv) {
#ifdef PERF
if (argc != 6) {
#else
if (argc != 5) {
#endif
std::cerr << "Execute from within 'datasets/'" << std::endl;
std::cerr
<< "usage: test_h5 <version> <start cycle> <stop cycle> <tolerance>"
@ -154,6 +199,10 @@ int main(int argc, char **argv) {
double tolerance = std::stod(argv[4]);
H5File file(FILE_NAME, H5F_ACC_RDONLY);
#ifdef PERF
repetition_number = std::stoi(argv[5]);
#endif
bool ok;
for (int cycle = start_cycle; cycle < stop_cycle + 1; cycle++) {
ok = test_cycle(file, cycle, version, tolerance);