From 22590b7684a256fc5da9599e1d38a678f3fb7269 Mon Sep 17 00:00:00 2001 From: Francois Coppens Date: Tue, 15 Jun 2021 11:53:04 +0200 Subject: [PATCH] * Woodburry 3x3 kernel fixed * Written Unified Sherman-Morrison-Woodbury kernel that partitions the updates in blocks of 3 and tries them with Woodbury 3x3. The remainder of 2 or one are attempted with Woodbury 2x2 and SM2. For now the unified kernel gives fails where pure SM2 does not. I suspect there is something going wrong in how the updates are partitioned. --- include/Helpers.hpp | 20 ----- src/SMWB.cpp | 61 ++++++++++++-- src/Woodbury.cpp | 145 ++++++++++++++++++++++++--------- tests/QMCChem_dataset_test.f90 | 28 +++++-- tests/test_h5.cpp | 2 - 5 files changed, 181 insertions(+), 75 deletions(-) diff --git a/include/Helpers.hpp b/include/Helpers.hpp index cd6fdbb..f2a4c5d 100644 --- a/include/Helpers.hpp +++ b/include/Helpers.hpp @@ -75,26 +75,6 @@ void showMatrix2(T *matrix, unsigned int M, unsigned int N, std::string name) { std::cout << std::endl; } - -template -void showMatrixNS(T *matrix, unsigned int M, unsigned int N, std::string name) { - std::cout.precision(17); - std::cout << name << " = [" << std::endl; - for (unsigned int i = 0; i < M; i++) { - std::cout << "["; - for (unsigned int j = 0; j < N; j++) { - if (matrix[i * N + j] >= 0) { - std::cout << " " << matrix[i * N + j] << ","; - } else { - std::cout << " " << matrix[i * N + j] << ","; - } - } - std::cout << " ]," << std::endl; - } - std::cout << "]" << std::endl; - std::cout << std::endl; -} - template T *transpose(T *A, unsigned int M) { T *B = new T[M * M]; for (unsigned int i = 0; i < M; i++) { diff --git a/src/SMWB.cpp b/src/SMWB.cpp index 670920b..a112dd7 100644 --- a/src/SMWB.cpp +++ b/src/SMWB.cpp @@ -8,13 +8,60 @@ void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) { std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; - bool ok; - ok = WB2(Slater_inv, Dim, Updates, Updates_index); - if (!ok) { - std::cerr << "Woodbury kernel failed!" << std::endl; - SM2(Slater_inv, Dim, N_updates, Updates, Updates_index); - } - } + unsigned int n_of_3blocks = N_updates / 3; + unsigned int remainder = N_updates % 3; + unsigned int length_3block = 3 * Dim; + unsigned int length_2block = 2 * Dim; + unsigned int length_1block = 1 * Dim; + + // std::cerr << "Number of blocks: " << n_of_3blocks << ". Remainder: " << remainder << "." << std::endl; + + // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel + if (n_of_3blocks > 0) { + for (unsigned int i = 0; i < n_of_3blocks; i++) { + double Updates_3block[length_3block]; + unsigned int Updates_index_3block[3]; + Updates_index_3block[0] = Updates_index[3 * i + 0]; + Updates_index_3block[1] = Updates_index[3 * i + 1]; + Updates_index_3block[2] = Updates_index[3 * i + 2]; + for (unsigned int j = 0; j < length_3block; j++) { + Updates_3block[j] = Updates[i * length_3block + j]; + } + bool ok; + ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); + if (!ok) { // Send the entire block to SM2 + std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2" << std::endl; + SM2(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block); + } + } + } + + if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel + double Updates_2block[length_2block]; + unsigned int Updates_index_2block[2]; + Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0]; + Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1]; + for (unsigned int i = 0; i < length_2block; i++) { + Updates_2block[i] = Updates[n_of_3blocks * length_3block + i]; + } + bool ok; + ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); + if (!ok) { // Send the entire block to SM2 + std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2" << std::endl; + SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block); + } + } else if (remainder == 1) { // Apply last remaining update with SM2 + double Updates_1block[length_1block]; + unsigned int Updates_index_1block[1]; + Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0]; + for (unsigned int i = 0; i < length_1block; i++) { + Updates_1block[i] = Updates[n_of_3blocks * length_3block + i]; + } + SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); + } else { // remainder == 0 + // Nothing left to do. + } +} extern "C" { void SMWB1_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, diff --git a/src/Woodbury.cpp b/src/Woodbury.cpp index 43eb301..dacf676 100644 --- a/src/Woodbury.cpp +++ b/src/Woodbury.cpp @@ -2,21 +2,22 @@ Woodbury 2x2 and 3x3 kernels Woodbury matrix identity: - (S + U V)^{-1} = S^{-1} - C B^{-1} D - C := S^{-1} U, dim x 2 - B := 1 + V C, 2 x 2 - D := V S^{-1}, 2 x dim - - All matrices are stored in row-major order */ + All matrices are stored in row-major order +*/ #include "Woodbury.hpp" #include "Helpers.hpp" -// Woodbury 2x2 kernel: +// Woodbury 2x2 kernel bool WB2(double *Slater_inv, unsigned int Dim, double *Updates, unsigned int *Updates_index) { + /* + C := S^{-1} * U, dim x 2 + B := 1 + V * C, 2 x 2 + D := V * S^{-1}, 2 x dim + */ std::cerr << "Called Woodbury 2x2 kernel" << std::endl; // Construct V from Updates_index @@ -25,23 +26,23 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates, V[Updates_index[0] - 1] = 1; V[Dim + Updates_index[1] - 1] = 1; - // Compute C = S_inv U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE + // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE // OF LAYOUT OF 'Updates' !! double C[2 * Dim]; for(unsigned int i = 0; i < Dim; i++) { for(unsigned int j = 0; j < 2; j++) { C[i * 2 + j] = 0; for(unsigned int k = 0; k < Dim; k++) { - C[i * 2 + j] += Slater_inv[i * Dim + k] * Updates[4 * j + k]; + C[i * 2 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k]; } } } - // Compute D = V x S^{-1} + // Compute D = V * S^{-1} double D[2 * Dim]; matMul2(V, Slater_inv, D, 2, Dim, Dim); - // Compute B = 1 + V C + // Compute B = 1 + V * C double B[4]; matMul2(V, C, B, 2, Dim, 2); B[0] += 1; @@ -56,9 +57,9 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates, Binv[3] = idet * B[0]; // Check if determinant of inverted matrix is not zero - double det = B[0] * B[3] - B[1] * B[2]; + double det = Binv[0] * Binv[3] - Binv[1] * Binv[2]; if (std::fabs(det) < threshold()) { - std::cerr << "Determinant approached 0!" << std::endl; + std::cerr << "Determinant approaching 0!" << std::endl; return false; } @@ -79,60 +80,130 @@ bool WB2(double *Slater_inv, unsigned int Dim, double *Updates, } // Woodbury 3x3 kernel -bool WB3(double *Slater_inv, unsigned int Dim, double *Updates, +bool WB3(double *Slater_inv, unsigned int Dim, double *Updates, unsigned int *Updates_index) { + /* + C := S^{-1} * U, dim x 3 + B := 1 + V * C, 3 x 3 + D := V * S^{-1}, 3 x dim + */ std::cerr << "Called Woodbury 3x3 kernel" << std::endl; - + +#ifdef DEBUG + showMatrix2(Slater_inv, Dim, Dim, "Slater_inv BEFORE update"); + showMatrix2(Updates, 3, Dim, "Updates"); + showMatrix2(Updates_index, 1, 3, "Updates_index"); +#endif + // Construct V from Updates_index - unsigned int V[3 * Dim]; // 2 x Dim matrix stored in row-major order + unsigned int V[3 * Dim]; // 3 x Dim matrix stored in row-major order + std::memset(V, 0, 3 * Dim * sizeof(unsigned int)); V[Updates_index[0] - 1] = 1; V[Dim + Updates_index[1] - 1] = 1; V[2 * Dim + Updates_index[2] - 1] = 1; - // Compute C +#ifdef DEBUG + showMatrix2(V, 3, Dim, "V"); +#endif + + // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE + // OF LAYOUT OF 'Updates' !! double C[3 * Dim]; - matMul2(Slater_inv, Updates, C, Dim, Dim, 3); - // Compute B + for(unsigned int i = 0; i < Dim; i++) { + for(unsigned int j = 0; j < 3; j++) { + C[i * 3 + j] = 0; + for(unsigned int k = 0; k < Dim; k++) { + C[i * 3 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k]; + } + } + } + +#ifdef DEBUG + showMatrix2(C, Dim, 3, "C = S_inv * U"); +#endif + // Compute D = V * S^{-1} + double D[3 * Dim]; + matMul2(V, Slater_inv, D, 3, Dim, Dim); + +#ifdef DEBUG + showMatrix2(D, 3, Dim, "D = V * S_inv"); +#endif + + // Compute B = 1 + V * C double B[9]; matMul2(V, C, B, 3, Dim, 3); - // Compute 1 + B B[0] += 1; B[4] += 1; B[8] += 1; - double Binv[9]; - Binv[0] = B[4] * B[8] - B[5] * B[7]; - Binv[3] = B[5] * B[6] - B[3] * B[8]; - Binv[6] = B[3] * B[7] - B[4] * B[6]; +#ifdef DEBUG + showMatrix2(B, 3, 3, "B = 1 + V * C"); +#endif - Binv[1] = B[2] * B[7] - B[1] * B[8]; - Binv[4] = B[0] * B[8] - B[2] * B[6]; - Binv[7] = B[1] * B[6] - B[0] * B[7]; + // Compute B^{-1} with explicit formula for 3x3 inversion + double Binv[9], det, idet; + det = B[0] * (B[4] * B[8] - B[5] * B[7]) - + B[1] * (B[3] * B[8] - B[5] * B[6]) + + B[2] * (B[3] * B[7] - B[4] * B[6]); + idet = 1.0 / det; - Binv[2] = B[1] * B[5] - B[2] * B[4]; - Binv[5] = B[2] * B[3] - B[0] * B[5]; - Binv[8] = B[0] * B[4] - B[1] * B[3]; +#ifdef DEBUG + std::cerr << "Determinant of B = " << det << std::endl; +#endif + + Binv[0] = ( B[4] * B[8] - B[7] * B[5] ) * idet; + Binv[1] = - ( B[1] * B[8] - B[7] * B[2] ) * idet; + Binv[2] = ( B[1] * B[5] - B[4] * B[2] ) * idet; + Binv[3] = - ( B[3] * B[8] - B[6] * B[5] ) * idet; + Binv[4] = ( B[0] * B[8] - B[6] * B[2] ) * idet; + Binv[5] = - ( B[0] * B[5] - B[3] * B[2] ) * idet; + Binv[6] = ( B[3] * B[7] - B[6] * B[4] ) * idet; + Binv[7] = - ( B[0] * B[7] - B[6] * B[1] ) * idet; + Binv[8] = ( B[0] * B[4] - B[3] * B[1] ) * idet; + +#ifdef DEBUG + showMatrix2(Binv, 3, 3, "Binv"); +#endif // Check if determinant of inverted matrix is not zero - // If so, exigt and return false. - double det; - det = B[0] * (B[4] * B[8] - B[5] * B[7]) - - B[1] * (B[3] * B[8] - B[5] * B[6]) + B[2] * (B[3] * B[7] - B[4] * B[6]); + // double det; + det = Binv[0] * (Binv[4] * Binv[8] - Binv[5] * Binv[7]) - + Binv[1] * (Binv[3] * Binv[8] - Binv[5] * Binv[6]) + + Binv[2] * (Binv[3] * Binv[7] - Binv[4] * Binv[6]); + +#ifdef DEBUG + std::cerr << "Determinant of Binv = " << det << std::endl; +#endif + if (std::fabs(det) < threshold()) { std::cerr << "Determinant approached 0!" << std::endl; return false; } - // Compute (S + U * V)^{-1} with Woobury identity - double D[3 * Dim]; - matMul2(V, Slater_inv, D, 3, Dim, Dim); + // Compute B^{-1} x D double tmp[3 * Dim]; matMul2(Binv, D, tmp, 3, 3, Dim); + +#ifdef DEBUG + showMatrix2(tmp, 3, Dim, "tmp = Binv * D"); +#endif + + // Compute C x B^{-1} x D double tmp2[Dim * Dim]; matMul2(C, tmp, tmp2, Dim, 3, Dim); + +#ifdef DEBUG + showMatrix2(tmp2, Dim, Dim, "tmp2 = C * tmp"); +#endif + + // Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D for (unsigned int i = 0; i < Dim * Dim; i++) { Slater_inv[i] -= tmp2[i]; } + +#ifdef DEBUG + showMatrix2(Slater_inv, Dim, Dim, "Slater_inv AFTER update"); +#endif return true; } diff --git a/tests/QMCChem_dataset_test.f90 b/tests/QMCChem_dataset_test.f90 index b8fac8f..3aae867 100644 --- a/tests/QMCChem_dataset_test.f90 +++ b/tests/QMCChem_dataset_test.f90 @@ -33,15 +33,15 @@ program QMCChem_dataset_test close(2000) close(3000) - !! Write Updates to file to check - open(unit = 2000, file = "Updates.dat") - do i=1,dim - do j=1,n_updates - write(2000,"(E23.15, 1X)", advance="no") Updates(i,j) - end do - write(2000,*) - end do - close(2000) + ! !! Write Updates to file to check + ! open(unit = 2000, file = "Updates.dat") + ! do i=1,dim + ! do j=1,n_updates + ! write(2000,"(E23.15, 1X)", advance="no") Updates(i,j) + ! end do + ! write(2000,*) + ! end do + ! close(2000) !! Update S & transform replacement updates 'Updates' !! into additive updates 'U' to compute the inverse @@ -53,6 +53,16 @@ program QMCChem_dataset_test end do end do + !! Write Updates to file to check + open(unit = 2000, file = "Updates.dat") + do i=1,dim + do j=1,n_updates + write(2000,"(E23.15, 1X)", advance="no") U(i,j) + end do + write(2000,*) + end do + close(2000) + !! Update S_inv !! S_inv needs to be transposed first before it !! goes to MaponiA3 diff --git a/tests/test_h5.cpp b/tests/test_h5.cpp index 9b49a1c..f750f10 100644 --- a/tests/test_h5.cpp +++ b/tests/test_h5.cpp @@ -52,8 +52,6 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { showMatrix(slater_inverse, dim, "OLD Inverse"); #endif - showMatrix2(slater_matrix, dim, dim, "Slater"); - // Transform replacement updates in 'updates[]' into additive updates in 'u[]' for (j = 0; j < nupdates; j++) { for (i = 0; i < dim; i++) {