diff --git a/Makefile b/Makefile index d69bd40..cb7ad9b 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,9 @@ FFLAGS = $(CXXFLAGS) INCLUDE = -I $(INC_DIR)/ DEPS_CXX = $(OBJ_DIR)/SM_Maponi.o \ $(OBJ_DIR)/SM_Standard.o \ - $(OBJ_DIR)/SM_Helpers.o + $(OBJ_DIR)/Woodbury.o \ + $(OBJ_DIR)/SMWB.o \ + $(OBJ_DIR)/Helpers.o DEPS_F = $(DEPS_CXX) \ $(OBJ_DIR)/finterface_mod.o \ $(OBJ_DIR)/helpers_mod.o diff --git a/include/SM_Helpers.hpp b/include/Helpers.hpp similarity index 86% rename from include/SM_Helpers.hpp rename to include/Helpers.hpp index b569d18..f2a4c5d 100644 --- a/include/SM_Helpers.hpp +++ b/include/Helpers.hpp @@ -1,4 +1,4 @@ -// SM_Helpers.hpp +// Helpers.hpp // Some usefull helper functions to support the Maponi algorithm. #include #include @@ -56,6 +56,25 @@ void showMatrix(T *matrix, unsigned int M, std::string name) { std::cout << std::endl; } +template +void showMatrix2(T *matrix, unsigned int M, unsigned int N, std::string name) { + std::cout.precision(17); + std::cout << name << " = [" << std::endl; + for (unsigned int i = 0; i < M; i++) { + std::cout << "["; + for (unsigned int j = 0; j < N; j++) { + if (matrix[i * N + j] >= 0) { + std::cout << " " << matrix[i * N + j] << ","; + } else { + std::cout << " " << matrix[i * N + j] << ","; + } + } + std::cout << " ]," << std::endl; + } + std::cout << "]" << std::endl; + std::cout << std::endl; +} + template T *transpose(T *A, unsigned int M) { T *B = new T[M * M]; for (unsigned int i = 0; i < M; i++) { @@ -77,6 +96,18 @@ template void matMul(T *A, T *B, T *C, unsigned int M) { } } +template +void matMul2(T1 *A, T2 *B, T3 *C, unsigned int M, unsigned int N, unsigned int P) { + for(unsigned int i = 0; i < M; i++) { + for(unsigned int j = 0; j < P; j++) { + C[i * P + j] = 0; + for(unsigned int k = 0; k < N; k++) { + C[i * P + j] += A[i * N + k] * B[k * P + j]; + } + } + } +} + template T1 *outProd(T1 *vec1, T2 *vec2, unsigned int M) { T1 *C = new T1[M * M]; diff --git a/include/SMWB.hpp b/include/SMWB.hpp new file mode 100644 index 0000000..0de1a49 --- /dev/null +++ b/include/SMWB.hpp @@ -0,0 +1,4 @@ +// Sherman-Morrison-Woodbury kernel 1 +// WB2, WB3, SM2 mixing scheme 1 +void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, + double *Updates, unsigned int *Updates_index); diff --git a/include/Woodbury.hpp b/include/Woodbury.hpp new file mode 100644 index 0000000..c03e509 --- /dev/null +++ b/include/Woodbury.hpp @@ -0,0 +1,7 @@ +// Woodbury 2x2 kernel +bool WB2(double *Slater_inv, unsigned int Dim, + double *Updates, unsigned int *Updates_index); + +// Woodbury 3x3 kernel +bool WB3(double *Slater_inv, unsigned int Dim, + double *Updates, unsigned int *Updates_index); diff --git a/smvars.sh b/smvars.sh index 6e846ee..2e9e868 100644 --- a/smvars.sh +++ b/smvars.sh @@ -3,6 +3,10 @@ unset THRESHOLD unset MKL ENV=$1 +echo +echo "Sherman-Morrison-Woodbury parameters" +echo "------------------------------------" + ## Set Sherman-Morrison root dir PWD=$(pwd) SRCDIR=$(dirname $BASH_SOURCE) diff --git a/src/SM_Helpers.cpp b/src/Helpers.cpp similarity index 98% rename from src/SM_Helpers.cpp rename to src/Helpers.cpp index 4d084c0..12ce105 100644 --- a/src/SM_Helpers.cpp +++ b/src/Helpers.cpp @@ -1,4 +1,4 @@ -#include "SM_Helpers.hpp" +#include "Helpers.hpp" // Set common break-down threshold double threshold() { diff --git a/src/SMWB.cpp b/src/SMWB.cpp new file mode 100644 index 0000000..a112dd7 --- /dev/null +++ b/src/SMWB.cpp @@ -0,0 +1,71 @@ +#include "SMWB.hpp" +#include "SM_Standard.hpp" +#include "Woodbury.hpp" +#include "Helpers.hpp" + +// Sherman-Morrison-Woodbury kernel 1 +// WB2, WB3, SM2 mixing scheme 1 +void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) { + std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl; + + unsigned int n_of_3blocks = N_updates / 3; + unsigned int remainder = N_updates % 3; + unsigned int length_3block = 3 * Dim; + unsigned int length_2block = 2 * Dim; + unsigned int length_1block = 1 * Dim; + + // std::cerr << "Number of blocks: " << n_of_3blocks << ". Remainder: " << remainder << "." << std::endl; + + // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel + if (n_of_3blocks > 0) { + for (unsigned int i = 0; i < n_of_3blocks; i++) { + double Updates_3block[length_3block]; + unsigned int Updates_index_3block[3]; + Updates_index_3block[0] = Updates_index[3 * i + 0]; + Updates_index_3block[1] = Updates_index[3 * i + 1]; + Updates_index_3block[2] = Updates_index[3 * i + 2]; + for (unsigned int j = 0; j < length_3block; j++) { + Updates_3block[j] = Updates[i * length_3block + j]; + } + bool ok; + ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block); + if (!ok) { // Send the entire block to SM2 + std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2" << std::endl; + SM2(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block); + } + } + } + + if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel + double Updates_2block[length_2block]; + unsigned int Updates_index_2block[2]; + Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0]; + Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1]; + for (unsigned int i = 0; i < length_2block; i++) { + Updates_2block[i] = Updates[n_of_3blocks * length_3block + i]; + } + bool ok; + ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block); + if (!ok) { // Send the entire block to SM2 + std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2" << std::endl; + SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block); + } + } else if (remainder == 1) { // Apply last remaining update with SM2 + double Updates_1block[length_1block]; + unsigned int Updates_index_1block[1]; + Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0]; + for (unsigned int i = 0; i < length_1block; i++) { + Updates_1block[i] = Updates[n_of_3blocks * length_3block + i]; + } + SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block); + } else { // remainder == 0 + // Nothing left to do. + } +} + +extern "C" { +void SMWB1_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates, + double **linUpdates, unsigned int **Updates_index) { + SMWB1(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index); +} +} diff --git a/src/SM_Maponi.cpp b/src/SM_Maponi.cpp index 7b65fde..65afefa 100644 --- a/src/SM_Maponi.cpp +++ b/src/SM_Maponi.cpp @@ -2,7 +2,7 @@ // Algorithm 3 from P. Maponi, // p. 283, doi:10.1016/j.laa.2006.07.007 #include "SM_Maponi.hpp" -#include "SM_Helpers.hpp" +#include "Helpers.hpp" // #define DEBUG diff --git a/src/SM_Standard.cpp b/src/SM_Standard.cpp index 91b69c9..c68dd81 100644 --- a/src/SM_Standard.cpp +++ b/src/SM_Standard.cpp @@ -1,7 +1,7 @@ // SM-Standard.cpp // Standard Sherman Morrison with multiple updates #include "SM_Standard.hpp" -#include "SM_Helpers.hpp" +#include "Helpers.hpp" // Naïve Sherman Morrison void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, diff --git a/src/Woodbury.cpp b/src/Woodbury.cpp new file mode 100644 index 0000000..dacf676 --- /dev/null +++ b/src/Woodbury.cpp @@ -0,0 +1,219 @@ +/* Woodbury.cpp + Woodbury 2x2 and 3x3 kernels + + Woodbury matrix identity: + (S + U V)^{-1} = S^{-1} - C B^{-1} D + + All matrices are stored in row-major order +*/ + +#include "Woodbury.hpp" +#include "Helpers.hpp" + +// Woodbury 2x2 kernel +bool WB2(double *Slater_inv, unsigned int Dim, double *Updates, + unsigned int *Updates_index) { + /* + C := S^{-1} * U, dim x 2 + B := 1 + V * C, 2 x 2 + D := V * S^{-1}, 2 x dim + */ + std::cerr << "Called Woodbury 2x2 kernel" << std::endl; + + // Construct V from Updates_index + unsigned int V[2 * Dim]; // 2 x Dim matrix stored in row-major order + std::memset(V, 0, 2 * Dim * sizeof(unsigned int)); + V[Updates_index[0] - 1] = 1; + V[Dim + Updates_index[1] - 1] = 1; + + // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE + // OF LAYOUT OF 'Updates' !! + double C[2 * Dim]; + for(unsigned int i = 0; i < Dim; i++) { + for(unsigned int j = 0; j < 2; j++) { + C[i * 2 + j] = 0; + for(unsigned int k = 0; k < Dim; k++) { + C[i * 2 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k]; + } + } + } + + // Compute D = V * S^{-1} + double D[2 * Dim]; + matMul2(V, Slater_inv, D, 2, Dim, Dim); + + // Compute B = 1 + V * C + double B[4]; + matMul2(V, C, B, 2, Dim, 2); + B[0] += 1; + B[3] += 1; + + // Compute B^{-1} with explicit formula for 2x2 inversion + double idet = 1.0 / (B[0] * B[3] - B[1] * B[2]); + double Binv[4]; + Binv[0] = idet * B[3]; + Binv[1] = -1.0 * idet * B[1]; + Binv[2] = -1.0 * idet * B[2]; + Binv[3] = idet * B[0]; + + // Check if determinant of inverted matrix is not zero + double det = Binv[0] * Binv[3] - Binv[1] * Binv[2]; + if (std::fabs(det) < threshold()) { + std::cerr << "Determinant approaching 0!" << std::endl; + return false; + } + + // Compute B^{-1} x D + double tmp[2 * Dim]; + matMul2(Binv, D, tmp, 2, 2, Dim); + + // Compute C x B^{-1} x D + double tmp2[Dim * Dim]; + matMul2(C, tmp, tmp2, Dim, 2, Dim); + + // Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D + for (unsigned int i = 0; i < Dim * Dim; i++) { + Slater_inv[i] -= tmp2[i]; + } + + return true; +} + +// Woodbury 3x3 kernel +bool WB3(double *Slater_inv, unsigned int Dim, double *Updates, + unsigned int *Updates_index) { + /* + C := S^{-1} * U, dim x 3 + B := 1 + V * C, 3 x 3 + D := V * S^{-1}, 3 x dim + */ + std::cerr << "Called Woodbury 3x3 kernel" << std::endl; + +#ifdef DEBUG + showMatrix2(Slater_inv, Dim, Dim, "Slater_inv BEFORE update"); + showMatrix2(Updates, 3, Dim, "Updates"); + showMatrix2(Updates_index, 1, 3, "Updates_index"); +#endif + + // Construct V from Updates_index + unsigned int V[3 * Dim]; // 3 x Dim matrix stored in row-major order + std::memset(V, 0, 3 * Dim * sizeof(unsigned int)); + V[Updates_index[0] - 1] = 1; + V[Dim + Updates_index[1] - 1] = 1; + V[2 * Dim + Updates_index[2] - 1] = 1; + +#ifdef DEBUG + showMatrix2(V, 3, Dim, "V"); +#endif + + // Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE + // OF LAYOUT OF 'Updates' !! + double C[3 * Dim]; + for(unsigned int i = 0; i < Dim; i++) { + for(unsigned int j = 0; j < 3; j++) { + C[i * 3 + j] = 0; + for(unsigned int k = 0; k < Dim; k++) { + C[i * 3 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k]; + } + } + } + +#ifdef DEBUG + showMatrix2(C, Dim, 3, "C = S_inv * U"); +#endif + // Compute D = V * S^{-1} + double D[3 * Dim]; + matMul2(V, Slater_inv, D, 3, Dim, Dim); + +#ifdef DEBUG + showMatrix2(D, 3, Dim, "D = V * S_inv"); +#endif + + // Compute B = 1 + V * C + double B[9]; + matMul2(V, C, B, 3, Dim, 3); + B[0] += 1; + B[4] += 1; + B[8] += 1; + +#ifdef DEBUG + showMatrix2(B, 3, 3, "B = 1 + V * C"); +#endif + + // Compute B^{-1} with explicit formula for 3x3 inversion + double Binv[9], det, idet; + det = B[0] * (B[4] * B[8] - B[5] * B[7]) - + B[1] * (B[3] * B[8] - B[5] * B[6]) + + B[2] * (B[3] * B[7] - B[4] * B[6]); + idet = 1.0 / det; + +#ifdef DEBUG + std::cerr << "Determinant of B = " << det << std::endl; +#endif + + Binv[0] = ( B[4] * B[8] - B[7] * B[5] ) * idet; + Binv[1] = - ( B[1] * B[8] - B[7] * B[2] ) * idet; + Binv[2] = ( B[1] * B[5] - B[4] * B[2] ) * idet; + Binv[3] = - ( B[3] * B[8] - B[6] * B[5] ) * idet; + Binv[4] = ( B[0] * B[8] - B[6] * B[2] ) * idet; + Binv[5] = - ( B[0] * B[5] - B[3] * B[2] ) * idet; + Binv[6] = ( B[3] * B[7] - B[6] * B[4] ) * idet; + Binv[7] = - ( B[0] * B[7] - B[6] * B[1] ) * idet; + Binv[8] = ( B[0] * B[4] - B[3] * B[1] ) * idet; + +#ifdef DEBUG + showMatrix2(Binv, 3, 3, "Binv"); +#endif + + // Check if determinant of inverted matrix is not zero + // double det; + det = Binv[0] * (Binv[4] * Binv[8] - Binv[5] * Binv[7]) - + Binv[1] * (Binv[3] * Binv[8] - Binv[5] * Binv[6]) + + Binv[2] * (Binv[3] * Binv[7] - Binv[4] * Binv[6]); + +#ifdef DEBUG + std::cerr << "Determinant of Binv = " << det << std::endl; +#endif + + if (std::fabs(det) < threshold()) { + std::cerr << "Determinant approached 0!" << std::endl; + return false; + } + + // Compute B^{-1} x D + double tmp[3 * Dim]; + matMul2(Binv, D, tmp, 3, 3, Dim); + +#ifdef DEBUG + showMatrix2(tmp, 3, Dim, "tmp = Binv * D"); +#endif + + // Compute C x B^{-1} x D + double tmp2[Dim * Dim]; + matMul2(C, tmp, tmp2, Dim, 3, Dim); + +#ifdef DEBUG + showMatrix2(tmp2, Dim, Dim, "tmp2 = C * tmp"); +#endif + + // Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D + for (unsigned int i = 0; i < Dim * Dim; i++) { + Slater_inv[i] -= tmp2[i]; + } + +#ifdef DEBUG + showMatrix2(Slater_inv, Dim, Dim, "Slater_inv AFTER update"); +#endif + return true; +} + +extern "C" { +bool WB2_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates, + unsigned int **Updates_index) { + WB2(*linSlater_inv, *Dim, *linUpdates, *Updates_index); +} +bool WB3_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates, + unsigned int **Updates_index) { + WB3(*linSlater_inv, *Dim, *linUpdates, *Updates_index); +} +} diff --git a/src/finterface_mod.f90 b/src/finterface_mod.f90 index bcf072c..09de1e8 100644 --- a/src/finterface_mod.f90 +++ b/src/finterface_mod.f90 @@ -43,5 +43,12 @@ module Sherman_Morrison real(c_double), dimension(:,:), allocatable, intent(in) :: Updates real(c_double), dimension(:,:), allocatable, intent(in out) :: Slater_inv end subroutine SM4 - end interface + subroutine SMWB1(Slater_inv, dim, n_updates, Updates, Updates_index) bind(C, name="SMWB1_f") + use, intrinsic :: iso_c_binding, only : c_int, c_double + integer(c_int), intent(in) :: dim, n_updates + integer(c_int), dimension(:), allocatable, intent(in) :: Updates_index + real(c_double), dimension(:,:), allocatable, intent(in) :: Updates + real(c_double), dimension(:,:), allocatable, intent(in out) :: Slater_inv + end subroutine SMWB1 + end interface end module Sherman_Morrison diff --git a/tests/QMCChem_dataset_test.f90 b/tests/QMCChem_dataset_test.f90 index b8fac8f..3aae867 100644 --- a/tests/QMCChem_dataset_test.f90 +++ b/tests/QMCChem_dataset_test.f90 @@ -33,15 +33,15 @@ program QMCChem_dataset_test close(2000) close(3000) - !! Write Updates to file to check - open(unit = 2000, file = "Updates.dat") - do i=1,dim - do j=1,n_updates - write(2000,"(E23.15, 1X)", advance="no") Updates(i,j) - end do - write(2000,*) - end do - close(2000) + ! !! Write Updates to file to check + ! open(unit = 2000, file = "Updates.dat") + ! do i=1,dim + ! do j=1,n_updates + ! write(2000,"(E23.15, 1X)", advance="no") Updates(i,j) + ! end do + ! write(2000,*) + ! end do + ! close(2000) !! Update S & transform replacement updates 'Updates' !! into additive updates 'U' to compute the inverse @@ -53,6 +53,16 @@ program QMCChem_dataset_test end do end do + !! Write Updates to file to check + open(unit = 2000, file = "Updates.dat") + do i=1,dim + do j=1,n_updates + write(2000,"(E23.15, 1X)", advance="no") U(i,j) + end do + write(2000,*) + end do + close(2000) + !! Update S_inv !! S_inv needs to be transposed first before it !! goes to MaponiA3 diff --git a/tests/cMaponiA3_test_3x3_3.cpp b/tests/cMaponiA3_test_3x3_3.cpp index bcd363a..7f6b2af 100644 --- a/tests/cMaponiA3_test_3x3_3.cpp +++ b/tests/cMaponiA3_test_3x3_3.cpp @@ -1,5 +1,5 @@ // main.cpp -#include "SM_Helpers.hpp" +#include "Helpers.hpp" #include "SM_Maponi.hpp" int main() { diff --git a/tests/test_h5.cpp b/tests/test_h5.cpp index fb0ceda..f750f10 100644 --- a/tests/test_h5.cpp +++ b/tests/test_h5.cpp @@ -1,9 +1,10 @@ #include "hdf5/serial/H5Cpp.h" #include "hdf5/serial/hdf5.h" -#include "SM_Helpers.hpp" +#include "Helpers.hpp" #include "SM_Maponi.hpp" #include "SM_Standard.hpp" +#include "SMWB.hpp" using namespace H5; // #define DEBUG @@ -81,6 +82,8 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) { SM3(slater_inverse, dim, nupdates, u, col_update_index); } else if (version == "sm4") { SM4(slater_inverse, dim, nupdates, u, col_update_index); + } else if (version == "smwb1") { + SMWB1(slater_inverse, dim, nupdates, u, col_update_index); #ifdef MKL } else if (version == "lapack") { memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double)); diff --git a/tests/vfc_test_h5.cpp b/tests/vfc_test_h5.cpp index 920e069..d7f3c8b 100644 --- a/tests/vfc_test_h5.cpp +++ b/tests/vfc_test_h5.cpp @@ -10,7 +10,7 @@ #include #include -#include "SM_Helpers.hpp" +#include "Helpers.hpp" #include "SM_Maponi.hpp" #include "SM_Standard.hpp" #include "vfc_probe.h" diff --git a/todo.txt b/todo.txt deleted file mode 100644 index 5a579c0..0000000 --- a/todo.txt +++ /dev/null @@ -1,10 +0,0 @@ --=={TODO}==- - -* Looking at the eigenvalues of the intermediate matrix after a problematic update is -* plotting the ratio of the moduli of the maximum and the minimum eigenvalues of - -* Use valgrind to find the problem with MaponiA3S -* Contact Claudia if she would be interested in sharing datasets from Champ to test with SM - -* Make a representative selection of update cycles for presentatoin puroses - diff --git a/tools/restyle.sh b/tools/restyle.sh index ba07f81..b16069b 100755 --- a/tools/restyle.sh +++ b/tools/restyle.sh @@ -9,7 +9,7 @@ then exit 1 fi -for ext in c cpp h hpp +for ext in c cc cpp h hpp do find $SMROOT -type f -iname "*.${ext}" -exec echo "$FORMATER $STYLE" {} \; -done \ No newline at end of file +done