Merge pull request #40 from fmgjcoppens/woodbury

Woodbury  2x2 and 3x3 kernels + 
Unified Sherman-Morrison-Woodbury kernel (SMWB1).
This commit is contained in:
François Coppens 2021-06-15 12:02:34 +02:00 committed by GitHub
commit 8dac636c94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 378 additions and 30 deletions

View File

@ -42,7 +42,9 @@ FFLAGS = $(CXXFLAGS)
INCLUDE = -I $(INC_DIR)/ INCLUDE = -I $(INC_DIR)/
DEPS_CXX = $(OBJ_DIR)/SM_Maponi.o \ DEPS_CXX = $(OBJ_DIR)/SM_Maponi.o \
$(OBJ_DIR)/SM_Standard.o \ $(OBJ_DIR)/SM_Standard.o \
$(OBJ_DIR)/SM_Helpers.o $(OBJ_DIR)/Woodbury.o \
$(OBJ_DIR)/SMWB.o \
$(OBJ_DIR)/Helpers.o
DEPS_F = $(DEPS_CXX) \ DEPS_F = $(DEPS_CXX) \
$(OBJ_DIR)/finterface_mod.o \ $(OBJ_DIR)/finterface_mod.o \
$(OBJ_DIR)/helpers_mod.o $(OBJ_DIR)/helpers_mod.o

View File

@ -1,4 +1,4 @@
// SM_Helpers.hpp // Helpers.hpp
// Some usefull helper functions to support the Maponi algorithm. // Some usefull helper functions to support the Maponi algorithm.
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
@ -56,6 +56,25 @@ void showMatrix(T *matrix, unsigned int M, std::string name) {
std::cout << std::endl; std::cout << std::endl;
} }
template <typename T>
void showMatrix2(T *matrix, unsigned int M, unsigned int N, std::string name) {
std::cout.precision(17);
std::cout << name << " = [" << std::endl;
for (unsigned int i = 0; i < M; i++) {
std::cout << "[";
for (unsigned int j = 0; j < N; j++) {
if (matrix[i * N + j] >= 0) {
std::cout << " " << matrix[i * N + j] << ",";
} else {
std::cout << " " << matrix[i * N + j] << ",";
}
}
std::cout << " ]," << std::endl;
}
std::cout << "]" << std::endl;
std::cout << std::endl;
}
template <typename T> T *transpose(T *A, unsigned int M) { template <typename T> T *transpose(T *A, unsigned int M) {
T *B = new T[M * M]; T *B = new T[M * M];
for (unsigned int i = 0; i < M; i++) { for (unsigned int i = 0; i < M; i++) {
@ -77,6 +96,18 @@ template <typename T> void matMul(T *A, T *B, T *C, unsigned int M) {
} }
} }
template <typename T1, typename T2, typename T3>
void matMul2(T1 *A, T2 *B, T3 *C, unsigned int M, unsigned int N, unsigned int P) {
for(unsigned int i = 0; i < M; i++) {
for(unsigned int j = 0; j < P; j++) {
C[i * P + j] = 0;
for(unsigned int k = 0; k < N; k++) {
C[i * P + j] += A[i * N + k] * B[k * P + j];
}
}
}
}
template <typename T1, typename T2> template <typename T1, typename T2>
T1 *outProd(T1 *vec1, T2 *vec2, unsigned int M) { T1 *outProd(T1 *vec1, T2 *vec2, unsigned int M) {
T1 *C = new T1[M * M]; T1 *C = new T1[M * M];

4
include/SMWB.hpp Normal file
View File

@ -0,0 +1,4 @@
// Sherman-Morrison-Woodbury kernel 1
// WB2, WB3, SM2 mixing scheme 1
void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates,
double *Updates, unsigned int *Updates_index);

7
include/Woodbury.hpp Normal file
View File

@ -0,0 +1,7 @@
// Woodbury 2x2 kernel
bool WB2(double *Slater_inv, unsigned int Dim,
double *Updates, unsigned int *Updates_index);
// Woodbury 3x3 kernel
bool WB3(double *Slater_inv, unsigned int Dim,
double *Updates, unsigned int *Updates_index);

View File

@ -3,6 +3,10 @@ unset THRESHOLD
unset MKL unset MKL
ENV=$1 ENV=$1
echo
echo "Sherman-Morrison-Woodbury parameters"
echo "------------------------------------"
## Set Sherman-Morrison root dir ## Set Sherman-Morrison root dir
PWD=$(pwd) PWD=$(pwd)
SRCDIR=$(dirname $BASH_SOURCE) SRCDIR=$(dirname $BASH_SOURCE)

View File

@ -1,4 +1,4 @@
#include "SM_Helpers.hpp" #include "Helpers.hpp"
// Set common break-down threshold // Set common break-down threshold
double threshold() { double threshold() {

71
src/SMWB.cpp Normal file
View File

@ -0,0 +1,71 @@
#include "SMWB.hpp"
#include "SM_Standard.hpp"
#include "Woodbury.hpp"
#include "Helpers.hpp"
// Sherman-Morrison-Woodbury kernel 1
// WB2, WB3, SM2 mixing scheme 1
void SMWB1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, double *Updates, unsigned int *Updates_index) {
std::cerr << "Called Sherman-Morrison-Woodbury kernel 1 with " << N_updates << " updates" << std::endl;
unsigned int n_of_3blocks = N_updates / 3;
unsigned int remainder = N_updates % 3;
unsigned int length_3block = 3 * Dim;
unsigned int length_2block = 2 * Dim;
unsigned int length_1block = 1 * Dim;
// std::cerr << "Number of blocks: " << n_of_3blocks << ". Remainder: " << remainder << "." << std::endl;
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with Woodbury 3x3 kernel
if (n_of_3blocks > 0) {
for (unsigned int i = 0; i < n_of_3blocks; i++) {
double Updates_3block[length_3block];
unsigned int Updates_index_3block[3];
Updates_index_3block[0] = Updates_index[3 * i + 0];
Updates_index_3block[1] = Updates_index[3 * i + 1];
Updates_index_3block[2] = Updates_index[3 * i + 2];
for (unsigned int j = 0; j < length_3block; j++) {
Updates_3block[j] = Updates[i * length_3block + j];
}
bool ok;
ok = WB3(Slater_inv, Dim, Updates_3block, Updates_index_3block);
if (!ok) { // Send the entire block to SM2
std::cerr << "Woodbury 3x3 kernel failed! Sending block to SM2" << std::endl;
SM2(Slater_inv, Dim, 3, Updates_3block, Updates_index_3block);
}
}
}
if (remainder == 2) { // Apply last remaining block of 2 updates with Woodbury 2x2 kernel
double Updates_2block[length_2block];
unsigned int Updates_index_2block[2];
Updates_index_2block[0] = Updates_index[3 * n_of_3blocks + 0];
Updates_index_2block[1] = Updates_index[3 * n_of_3blocks + 1];
for (unsigned int i = 0; i < length_2block; i++) {
Updates_2block[i] = Updates[n_of_3blocks * length_3block + i];
}
bool ok;
ok = WB2(Slater_inv, Dim, Updates_2block, Updates_index_2block);
if (!ok) { // Send the entire block to SM2
std::cerr << "Woodbury 2x2 kernel failed! Sending block to SM2" << std::endl;
SM2(Slater_inv, Dim, 2, Updates_2block, Updates_index_2block);
}
} else if (remainder == 1) { // Apply last remaining update with SM2
double Updates_1block[length_1block];
unsigned int Updates_index_1block[1];
Updates_index_1block[0] = Updates_index[3 * n_of_3blocks + 0];
for (unsigned int i = 0; i < length_1block; i++) {
Updates_1block[i] = Updates[n_of_3blocks * length_3block + i];
}
SM2(Slater_inv, Dim, 1, Updates_1block, Updates_index_1block);
} else { // remainder == 0
// Nothing left to do.
}
}
extern "C" {
void SMWB1_f(double **linSlater_inv, unsigned int *Dim, unsigned int *N_updates,
double **linUpdates, unsigned int **Updates_index) {
SMWB1(*linSlater_inv, *Dim, *N_updates, *linUpdates, *Updates_index);
}
}

View File

@ -2,7 +2,7 @@
// Algorithm 3 from P. Maponi, // Algorithm 3 from P. Maponi,
// p. 283, doi:10.1016/j.laa.2006.07.007 // p. 283, doi:10.1016/j.laa.2006.07.007
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
#include "SM_Helpers.hpp" #include "Helpers.hpp"
// #define DEBUG // #define DEBUG

View File

@ -1,7 +1,7 @@
// SM-Standard.cpp // SM-Standard.cpp
// Standard Sherman Morrison with multiple updates // Standard Sherman Morrison with multiple updates
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "SM_Helpers.hpp" #include "Helpers.hpp"
// Naïve Sherman Morrison // Naïve Sherman Morrison
void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates, void SM1(double *Slater_inv, unsigned int Dim, unsigned int N_updates,

219
src/Woodbury.cpp Normal file
View File

@ -0,0 +1,219 @@
/* Woodbury.cpp
Woodbury 2x2 and 3x3 kernels
Woodbury matrix identity:
(S + U V)^{-1} = S^{-1} - C B^{-1} D
All matrices are stored in row-major order
*/
#include "Woodbury.hpp"
#include "Helpers.hpp"
// Woodbury 2x2 kernel
bool WB2(double *Slater_inv, unsigned int Dim, double *Updates,
unsigned int *Updates_index) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
std::cerr << "Called Woodbury 2x2 kernel" << std::endl;
// Construct V from Updates_index
unsigned int V[2 * Dim]; // 2 x Dim matrix stored in row-major order
std::memset(V, 0, 2 * Dim * sizeof(unsigned int));
V[Updates_index[0] - 1] = 1;
V[Dim + Updates_index[1] - 1] = 1;
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[2 * Dim];
for(unsigned int i = 0; i < Dim; i++) {
for(unsigned int j = 0; j < 2; j++) {
C[i * 2 + j] = 0;
for(unsigned int k = 0; k < Dim; k++) {
C[i * 2 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k];
}
}
}
// Compute D = V * S^{-1}
double D[2 * Dim];
matMul2(V, Slater_inv, D, 2, Dim, Dim);
// Compute B = 1 + V * C
double B[4];
matMul2(V, C, B, 2, Dim, 2);
B[0] += 1;
B[3] += 1;
// Compute B^{-1} with explicit formula for 2x2 inversion
double idet = 1.0 / (B[0] * B[3] - B[1] * B[2]);
double Binv[4];
Binv[0] = idet * B[3];
Binv[1] = -1.0 * idet * B[1];
Binv[2] = -1.0 * idet * B[2];
Binv[3] = idet * B[0];
// Check if determinant of inverted matrix is not zero
double det = Binv[0] * Binv[3] - Binv[1] * Binv[2];
if (std::fabs(det) < threshold()) {
std::cerr << "Determinant approaching 0!" << std::endl;
return false;
}
// Compute B^{-1} x D
double tmp[2 * Dim];
matMul2(Binv, D, tmp, 2, 2, Dim);
// Compute C x B^{-1} x D
double tmp2[Dim * Dim];
matMul2(C, tmp, tmp2, Dim, 2, Dim);
// Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D
for (unsigned int i = 0; i < Dim * Dim; i++) {
Slater_inv[i] -= tmp2[i];
}
return true;
}
// Woodbury 3x3 kernel
bool WB3(double *Slater_inv, unsigned int Dim, double *Updates,
unsigned int *Updates_index) {
/*
C := S^{-1} * U, dim x 3
B := 1 + V * C, 3 x 3
D := V * S^{-1}, 3 x dim
*/
std::cerr << "Called Woodbury 3x3 kernel" << std::endl;
#ifdef DEBUG
showMatrix2(Slater_inv, Dim, Dim, "Slater_inv BEFORE update");
showMatrix2(Updates, 3, Dim, "Updates");
showMatrix2(Updates_index, 1, 3, "Updates_index");
#endif
// Construct V from Updates_index
unsigned int V[3 * Dim]; // 3 x Dim matrix stored in row-major order
std::memset(V, 0, 3 * Dim * sizeof(unsigned int));
V[Updates_index[0] - 1] = 1;
V[Dim + Updates_index[1] - 1] = 1;
V[2 * Dim + Updates_index[2] - 1] = 1;
#ifdef DEBUG
showMatrix2(V, 3, Dim, "V");
#endif
// Compute C = S_inv * U !! NON-STANDARD MATRIX MULTIPLICATION BECAUSE
// OF LAYOUT OF 'Updates' !!
double C[3 * Dim];
for(unsigned int i = 0; i < Dim; i++) {
for(unsigned int j = 0; j < 3; j++) {
C[i * 3 + j] = 0;
for(unsigned int k = 0; k < Dim; k++) {
C[i * 3 + j] += Slater_inv[i * Dim + k] * Updates[Dim * j + k];
}
}
}
#ifdef DEBUG
showMatrix2(C, Dim, 3, "C = S_inv * U");
#endif
// Compute D = V * S^{-1}
double D[3 * Dim];
matMul2(V, Slater_inv, D, 3, Dim, Dim);
#ifdef DEBUG
showMatrix2(D, 3, Dim, "D = V * S_inv");
#endif
// Compute B = 1 + V * C
double B[9];
matMul2(V, C, B, 3, Dim, 3);
B[0] += 1;
B[4] += 1;
B[8] += 1;
#ifdef DEBUG
showMatrix2(B, 3, 3, "B = 1 + V * C");
#endif
// Compute B^{-1} with explicit formula for 3x3 inversion
double Binv[9], det, idet;
det = B[0] * (B[4] * B[8] - B[5] * B[7]) -
B[1] * (B[3] * B[8] - B[5] * B[6]) +
B[2] * (B[3] * B[7] - B[4] * B[6]);
idet = 1.0 / det;
#ifdef DEBUG
std::cerr << "Determinant of B = " << det << std::endl;
#endif
Binv[0] = ( B[4] * B[8] - B[7] * B[5] ) * idet;
Binv[1] = - ( B[1] * B[8] - B[7] * B[2] ) * idet;
Binv[2] = ( B[1] * B[5] - B[4] * B[2] ) * idet;
Binv[3] = - ( B[3] * B[8] - B[6] * B[5] ) * idet;
Binv[4] = ( B[0] * B[8] - B[6] * B[2] ) * idet;
Binv[5] = - ( B[0] * B[5] - B[3] * B[2] ) * idet;
Binv[6] = ( B[3] * B[7] - B[6] * B[4] ) * idet;
Binv[7] = - ( B[0] * B[7] - B[6] * B[1] ) * idet;
Binv[8] = ( B[0] * B[4] - B[3] * B[1] ) * idet;
#ifdef DEBUG
showMatrix2(Binv, 3, 3, "Binv");
#endif
// Check if determinant of inverted matrix is not zero
// double det;
det = Binv[0] * (Binv[4] * Binv[8] - Binv[5] * Binv[7]) -
Binv[1] * (Binv[3] * Binv[8] - Binv[5] * Binv[6]) +
Binv[2] * (Binv[3] * Binv[7] - Binv[4] * Binv[6]);
#ifdef DEBUG
std::cerr << "Determinant of Binv = " << det << std::endl;
#endif
if (std::fabs(det) < threshold()) {
std::cerr << "Determinant approached 0!" << std::endl;
return false;
}
// Compute B^{-1} x D
double tmp[3 * Dim];
matMul2(Binv, D, tmp, 3, 3, Dim);
#ifdef DEBUG
showMatrix2(tmp, 3, Dim, "tmp = Binv * D");
#endif
// Compute C x B^{-1} x D
double tmp2[Dim * Dim];
matMul2(C, tmp, tmp2, Dim, 3, Dim);
#ifdef DEBUG
showMatrix2(tmp2, Dim, Dim, "tmp2 = C * tmp");
#endif
// Compute (S + U V)^{-1} = S^{-1} - C B^{-1} D
for (unsigned int i = 0; i < Dim * Dim; i++) {
Slater_inv[i] -= tmp2[i];
}
#ifdef DEBUG
showMatrix2(Slater_inv, Dim, Dim, "Slater_inv AFTER update");
#endif
return true;
}
extern "C" {
bool WB2_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates,
unsigned int **Updates_index) {
WB2(*linSlater_inv, *Dim, *linUpdates, *Updates_index);
}
bool WB3_f(double **linSlater_inv, unsigned int *Dim, double **linUpdates,
unsigned int **Updates_index) {
WB3(*linSlater_inv, *Dim, *linUpdates, *Updates_index);
}
}

View File

@ -43,5 +43,12 @@ module Sherman_Morrison
real(c_double), dimension(:,:), allocatable, intent(in) :: Updates real(c_double), dimension(:,:), allocatable, intent(in) :: Updates
real(c_double), dimension(:,:), allocatable, intent(in out) :: Slater_inv real(c_double), dimension(:,:), allocatable, intent(in out) :: Slater_inv
end subroutine SM4 end subroutine SM4
end interface subroutine SMWB1(Slater_inv, dim, n_updates, Updates, Updates_index) bind(C, name="SMWB1_f")
use, intrinsic :: iso_c_binding, only : c_int, c_double
integer(c_int), intent(in) :: dim, n_updates
integer(c_int), dimension(:), allocatable, intent(in) :: Updates_index
real(c_double), dimension(:,:), allocatable, intent(in) :: Updates
real(c_double), dimension(:,:), allocatable, intent(in out) :: Slater_inv
end subroutine SMWB1
end interface
end module Sherman_Morrison end module Sherman_Morrison

View File

@ -33,15 +33,15 @@ program QMCChem_dataset_test
close(2000) close(2000)
close(3000) close(3000)
!! Write Updates to file to check ! !! Write Updates to file to check
open(unit = 2000, file = "Updates.dat") ! open(unit = 2000, file = "Updates.dat")
do i=1,dim ! do i=1,dim
do j=1,n_updates ! do j=1,n_updates
write(2000,"(E23.15, 1X)", advance="no") Updates(i,j) ! write(2000,"(E23.15, 1X)", advance="no") Updates(i,j)
end do ! end do
write(2000,*) ! write(2000,*)
end do ! end do
close(2000) ! close(2000)
!! Update S & transform replacement updates 'Updates' !! Update S & transform replacement updates 'Updates'
!! into additive updates 'U' to compute the inverse !! into additive updates 'U' to compute the inverse
@ -53,6 +53,16 @@ program QMCChem_dataset_test
end do end do
end do end do
!! Write Updates to file to check
open(unit = 2000, file = "Updates.dat")
do i=1,dim
do j=1,n_updates
write(2000,"(E23.15, 1X)", advance="no") U(i,j)
end do
write(2000,*)
end do
close(2000)
!! Update S_inv !! Update S_inv
!! S_inv needs to be transposed first before it !! S_inv needs to be transposed first before it
!! goes to MaponiA3 !! goes to MaponiA3

View File

@ -1,5 +1,5 @@
// main.cpp // main.cpp
#include "SM_Helpers.hpp" #include "Helpers.hpp"
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
int main() { int main() {

View File

@ -1,9 +1,10 @@
#include "hdf5/serial/H5Cpp.h" #include "hdf5/serial/H5Cpp.h"
#include "hdf5/serial/hdf5.h" #include "hdf5/serial/hdf5.h"
#include "SM_Helpers.hpp" #include "Helpers.hpp"
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "SMWB.hpp"
using namespace H5; using namespace H5;
// #define DEBUG // #define DEBUG
@ -81,6 +82,8 @@ int test_cycle(H5File file, int cycle, std::string version, double tolerance) {
SM3(slater_inverse, dim, nupdates, u, col_update_index); SM3(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "sm4") { } else if (version == "sm4") {
SM4(slater_inverse, dim, nupdates, u, col_update_index); SM4(slater_inverse, dim, nupdates, u, col_update_index);
} else if (version == "smwb1") {
SMWB1(slater_inverse, dim, nupdates, u, col_update_index);
#ifdef MKL #ifdef MKL
} else if (version == "lapack") { } else if (version == "lapack") {
memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double)); memcpy(slater_inverse, slater_matrix, dim * dim * sizeof(double));

View File

@ -10,7 +10,7 @@
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include "SM_Helpers.hpp" #include "Helpers.hpp"
#include "SM_Maponi.hpp" #include "SM_Maponi.hpp"
#include "SM_Standard.hpp" #include "SM_Standard.hpp"
#include "vfc_probe.h" #include "vfc_probe.h"

View File

@ -1,10 +0,0 @@
-=={TODO}==-
* Looking at the eigenvalues of the intermediate matrix after a problematic update is
* plotting the ratio of the moduli of the maximum and the minimum eigenvalues of
* Use valgrind to find the problem with MaponiA3S
* Contact Claudia if she would be interested in sharing datasets from Champ to test with SM
* Make a representative selection of update cycles for presentatoin puroses

View File

@ -9,7 +9,7 @@ then
exit 1 exit 1
fi fi
for ext in c cpp h hpp for ext in c cc cpp h hpp
do do
find $SMROOT -type f -iname "*.${ext}" -exec echo "$FORMATER $STYLE" {} \; find $SMROOT -type f -iname "*.${ext}" -exec echo "$FORMATER $STYLE" {} \;
done done