diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index e7e78ca..7d2b32d 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -3,42 +3,42 @@ #+INCLUDE: ../tools/lib.org #+STARTUP: content - Low- and high-level functions that use the Sherman-Morrison and - Woodbury matrix inversion formulas to update the inverse of a - non-singular matrix +Low- and high-level functions that use the Sherman-Morrison and +Woodbury matrix inversion formulas to update the inverse of a +non-singular matrix * Headers - #+begin_src elisp :noexport :results none :exports none - (org-babel-lob-ingest "../tools/lib.org") - #+end_src +#+begin_src elisp :noexport :results none :exports none +(org-babel-lob-ingest "../tools/lib.org") +#+end_src - #+begin_src c :comments link :tangle (eval c_test) :noweb yes - #include "qmckl.h" - #include "assert.h" - #ifdef HAVE_CONFIG_H - #include "config.h" - #endif - #include +#+begin_src c :comments link :tangle (eval c_test) :noweb yes +#include "qmckl.h" +#include "assert.h" +#ifdef HAVE_CONFIG_H + #include "config.h" +#endif +#include - int main() { - qmckl_context context; - context = qmckl_context_create(); - qmckl_exit_code rc; - #+end_src +int main() { + qmckl_context context; + context = qmckl_context_create(); + qmckl_exit_code rc; +#+end_src - #+NAME:kernel_generator_range - #+begin_src python :noweb yes :exports none - range(2, 22) - #+end_src +#+NAME:kernel_generator_range +#+begin_src python :noweb yes :exports none +range(2, 22) +#+end_src + * Naïve Sherman-Morrison -** ~qmckl_sherman_morrison_naive~ :PROPERTIES: :Name: qmckl_sherman_morrison_naive :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: - + This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero. @@ -67,18 +67,87 @@ If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting from applying the updates to the original matrix. - #+NAME: qmckl_sherman_morrison_naive_args - | qmckl_context | context | in | Global state | - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv | - | double | Updates[N_updates*Dim] | in | Array containing the updates | - | uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix | - | double* | determinant | inout | Determinant of the Slater-matrix | +#+NAME: qmckl_sherman_morrison_naive_args +| Variable | Type | In/Out | Description | +|-----------------+-------------------------+--------+------------------------------------------------------| +| ~context~ | ~qmckl_context~ | in | Global state | +| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv | +| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv | +| ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv | +| ~Updates~ | ~double[N_updates*LDS]~ | in | Array containing the updates | +| ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing the rank-1 updates | +| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not | +| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse of a Slater-matrix | +| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix | -*** Requirements +** Pedagogical kernel source (in Fortran) + +#+begin_src f90 :tangle (eval f) +integer function qmckl_sherman_morrison_naive_doc_f(context, & + LDS, Dim, & + N_updates, & + Updates, & + Updates_index, & + breakdown, & + Slater_inv, & + determinant) result(info) + + use qmckl + implicit none + integer*8 , intent(in) :: context + integer*8 , intent(in) :: LDS, Dim + integer*8 , intent(in) :: N_updates + integer*8 , intent(in) :: Updates_index(N_updates) + real*8 , intent(in) :: Updates(N_updates*LDS) + real*8 , intent(in) :: breakdown + real*8 , intent(inout) :: Slater_inv(Dim*LDS) + real*8 , intent(inout) :: determinant + + info = 0 + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + write(*,*) "Function 'qmckl_sherman_morrison_naive_doc_f' does nothing for now..." + + info = QMCKL_SUCCESS + +end function qmckl_sherman_morrison_naive_doc_f +#+end_src + +*** C interface to the pedagogical kernel + +#+CALL: generate_c_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") + +#+RESULTS: +#+begin_src f90 :tangle (eval f) :comments org :exports none +integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + integer(c_int32_t), external :: qmckl_sherman_morrison_naive_doc_f + info = qmckl_sherman_morrison_naive_doc_f & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) + +end function qmckl_sherman_morrison_naive_doc +#+end_src + +** Requirements * ~context~ is not ~QMCKL_NULL_CONTEXT~ * ~LDS >= 2~ @@ -88,263 +157,261 @@ * ~Updates_index~ is allocated with $N_updates$ elements * ~breakdown~ is a small number such that $0 < breakdown << 1$ * ~Slater_inv~ is allocated with $Dim \times Dim$ elements + * ~determinant > 0~ -*** Fortran source +** C headers (exposed in qmckl.h) - #+begin_src f90 :tangle (eval f) - integer function qmckl_sherman_morrison_naive_doc_f & - (context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) +#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - use, intrinsic :: iso_c_binding - implicit none +#+RESULTS: +#+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_sherman_morrison_naive ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim) - real (c_double ) , intent(inout) :: determinant +#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_hpc") - write(*,*) "Function 'qmckl_sherman_morrison_naive_doc_f' does nothing for now..." +#+RESULTS: +#+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_sherman_morrison_naive_hpc ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src - end function qmckl_sherman_morrison_naive_doc_f - #+end_src +#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") +#+RESULTS: +#+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_sherman_morrison_naive_doc ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src -*** C header +** C sources - #+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+begin_src c :tangle (eval c) :comments org +#include +#include +#include "qmckl.h" +#include "config.h" - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_sherman_morrison_naive( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src +// Order important because +// __GNUC__ also set in ICC, ICX and CLANG +// __clang__ also set in ICX +#if defined(__INTEL_COMPILER) + #define IVDEP _Pragma("ivdep") + #define ALIGNED _Pragma("vector aligned") +#elif defined(__INTEL_LLVM_COMPILER) + #define IVDEP _Pragma("ivdep") + #define ALIGNED _Pragma("vector aligned") +#elif defined(__clang__) + #define IVDEP _Pragma("clang loop vectorize(enable)") + #define ALIGNED +#elif defined(__GNUC__) + #define IVDEP _Pragma("GCC ivdep") + #define ALIGNED +#endif +#+end_src - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_sherman_morrison_naive_doc( - const qmckl_context context, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src +#+begin_src c :tangle (eval c) :comments org +qmckl_exit_code qmckl_sherman_morrison_naive_hpc( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* __restrict Updates, + const uint64_t* __restrict Updates_index, + const double breakdown, + double* __restrict Slater_inv, + double* __restrict determinant) { -*** C source + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_NULL_CONTEXT, + "qmckl_sherman_morrison_naive_hpc", + NULL); + } - #+begin_src c :tangle (eval c) :comments org - #include - #include - #include "qmckl.h" - #include "config.h" + double __attribute__((aligned(8))) C[Dim]; + double __attribute__((aligned(8))) D[LDS]; - // Order important because - // __GNUC__ also set in ICC, ICX and CLANG - // __clang__ also set in ICX - #if defined(__INTEL_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") - #elif defined(__INTEL_LLVM_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") - #elif defined(__clang__) - #define IVDEP _Pragma("clang loop vectorize(enable)") - #define ALIGNED - #elif defined(__GNUC__) - #define IVDEP _Pragma("GCC ivdep") - #define ALIGNED - #endif - - qmckl_exit_code qmckl_sherman_morrison_naive_hpc( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith( context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_naive_hpc", - NULL); - } - - double __attribute__((aligned(8))) C[Dim]; - double __attribute__((aligned(8))) D[LDS]; - - uint64_t l = 0; - // For each update - while (l < N_updates) { - // C = S^{-1} x u_l - for (uint64_t i = 0; i < Dim; i++) { - C[i] = 0.0f; - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; - } - } - - // Denominator: v_l^T * C - const int cui = Updates_index[l] - 1; - double den = 1.0f + C[cui]; - - if (fabs(den) < breakdown) - return QMCKL_FAILURE; - - double iden = 1.0f / den; - - // Update det(A) - if (determinant) - *determinant *= den; - - // selecting column: v_l^T * S_inv - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - D[j] = Slater_inv[cui * LDS + j]; - } - - // A^{-1} = A^{-1} - C x D / den - for (uint64_t i = 0; i < Dim; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - const double update = C[i] * D[j] * iden; - Slater_inv[i * LDS + j] -= update; - } - } - l += 1; - } - return QMCKL_SUCCESS; + uint64_t l = 0; + // For each update + while (l < N_updates) { + // C = S^{-1} x u_l + for (uint64_t i = 0; i < Dim; i++) { + C[i] = 0.0f; + IVDEP + ALIGNED + for (uint64_t j = 0; j < LDS; j++) { + C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; } - #+end_src + } - #+NAME:naive_template_code - #+begin_src c - static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}( - const qmckl_context context, - const uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { + // Denominator: v_l^T * C + const int cui = Updates_index[l] - 1; + double den = 1.0f + C[cui]; - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_naive_{Dim}", - NULL); - } + if (fabs(den) < breakdown) + return QMCKL_FAILURE; - #define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH) + double iden = 1.0f / den; - double __attribute__((aligned(8))) C[{Dim}]; - double __attribute__((aligned(8))) D[D{Dim}_P]; + // Update det(A) + if (determinant) + *determinant *= den; - uint64_t l = 0; - // For each update - while (l < N_updates) { - // C = A^{-1} x U_l - for (uint64_t i = 0; i < {Dim}; i++) { - C[i] = 0; - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j]; - } - } + // selecting column: v_l^T * S_inv + IVDEP + ALIGNED + for (uint64_t j = 0; j < LDS; j++) { + D[j] = Slater_inv[cui * LDS + j]; + } - // Denominator - const int cui = Updates_index[l] - 1; - double den = 1.0f + C[cui]; - - if (fabs(den) < breakdown) { - return QMCKL_FAILURE; - } - double iden = 1.0f / den; - - // Update det(A) - if (determinant) - *determinant *= den; - - // selecting column: D = v_l^T * S_inv - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - D[j] = Slater_inv[cui * D{Dim}_P + j]; - } - - // A^{-1} = A^{-1} - C x D / den - for (uint64_t i = 0; i < {Dim}; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - double update = C[i] * D[j] * iden; - Slater_inv[i * D{Dim}_P + j] -= update; - } - } - - l += 1; - } - - return QMCKL_SUCCESS; + // A^{-1} = A^{-1} - C x D / den + for (uint64_t i = 0; i < Dim; i++) { + IVDEP + ALIGNED + for (uint64_t j = 0; j < LDS; j++) { + const double update = C[i] * D[j] * iden; + Slater_inv[i * LDS + j] -= update; } - #+end_src + } + l += 1; + } + return QMCKL_SUCCESS; +} +#+end_src - #+NAME:naive_kernel_generator - #+begin_src python :noweb yes :exports none +#+NAME:naive_template_code +#+begin_src c + static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}( + const qmckl_context context, + const uint64_t N_updates, + const double* __restrict Updates, + const uint64_t* __restrict Updates_index, + const double breakdown, + double* __restrict Slater_inv, + double* __restrict determinant) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith(context, + QMCKL_NULL_CONTEXT, + "qmckl_sherman_morrison_naive_{Dim}", + NULL); + } + + #define D{Dim}_P ((1+({Dim}-1)/SIMD_LENGTH)*SIMD_LENGTH) + + double __attribute__((aligned(8))) C[{Dim}]; + double __attribute__((aligned(8))) D[D{Dim}_P]; + + uint64_t l = 0; + // For each update + while (l < N_updates) { + // C = A^{-1} x U_l + for (uint64_t i = 0; i < {Dim}; i++) { + C[i] = 0; + IVDEP + ALIGNED + for (uint64_t j = 0; j < D{Dim}_P; j++) { + C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j]; + } + } + + // Denominator + const int cui = Updates_index[l] - 1; + double den = 1.0f + C[cui]; + + if (fabs(den) < breakdown) { + return QMCKL_FAILURE; + } + double iden = 1.0f / den; + + // Update det(A) + if (determinant) + *determinant *= den; + + // selecting column: D = v_l^T * S_inv + IVDEP + ALIGNED + for (uint64_t j = 0; j < D{Dim}_P; j++) { + D[j] = Slater_inv[cui * D{Dim}_P + j]; + } + + // A^{-1} = A^{-1} - C x D / den + for (uint64_t i = 0; i < {Dim}; i++) { + IVDEP + ALIGNED + for (uint64_t j = 0; j < D{Dim}_P; j++) { + double update = C[i] * D[j] * iden; + Slater_inv[i * D{Dim}_P + j] -= update; + } + } + + l += 1; + } + + return QMCKL_SUCCESS; + } + #+end_src + +#+NAME:naive_kernel_generator +#+begin_src python :noweb yes :exports none text=""" <> """ result = [] for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) + Dim=str(Dim) + result.append(text.replace("{Dim}",Dim) ) return '\n'.join(result) - #+end_src +#+end_src - #+NAME:naive_switch-case_generator - #+begin_src python :noweb yes :exports none +#+NAME:naive_switch-case_generator +#+begin_src python :noweb yes :exports none text=""" case {Dim}: return qmckl_sherman_morrison_naive_{Dim}(context, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); """ result = [] for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) + Dim=str(Dim) +result.append(text.replace("{Dim}",Dim) ) return '\n'.join(result) - #+end_src +#+end_src - #+begin_src c :tangle (eval c) :comments org :noweb yes +#+begin_src c :tangle (eval c) :comments org :noweb yes <> qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, @@ -364,7 +431,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, NULL); } - // #ifdef HAVE_HPC + #ifdef HAVE_HPC if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases switch (Dim) { <> @@ -382,23 +449,108 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, determinant); } - // #else - // return qmckl_sherman_morrison_naive_doc(context, - // Dim, - // N_updates, - // Updates, - // Updates_index, - // breakdown, - // Slater_inv, - // determinant); - // #endif + #else + return qmckl_sherman_morrison_naive_doc(context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); + #endif return QMCKL_FAILURE; } - #+end_src +#+end_src -*** Test :noexport: +** Fortran interface + :PROPERTIES: + :Name: qmckl_sherman_morrison_naive + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: +#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive_hpc") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sherman_morrison_naive_hpc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sherman_morrison_naive_hpc +end interface +#+end_src + +#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive_doc") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sherman_morrison_naive_doc +end interface +#+end_src + +#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sherman_morrison_naive & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sherman_morrison_naive +end interface +#+end_src + +** Test The tests for the kernels are executed on datasets that are extracted from a run of QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants. The tests are run such that the kernels reject the computed inverse whenever the computed @@ -457,1638 +609,6 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, assert(rc == QMCKL_SUCCESS); #+end_src -** Performance - - This function performs best when there is only 1 rank-1 update in the update cycle. It is - not useful to use Sherman-Morrison with update splitting for these cycles since splitting - can never resolve a situation where applying the update causes singular behaviour. - -** C interface -#+begin_src f90 :tangle (eval f) :comments org :exports none - interface - integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & - (context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim) - real (c_double ) , intent(inout) :: determinant - - integer(c_int32_t), external :: qmckl_sherman_morrison_naive_doc_f - info = qmckl_sherman_morrison_naive_doc_f & - (context, Dim, N_updates, Updates, & - Updates_index, breakdown, Slater_inv, determinant) - - end function qmckl_sherman_morrison_naive_doc - end interface - #+end_src - -** Fortran interface :noexport: - :PROPERTIES: - :Name: qmckl_sherman_morrison_naive - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_sherman_morrison_naive & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_naive - end interface - #+end_src - - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & - (context, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_naive_doc - end interface - #+end_src - -* Woodbury 2x2 - ** ~qmckl_woodbury_2x2~ - :PROPERTIES: - :Name: qmckl_woodbury_2x2 - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - The Woodbury 2x2 kernel. It is used to apply two rank-1 updates at once. The formula used in - this algorithm is called the Woodbury Matrix Identity - \[ - (S + U V)^{-1} = S^{-1} - C B^{-1} D - \] - where - $S$ is the Slater-matrix - $U$ and $V$ are the matrices containing the updates and the canonical basis matrix - $S^{-1}$ is the inverse of the Slater-matrix - $C:= S^{-1}U$, a Dim $\times 2$ matrix - $B := 1 + VC$, the $2 \times 2$ matrix that is going to be inverted - $D := VS^{-1}$, a $2 \times Dim$ matrix - - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - - - - #+NAME: qmckl_woodbury_2x2_args - | qmckl_context | context | in | Global state | - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | double | Updates[2*Dim] | in | Array containing the updates | - | uint64_t | Updates_index[2] | in | Array containing the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix | - | double* | determinant | inout | Determinant of Slater-matrix | - -*** Requirements - - - ~context~ is not ~qmckl_null_context~ - - ~LDS >= 2~ - - ~Dim >= 2~ - - ~Updates~ is allocated with $2 \times Dim$ elements - - ~Updates_index~ is allocated with $2$ elements - - ~breakdown~ is a small number such that $0 < breakdown << 1$ - - ~Slater_inv~ is allocated with $Dim \times Dim$ elements - -*** C header - - #+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_woodbury_2x2( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src - -*** C source - - #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { -/* - C := S^{-1} * U, dim x 2 - B := 1 + V * C, 2 x 2 - D := V * S^{-1}, 2 x dim -*/ - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_2x2_hpc", - NULL); - } - - const uint64_t row1 = (Updates_index[0] - 1); - const uint64_t row2 = (Updates_index[1] - 1); - - // Compute C = (S^T)^{-1}U : Dim x 2 - double __attribute__((aligned(8))) C[2 * Dim]; - for (uint64_t i = 0; i < Dim; i++) { - C[i * 2] = 0; - C[i * 2 + 1] = 0; - IVDEP - ALIGNED - for (uint64_t k = 0; k < LDS; k++) { - C[i * 2] += Slater_inv[i * LDS + k] * Updates[k]; - C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k]; - } - } - - // Compute B = 1 + VC : 2 x 2 - const double B0 = C[row1 * 2] + 1; - const double B1 = C[row1 * 2 + 1]; - const double B2 = C[row2 * 2]; - const double B3 = C[row2 * 2 + 1] + 1; - - // Check if determinant of inverted matrix is not zero - double det = B0 * B3 - B1 * B2; - if (fabs(det) < breakdown) { - return QMCKL_FAILURE; - } - - // Update det(S) when passed - if (determinant) - *determinant *= det; - - // Compute B^{-1} with explicit formula for 2 x 2 inversion - double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det; - Binv[0] = idet * B3; - Binv[1] = -1.0 * idet * B1; - Binv[2] = -1.0 * idet * B2; - Binv[3] = idet * B0; - - // tmp = B^{-1}D : 2 x LDS - double __attribute__((aligned(8))) tmp[2 * LDS]; - double* r1dim = &(Slater_inv[row1 * LDS]); - double* r2dim = &(Slater_inv[row2 * LDS]); - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j]; - tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j]; - } - - // Compute (S^T)^{-1} - C * tmp : Dim x LDS - for (uint64_t i = 0; i < Dim; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j]; - Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j]; - } - } - - return QMCKL_SUCCESS; -} - #+end_src - - - - - - #+NAME:woodbury_2x2_kernel_template - #+begin_src c -static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}( - const qmckl_context context, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { -/* - C := S^{-1} * U, dim x 2 - B := 1 + V * C, 2 x 2 - D := V * S^{-1}, 2 x dim -*/ - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_2x2_{Dim}", - NULL); - } - - const uint64_t row1 = (Updates_index[0] - 1); - const uint64_t row2 = (Updates_index[1] - 1); - - // Compute C = (S^T)^{-1}U : {Dim} x 2 - double __attribute__((aligned(8))) C[2 * {Dim}]; - for (uint64_t i = 0; i < {Dim}; i++) { - C[i * 2] = 0; - C[i * 2 + 1] = 0; - IVDEP - ALIGNED - for (uint64_t k = 0; k < D{Dim}_P; k++) { - C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k]; - C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k]; - } - } - - // Compute B = 1 + VC : 2 x 2 - const double B0 = C[row1 * 2] + 1; - const double B1 = C[row1 * 2 + 1]; - const double B2 = C[row2 * 2]; - const double B3 = C[row2 * 2 + 1] + 1; - - // Check if determinant of inverted matrix is not zero - double det = B0 * B3 - B1 * B2; - if (fabs(det) < breakdown) { - return QMCKL_FAILURE; - } - - // Update det(S) when passed - if (determinant) - *determinant *= det; - - // Compute B^{-1} with explicit formula for 2 x 2 inversion - double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det; - Binv[0] = idet * B3; - Binv[1] = -1.0 * idet * B1; - Binv[2] = -1.0 * idet * B2; - Binv[3] = idet * B0; - - // tmp = B^{-1}D : 2 x D{Dim}_P - double __attribute__((aligned(8))) tmp[2 * D{Dim}_P]; - double* r1dim = &(Slater_inv[row1 * D{Dim}_P]); - double* r2dim = &(Slater_inv[row2 * D{Dim}_P]); - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j]; - tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j]; - } - - // Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P - for (uint64_t i = 0; i < {Dim}; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j]; - Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j]; - } - } - - return QMCKL_SUCCESS; -} - #+end_src - - - - #+NAME:woodbury_2x2_kernel_generator - #+begin_src python :noweb yes :exports none -text=""" -<> -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - - #+NAME:woodbury_2x2_switch-case_generator - #+begin_src python :noweb yes :exports none -text=""" -case {Dim}: - return qmckl_woodbury_2x2_{Dim}(context, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - - - #+begin_src c :tangle (eval c) :comments org :noweb yes -<> - -qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_2x2", - NULL); - } - - if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases - switch (Dim) { - <> - } - } - else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) - return qmckl_woodbury_2x2_hpc(context, - LDS, - Dim, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); - - } - - return QMCKL_FAILURE; -} - #+end_src - - - - -*** Performance - - This function is most efficient when used in cases where there are only 2 rank-1 updates and - it is sure they will not result in a singular matrix. - - ** Fortran interface :noexport: - :PROPERTIES: - :Name: qmckl_woodbury_2x2 - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_woodbury_2x2 & - (context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - real (c_double ) , intent(in) :: Updates(2*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(2) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_woodbury_2x2 - end interface - #+end_src - -*** Test :noexport: - - #+begin_src c :tangle (eval c_test) -assert(Updates2 != NULL); -assert(Updates_index2 != NULL); -assert(Slater_inv2 != NULL); -det = -1.4432116661319376e-11; -rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det); -assert(fabs(det-2.367058141251457e-10) < 1e-15); -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - res[i * Dim + j] = 0; - for (unsigned int k = 0; k < Dim; k++) { - res[i * Dim + j] += Slater2[i * Dim + k] * Slater_inv2[k * LDS + j]; - } - } -} -rc = QMCKL_SUCCESS; -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { - rc = QMCKL_FAILURE; - } - if (i != j && fabs(res[i * Dim + j]) > tolerance) { - rc = QMCKL_FAILURE; - } - } -} -assert(rc == QMCKL_SUCCESS); - #+end_src - -* Woodbury 3x3 -** ~qmckl_woodbury_3x3~ - :PROPERTIES: - :Name: qmckl_woodbury_3x3 - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - The 3x3 version of the Woodbury 2x2 kernel. It is used to apply three - rank-1 updates at once. The formula used in this kernel is the same as for Woodbury 2x2, - except for the sizes of the following matrices: - - $C:= S^{-1}U$, a Dim $\times 3$ matrix - $B := 1 + VC$, the $3 \times 3$ matrix that is going to be inverted - $D := VS^{-1}$, a $3 \times Dim$ matrix - - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - #pragma ivdep - #pragma vector aligned - - #+NAME: qmckl_woodbury_3x3_args - | qmckl_context | context | in | Global state | - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | double | Updates[3*Dim] | in | Array containing the updates | - | uint64_t | Updates_index[3] | in | Array containing the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix | - | double* | determinant | inout | Determinant of Slater-matrix | - -*** Requirements - - - ~context~ is not ~qmckl_null_context~ - - ~LDS >= 2~ - - ~Dim >= 2~ - - ~Updates~ is allocated with $3 \times Dim$ elements - - ~Updates_index~ is allocated with $3$ elements - - ~breakdown~ is a small number such that $0 < breakdown << 1$ - - ~Slater_inv~ is allocated with $Dim \times Dim$ elements - -*** C header - - #+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_woodbury_3x3( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src - -*** C source - - #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_woodbury_3x3_hpc(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { -/* - C := S^{-1} * U, dim x 3 - B := 1 + V * C, 3 x 3 - D := V * S^{-1}, 3 x dim -,*/ - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_3x3_hpc", - NULL); - } - - const uint64_t row1 = (Updates_index[0] - 1); - const uint64_t row2 = (Updates_index[1] - 1); - const uint64_t row3 = (Updates_index[2] - 1); - - // Compute C = (S^T)^{-1}U : Dim x 3 - double __attribute__((aligned(8))) C[3 * Dim]; - for (uint64_t i = 0; i < Dim; i++) { - C[i * 3] = 0; - C[i * 3 + 1] = 0; - C[i * 3 + 2] = 0; - IVDEP - ALIGNED - for (uint64_t k = 0; k < LDS; k++) { - C[i * 3] += Slater_inv[i * LDS + k] * Updates[k]; - C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k]; - C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k]; - } - } - - // Compute B = 1 + VC : 3 x 3 - const double B0 = C[row1 * 3] + 1; - const double B1 = C[row1 * 3 + 1]; - const double B2 = C[row1 * 3 + 2]; - const double B3 = C[row2 * 3]; - const double B4 = C[row2 * 3 + 1] + 1; - const double B5 = C[row2 * 3 + 2]; - const double B6 = C[row3 * 3]; - const double B7 = C[row3 * 3 + 1]; - const double B8 = C[row3 * 3 + 2] + 1; - - // Check if determinant of B is not too close to zero - double det; - det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) + - B2 * (B3 * B7 - B4 * B6); - if (fabs(det) < breakdown) { - return QMCKL_FAILURE; - } - - // Update det(Slater) if passed - if (determinant) - *determinant *= det; - - // Compute B^{-1} with explicit formula for 3 x 3 inversion - double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det; - Binv[0] = (B4 * B8 - B7 * B5) * idet; - Binv[1] = -(B1 * B8 - B7 * B2) * idet; - Binv[2] = (B1 * B5 - B4 * B2) * idet; - Binv[3] = -(B3 * B8 - B6 * B5) * idet; - Binv[4] = (B0 * B8 - B6 * B2) * idet; - Binv[5] = -(B0 * B5 - B3 * B2) * idet; - Binv[6] = (B3 * B7 - B6 * B4) * idet; - Binv[7] = -(B0 * B7 - B6 * B1) * idet; - Binv[8] = (B0 * B4 - B3 * B1) * idet; - - // tmp = B^{-1}D : 3 x LDS - double __attribute__((aligned(8))) tmp[3 * LDS]; - double* r1dim = &(Slater_inv[row1 * LDS]); - double* r2dim = &(Slater_inv[row2 * LDS]); - double* r3dim = &(Slater_inv[row3 * LDS]); - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j]; - tmp[LDS + j] = - Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j]; - tmp[2 * LDS + j] = - Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j]; - } - - // Compute (S^T)^{-1} - C * tmp : Dim x LDS - for (uint64_t i = 0; i < Dim; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j]; - Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j]; - Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j]; - } - } - - return QMCKL_SUCCESS; -} - #+end_src - - - #+NAME:woodbury_3x3_kernel_template - #+begin_src c -qmckl_exit_code qmckl_woodbury_3x3_{Dim}( - const qmckl_context context, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict determinant) { -/* - C := S^{-1} * U, dim x 3 - B := 1 + V * C, 3 x 3 - D := V * S^{-1}, 3 x dim -,*/ - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_3x3_{Dim}", - NULL); - } - - const uint64_t row1 = (Updates_index[0] - 1); - const uint64_t row2 = (Updates_index[1] - 1); - const uint64_t row3 = (Updates_index[2] - 1); - - // Compute C = (S^T)^{-1}U : {Dim} x 3 - double __attribute__((aligned(8))) C[3 * {Dim}]; - for (uint64_t i = 0; i < {Dim}; i++) { - C[i * 3] = 0; - C[i * 3 + 1] = 0; - C[i * 3 + 2] = 0; - IVDEP - ALIGNED - for (uint64_t k = 0; k < D{Dim}_P; k++) { - C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k]; - C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k]; - C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k]; - } - } - - // Compute B = 1 + VC : 3 x 3 - const double B0 = C[row1 * 3] + 1; - const double B1 = C[row1 * 3 + 1]; - const double B2 = C[row1 * 3 + 2]; - const double B3 = C[row2 * 3]; - const double B4 = C[row2 * 3 + 1] + 1; - const double B5 = C[row2 * 3 + 2]; - const double B6 = C[row3 * 3]; - const double B7 = C[row3 * 3 + 1]; - const double B8 = C[row3 * 3 + 2] + 1; - - // Check if determinant of B is not too close to zero - double det; - det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) + - B2 * (B3 * B7 - B4 * B6); - if (fabs(det) < breakdown) { - return QMCKL_FAILURE; - } - - // Update det(Slater) if passed - if (determinant) - *determinant *= det; - - // Compute B^{-1} with explicit formula for 3 x 3 inversion - double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det; - Binv[0] = (B4 * B8 - B7 * B5) * idet; - Binv[1] = -(B1 * B8 - B7 * B2) * idet; - Binv[2] = (B1 * B5 - B4 * B2) * idet; - Binv[3] = -(B3 * B8 - B6 * B5) * idet; - Binv[4] = (B0 * B8 - B6 * B2) * idet; - Binv[5] = -(B0 * B5 - B3 * B2) * idet; - Binv[6] = (B3 * B7 - B6 * B4) * idet; - Binv[7] = -(B0 * B7 - B6 * B1) * idet; - Binv[8] = (B0 * B4 - B3 * B1) * idet; - - // tmp = B^{-1}D : 3 x D{Dim}_P - double __attribute__((aligned(8))) tmp[3 * D{Dim}_P]; - double* r1dim = &(Slater_inv[row1 * D{Dim}_P]); - double* r2dim = &(Slater_inv[row2 * D{Dim}_P]); - double* r3dim = &(Slater_inv[row3 * D{Dim}_P]); - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j]; - tmp[D{Dim}_P + j] = - Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j]; - tmp[2 * D{Dim}_P + j] = - Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j]; - } - - // Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P - for (uint64_t i = 0; i < {Dim}; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j]; - Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j]; - Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j]; - } - } - - return QMCKL_SUCCESS; -} - #+end_src - - - #+NAME:woodbury_3x3_kernel_generator - #+begin_src python :noweb yes :exports none -text=""" -<> -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - #+NAME:woodbury_3x3_switch-case_generator - #+begin_src python :noweb yes :exports none -text=""" -case {Dim}: - return qmckl_woodbury_3x3_{Dim}(context, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - #+begin_src c :tangle (eval c) :comments org :noweb yes -<> - -qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_woodbury_3x3", - NULL); - } - - if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases - switch (Dim) { - <> - } - } - else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) - return qmckl_woodbury_3x3_hpc(context, - LDS, - Dim, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); - - } - - return QMCKL_FAILURE; -} - #+end_src - - -*** Performance... - - This function is most efficient when used in cases where there are only 3 rank-1 updates and - it is sure they will not result in a singular matrix. - - ** Fortran interface :noexport: - :PROPERTIES: - :Name: qmckl_woodbury_3x3 - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_woodbury_3x3 & - (context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - real (c_double ) , intent(in) :: Updates(3*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(3) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_woodbury_3x3 - end interface - #+end_src - -*** Test :noexport: - - #+begin_src c :tangle (eval c_test) -assert(Updates3 != NULL); -assert(Updates_index3 != NULL); -assert(Slater_inv3_1 != NULL); -det = -1.23743195512859e-09; -rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det); -assert(fabs(det - 1.602708950725074e-10) < 1e-15); -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - res[i * Dim + j] = 0; - for (unsigned int k = 0; k < Dim; k++) { - res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_1[k * LDS + j]; - } - } -} -rc = QMCKL_SUCCESS; -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { - rc = QMCKL_FAILURE; - } - if (i != j && fabs(res[i * Dim + j]) > tolerance) { - rc = QMCKL_FAILURE; - } - } -} -assert(rc == QMCKL_SUCCESS); - #+end_src - -* Sherman-Morrison with update splitting -** ~qmckl_sherman_morrison_splitting~ - :PROPERTIES: - :Name: qmckl_sherman_morrison_splitting - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator $1+v_j^T S^{-1} u_j$ in - the Sherman-Morrison formula is deemed to be too close to zero, the update $u_j$ is split in half: - $u_j \rightarrow \frac{1}{2} u_j$. One half is applied immediately --necessarily increasing the value of the - denominator because of the split-- while the other halve is put in a queue that will be applied when all the - remaining updates have been treated. - - The kernel is executed recursively until the queue is eiter empty and all - updates are applied successfully, or the size of the queue equals the number of initial updates. In the last - case the Slater-matrix that would have resulted from applying the updates is singular and therefore the - kernel exits with an exit code. - - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - - #+NAME: qmckl_sherman_morrison_splitting_args - | qmckl_context | context | in | Global state | - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv | - | double | Updates[N_updates*Dim] | in | Array containing the updates | - | uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix | - | double* | determinant | inout | Determinant of the Slater-matrix | - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - -*** Requirements - - * ~context~ is not ~QMCKL_NULL_CONTEXT~ - * ~LDS >= 2~ - * ~Dim >= 2~ - * ~N_updates >= 1~ - * ~Updates~ is allocated with $N_updates \times Dim$ elements - * ~Updates_index~ is allocated with $N_updates$ elements - * ~breakdown~ is a small number such that $0 < breakdown << 1$ - * ~Slater_inv~ is allocated with $Dim \times Dim$ elements - -*** C header - - #+CALL: generate_c_header(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_sherman_morrison_splitting( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src - -*** C source - - #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_splitting", - NULL); - } - - double __attribute__((aligned(8))) later_updates[LDS * N_updates]; - uint64_t later_index[N_updates]; - uint64_t later = 0; - - qmckl_exit_code rc = qmckl_slagel_splitting( - LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, - later_updates, later_index, &later, determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - - if (later > 0) { - qmckl_exit_code rc = qmckl_sherman_morrison_splitting( - context, LDS, Dim, later, later_updates, later_index, breakdown, - Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - } - - return QMCKL_SUCCESS; -} - - #+end_src - -*** Performance... - - This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high. - -** Fortran interface :noexport: - :PROPERTIES: - :Name: qmckl_sherman_morrison_splitting - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+CALL: generate_f_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("FRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_sherman_morrison_splitting & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_splitting - end interface - #+end_src - -*** Test :noexport: - - #+begin_src c :tangle (eval c_test) -assert(Updates3 != NULL); -assert(Updates_index3 != NULL); -assert(Slater_inv3_2 != NULL); -det = -1.23743195512859e-09; -rc = qmckl_sherman_morrison_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det); -assert(fabs(det - 1.602708950725074e-10) < 1e-15); -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - res[i * Dim + j] = 0; - for (unsigned int k = 0; k < Dim; k++) { - res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j]; - } - } -} -rc = QMCKL_SUCCESS; -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { - rc = QMCKL_FAILURE; - } - if (i != j && fabs(res[i * Dim + j]) > tolerance) { - rc = QMCKL_FAILURE; - } - } -} -assert(rc == QMCKL_SUCCESS); - #+end_src - -* Woodbury 3x3 and 2x2 with Sherman-Morrison and update splitting - ** ~qmckl_sherman_morrison_smw32s~ - :PROPERTIES: - :Name: qmckl_sherman_morrison_smw32s - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - The Woodbury 3x3 and 2x2 kernel with Sherman-Morrison and update splitting combines the low-level Woodbury 3x3 kernel, - the Woobury 2x2 kernel and Sherman-Morrison with update splitting. It works the almost the same as Woodbury 3x3 with - Sherman-Morrison and update splitting, except that when there is a remainder of two rank-1 updates, it is first tried - with Woodbury 2x2 instead of sending them all to Sherman-Morrison with update splitting. For example, in the case of - 5 updates the updates are applied in 1 block of 3 updates end 1 block of 2 updates. - - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - - #+NAME: qmckl_sherman_morrison_smw32s_args - | qmckl_context | context | in | Global state | - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv | - | double | Updates[N_updates*Dim] | in | Array containing the updates | - | uint64_t | Updates_index[N_updates] | in | Array containing the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse of a Slater-matrix | - | double* | determinant | inout | Determinant of the Slater-matrix | - - -*** Requirements - - * ~context~ is not ~QMCKL_NULL_CONTEXT~ - * ~LDS >= 2~ - * ~Dim >= 2~ - * ~N_updates >= 1~ - * ~Updates~ is allocated with $N_updates \times Dim$ elements - * ~Updates_index~ is allocated with $N_updates$ elements - * ~breakdown~ is a small number such that $0 < breakdown << 1$ - * ~Slater_inv~ is allocated with $Dim \times Dim$ elements - -*** C header - - #+CALL: generate_c_header(table=qmckl_sherman_morrison_smw32s_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_sherman_morrison_smw32s( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant); - #+end_src - -*** C source - - #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_smw32s", - NULL); - } - - double __attribute__((aligned(8))) later_updates[LDS * N_updates]; - uint64_t later_index[N_updates]; - uint64_t later = 0; - - // Special case for 4 rank-1 updates: 2+2 - if (N_updates == 4) { - qmckl_exit_code rc = - qmckl_woodbury_2x2(context, LDS, Dim, Updates, Updates_index, - breakdown, Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting - uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates, Updates_index, - breakdown, Slater_inv, - later_updates + (LDS * later), - later_index + later, &l, determinant); - later += l; - } - rc = qmckl_woodbury_2x2(context, LDS, Dim, &Updates[2 * LDS], - &Updates_index[2], breakdown, Slater_inv, - determinant); - if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting - uint64_t l = 0; - rc = qmckl_slagel_splitting( - LDS, Dim, 2, &Updates[2 * LDS], &Updates_index[2], breakdown, - Slater_inv, later_updates + (LDS * later), later_index + later, - &l, determinant); - later += l; - } - if (later > 0) { - rc = qmckl_sherman_morrison_splitting( - context, LDS, Dim, later, later_updates, later_index, breakdown, - Slater_inv, determinant); - } - return QMCKL_SUCCESS; - } - - // And for the other cases != 4 - // Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates - // with Woodbury 3x3 kernel - uint64_t n_of_3blocks = N_updates / 3; - uint64_t remainder = N_updates % 3; - uint64_t length_3block = 3 * LDS; - - if (n_of_3blocks > 0) { - for (uint64_t i = 0; i < n_of_3blocks; i++) { - const double* Updates_3block = &Updates[i * length_3block]; - const uint64_t* Updates_index_3block = &Updates_index[i * 3]; - qmckl_exit_code rc = qmckl_woodbury_3x3( - context, LDS, Dim, Updates_3block, Updates_index_3block, - breakdown, Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting - uint64_t l = 0; - rc = qmckl_slagel_splitting( - LDS, Dim, 3, Updates_3block, Updates_index_3block, - breakdown, Slater_inv, later_updates + (LDS * later), - later_index + later, &l, determinant); - later += l; - } - } - } - - // Apply last remaining block of 2 updates with Woodbury 2x2 kernel - if (remainder == 2) { - const double* Updates_2block = &Updates[n_of_3blocks * length_3block]; - const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks]; - qmckl_exit_code rc = qmckl_woodbury_2x2( - context, LDS, Dim, Updates_2block, Updates_index_2block, - breakdown, Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting - uint64_t l = 0; - rc = qmckl_slagel_splitting( - LDS, Dim, 2, Updates_2block, Updates_index_2block, breakdown, - Slater_inv, later_updates + (LDS * later), later_index + later, - &l, determinant); - later += l; - } - } - - // Apply last remaining update with slagel_splitting - if (remainder == 1) { - const double* Updates_1block = &Updates[n_of_3blocks * length_3block]; - const uint64_t* Updates_index_1block = &Updates_index[3 * n_of_3blocks]; - uint64_t l = 0; - qmckl_exit_code rc = qmckl_slagel_splitting( - LDS, Dim, 1, Updates_1block, Updates_index_1block, breakdown, - Slater_inv, later_updates + (LDS * later), later_index + later, &l, - determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - later += l; - } - - if (later > 0) { - qmckl_exit_code rc = qmckl_sherman_morrison_splitting( - context, LDS, Dim, later, later_updates, later_index, breakdown, - Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - } - - return QMCKL_SUCCESS; -} - - #+end_src - -*** Performance... - - This kernel performs best for update cycles with 2 or more rank-1 updates and the fail-rate is low. - - ** Fortran interface :noexport: - :PROPERTIES: - :Name: qmckl_sherman_morrison_smw32s - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+CALL: generate_f_interface(table=qmckl_sherman_morrison_smw32s_args,rettyp=get_value("FRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval fh_func) :comments org :exports none - interface - integer(c_int32_t) function qmckl_sherman_morrison_smw32s & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*Dim) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_smw32s - end interface - #+end_src - -*** Test :noexport: - - #+begin_src c :tangle (eval c_test) -assert(Updates5 != NULL); -assert(Updates_index5 != NULL); -assert(Slater_inv5 != NULL); -det = -3.186005284713128e-10; -rc = qmckl_sherman_morrison_smw32s(context, LDS, Dim, N_updates5, Updates5, Updates_index5, breakdown, Slater_inv5, &det); -assert(fabs(det + 5.260200118412903e-10) < 1e-15); - -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - res[i * Dim + j] = 0; - for (unsigned int k = 0; k < Dim; k++) { - res[i * Dim + j] += Slater5[i * Dim + k] * Slater_inv5[k * LDS + j]; - } - } -} -rc = QMCKL_SUCCESS; -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { - rc = QMCKL_FAILURE; - } - if (i != j && fabs(res[i * Dim + j]) > tolerance) { - rc = QMCKL_FAILURE; - } - } -} -assert(rc == QMCKL_SUCCESS); - #+end_src - -* Helper Functions - -Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels. -These functions can only be used internally by the kernels in this module. - -** ~qmckl_slagel_splitting~ - :PROPERTIES: - :Name: qmckl_slagel_splitting - :CRetType: double - :FRetType: double precision - :END: - - ~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel. - It is used internally to apply a collection of $N$ rank-1 updates to the inverse Slater-matrix $S^{-1}$ and - splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update, - while putting the second half in a waiting queue to be applied at the end. - - Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined - as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors - $u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}. - - If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting - from applying the updates to the original matrix. - - #+NAME: qmckl_slagel_splitting_args - | uint64_t | LDS | in | Leading dimension of Slater_inv | - | uint64_t | Dim | in | Dimension of Slater_inv | - | uint64_t | N_updates | in | Number of rank-1 updates to be applied to Slater_inv | - | double | Updates[N_updates*Dim] | in | Array containing the rank-1 updates | - | uint64_t | Updates_index[N_updates] | in | Array containing positions of the rank-1 updates | - | double | breakdown | in | Break-down parameter on which to fail or not | - | double | Slater_inv[LDS*Dim] | inout | Array containing the inverse Slater-matrix | - | double | later_updates[Dim * N_updates] | inout | Array containing the split updates for later | - | uint64_t | later_index[N_updates] | inout | Array containing the positions of the split updates for later | - | uint64_t | later | inout | Number of split updates for later | - | double* | determinant | inout | Determinant of the Slater-matrix | - - -*** Requirements - - - ~LDS >= 2~ - - ~Dim >= 2~ - - ~N_updates >= 1~ - - ~Updates~ is allocated with $N_updates \times Dim$ elements - - ~Updates_index~ is allocated with $N_updates$ elements - - ~breakdown~ is a small number such that $0 < breakdown << 1$ - - ~Slater_inv~ is allocated with $Dim \times Dim$ elements - - ~later_updates~ is allocated with $later \times Dim$ elements - - ~later_index~ is allocated with $N_updates$ elements - - ~later >= 0~ - -*** C header - - #+CALL: generate_c_header(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_slagel_splitting ( - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* later_updates, - uint64_t* later_index, - uint64_t* later, - double* determinant); - #+end_src - -*** C source - - #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_slagel_splitting_hpc( - uint64_t LDS, - uint64_t Dim, - uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict later_updates, - uint64_t* __restrict later_index, - uint64_t* __restrict later, - double* __restrict determinant) { - - double __attribute__((aligned(8))) C[LDS]; - double __attribute__((aligned(8))) D[LDS]; - - uint64_t l = 0; - // For each update - while (l < N_updates) { - // C = S^{-1} x U_l - for (uint64_t i = 0; i < Dim; i++) { - C[i] = 0.0f; - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; - } - } - - // Denominator - const int cui = Updates_index[l] - 1; - double den = 1.0f + C[cui]; - if (fabs(den) < breakdown) { - // U_l = U_l / 2: split the update in 2 equal halves and save the - // second halve in later_updates - IVDEP - ALIGNED - for (uint64_t i = 0; i < LDS; i++) { - later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f; - C[i] *= 0.5f; - } - later_index[*later] = Updates_index[l]; - (*later)++; - - den = 1.0f + C[cui]; - } // From here onwards we continue with applying the first halve of the - // update to Slater_inv - double iden = 1.0f / den; - - if (determinant) - *determinant *= den; - - // D = v^T x S^{-1} : 1 x LDS - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - D[j] = Slater_inv[cui * LDS + j]; - } - - // S^{-1} = S^{-1} - C x D / den - for (uint64_t i = 0; i < Dim; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < LDS; j++) { - const double update = C[i] * D[j] * iden; - Slater_inv[i * LDS + j] -= update; - } - } - l += 1; - } - - return QMCKL_SUCCESS; -} - #+end_src - - - #+NAME:slagel_splitting_template_code - #+begin_src c -static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}( - uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict later_updates, - uint64_t* __restrict later_index, - uint64_t* __restrict later, - double* __restrict determinant) { - - double __attribute__((aligned(8))) C[D{Dim}_P]; - double __attribute__((aligned(8))) D[D{Dim}_P]; - - uint64_t l = 0; - // For each update - while (l < N_updates) { - // C = S^{-1} x U_l - for (uint64_t i = 0; i < {Dim}; i++) { - C[i] = 0.0f; - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j]; - } - } - - // Denominator - const int cui = Updates_index[l] - 1; - double den = 1.0f + C[cui]; - if (fabs(den) < breakdown) { - // U_l = U_l / 2: split the update in 2 equal halves and save the - // second halve in later_updates - IVDEP - ALIGNED - for (uint64_t i = 0; i < D{Dim}_P; i++) { - later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f; - C[i] *= 0.5f; - } - later_index[*later] = Updates_index[l]; - (*later)++; - - den = 1.0f + C[cui]; - } // From here onwards we continue with applying the first halve of the - // update to Slater_inv - double iden = 1.0f / den; - - if (determinant) - *determinant *= den; - - // D = v^T x S^{-1} : 1 x D{Dim}_P - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - D[j] = Slater_inv[cui * D{Dim}_P + j]; - } - - // S^{-1} = S^{-1} - C x D / den - for (uint64_t i = 0; i < {Dim}; i++) { - IVDEP - ALIGNED - for (uint64_t j = 0; j < D{Dim}_P; j++) { - const double update = C[i] * D[j] * iden; - Slater_inv[i * D{Dim}_P + j] -= update; - } - } - l += 1; - } - - return QMCKL_SUCCESS; -} - #+end_src - - - #+NAME:slagel_splitting_kernel_generator - #+begin_src python :noweb yes :exports none -text=""" -<> -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - #+NAME:slagel_splitting_switch-case_generator - #+begin_src python :noweb yes :exports none -text=""" -case {Dim}: - return qmckl_slagel_splitting_{Dim}( - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - later_updates, - later_index, - later, - determinant); -""" -result = [] -for Dim in <>: - Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) - -return '\n'.join(result) - #+end_src - - - #+begin_src c :tangle (eval c) :comments org :noweb yes -<> - -qmckl_exit_code qmckl_slagel_splitting( - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* later_updates, - uint64_t* later_index, - uint64_t* later, - double* determinant) { - - if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases - switch (Dim) { - <> - } - } - else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) - return qmckl_slagel_splitting_hpc( - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - later_updates, - later_index, - later, - determinant); - } - - return QMCKL_FAILURE; -} - #+end_src - - -*** Performance - -This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2 -with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels. * End of files