diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 98193cc..4ddd949 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -34,9 +34,9 @@ range(2, 22) * Naïve Sherman-Morrison -** ~qmckl_sherman_morrison_naive~ +** ~qmckl_sm_naive~ :PROPERTIES: -:Name: qmckl_sherman_morrison_naive +:Name: qmckl_sm_naive :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: @@ -75,7 +75,7 @@ If the determinant of the Slater-matrix is passed, it will be updated to the det from applying the updates to the original matrix. *** API -#+NAME: qmckl_sherman_morrison_naive_args +#+NAME: qmckl_sm_naive_args | Variable | Type | In/Out | Description | |-----------------+-------------------------+--------+------------------------------------------------------| | ~context~ | ~qmckl_context~ | in | Global state | @@ -132,7 +132,7 @@ end subroutine convert #+end_src #+begin_src f90 :tangle (eval f) :comment org :exports none -subroutine copy_back(Inverse, s_inv, lds, dim) +subroutine copy_back_inv(Inverse, s_inv, lds, dim) implicit none integer*8 , intent(in) :: lds, dim real*8 , intent(in) , dimension(dim, lds) :: Inverse @@ -146,11 +146,29 @@ subroutine copy_back(Inverse, s_inv, lds, dim) s_inv((i - 1) * lds + j) = Inverse(i, j) end do end do -end subroutine copy_back +end subroutine copy_back_inv +#+end_src + +#+begin_src f90 :tangle (eval f) :comment org :exports none +subroutine copy_back_lu(Later_updates, later_upds, lds, nupdates) + implicit none + integer*8 , intent(in) :: lds, nupdates + real*8 , intent(in) , dimension(lds, nupdates) :: Later_updates + real*8 , intent(out) :: later_upds(nupdates * lds) + + integer*8 :: i, j + + ! Copy updated inverse back to s_inv + do i = 1, nupdates + do j = 1, lds + later_upds((i - 1) * lds + j) = Later_updates(j, i) + end do + end do +end subroutine copy_back_lu #+end_src #+begin_src f90 :tangle (eval f) -integer function qmckl_sherman_morrison_naive_doc_f(context, & +integer function qmckl_sm_naive_doc_f(context, & lds, dim, & nupdates, & upds, & @@ -226,24 +244,24 @@ integer function qmckl_sherman_morrison_naive_doc_f(context, & end do ! Copy updated inverse back to s_inv - call copy_back(Inverse, s_inv, lds, dim) + call copy_back_inv(Inverse, s_inv, lds, dim) info = QMCKL_SUCCESS -end function qmckl_sherman_morrison_naive_doc_f +end function qmckl_sm_naive_doc_f #+end_src **** C interface to the pedagogical kernel (not directly exposed) -The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure -that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in -Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h' +The following Fortran function ~qmckl_sm_naive_doc~ makes sure +that the pedagogical kernel ~qmckl_sm_naive_doc_f~, written in +Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sm_naive_doc~ will be exposed in the header file 'qmckl.h' for C users and in the module file 'qmckl_f.F90' for Fortran users. -#+CALL: generate_c_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") +#+CALL: generate_c_interface(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & +integer(c_int32_t) function qmckl_sm_naive_doc & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & bind(C) result(info) @@ -260,20 +278,20 @@ integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) real (c_double ) , intent(inout) :: determinant - integer(c_int32_t), external :: qmckl_sherman_morrison_naive_doc_f - info = qmckl_sherman_morrison_naive_doc_f & + integer(c_int32_t), external :: qmckl_sm_naive_doc_f + info = qmckl_sm_naive_doc_f & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) -end function qmckl_sherman_morrison_naive_doc +end function qmckl_sm_naive_doc #+end_src *** C headers (exposed in qmckl.h) -#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: -#+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_naive ( +#+begin_src c :tangle (eval h_func) : comments org +qmckl_exit_code qmckl_sm_naive ( const qmckl_context context, const uint64_t LDS, const uint64_t Dim, @@ -285,11 +303,11 @@ qmckl_exit_code qmckl_sherman_morrison_naive ( double* determinant ); #+end_src -#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_hpc") +#+CALL: generate_private_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_hpc") #+RESULTS: -#+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_naive_hpc ( +#+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_sm_naive_hpc ( const qmckl_context context, const uint64_t LDS, const uint64_t Dim, @@ -301,11 +319,11 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc ( double* determinant ); #+end_src -#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") +#+CALL: generate_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_doc") #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_naive_doc ( +qmckl_exit_code qmckl_sm_naive_doc ( const qmckl_context context, const uint64_t LDS, const uint64_t Dim, @@ -324,6 +342,8 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels. #include #include "qmckl.h" #include "config.h" +#include "assert.h" +#include "stdio.h" // Order important because // __GNUC__ also set in ICC, ICX and CLANG @@ -343,17 +363,17 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels. #endif #+end_src -~qmckl_sherman_morrison_naive_hpc~ is a high performance variation of -~qmckl_sherman_morrison_naive~ written in C. It is used in cases when ~Dim~ is +~qmckl_sm_naive_hpc~ is a high performance variation of +~qmckl_sm_naive~ written in C. It is used in cases when ~Dim~ is smaller than the leading dimension ~LDS~, irrespective of whetether ~LDS~ includes zero padding to benefit from SIMD instructions or not. Cases like this include situations where one wants to apply updates to a square submatrix of the full matrix. It takes advantage of memory aligned data and assumes no data dependencies inside the loops. The loops are fully vectorised whenever ~Dim~ is an integer -multiple of ~SIMD_LEGTH~. +multiple of ~SIMD_LENGTH~. #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_sherman_morrison_naive_hpc( +qmckl_exit_code qmckl_sm_naive_hpc( const qmckl_context context, const uint64_t LDS, const uint64_t Dim, @@ -367,7 +387,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc( if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return qmckl_failwith( context, QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_naive_hpc", + "qmckl_sm_naive_hpc", NULL); } @@ -422,10 +442,10 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc( } #+end_src -~qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}~ is a C function-template that is used to genereate instances of C fucntions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop-bounds are known at compile time, allowing the compiler to optimize more aggressively. +~qmckl_exit_code qmckl_sm_naive_{Dim}~ is a C function-template that is used to genereate instances of C fucntions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop-bounds are known at compile time, allowing the compiler to optimize more aggressively. #+NAME:naive_template_code #+begin_src c - static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}( + static inline qmckl_exit_code qmckl_sm_naive_{Dim}( const qmckl_context context, const uint64_t N_updates, const double* __restrict Updates, @@ -437,7 +457,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc( if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return qmckl_failwith(context, QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_naive_{Dim}", + "qmckl_sm_naive_{Dim}", NULL); } @@ -505,9 +525,9 @@ text=""" result = [] for Dim in <>: Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) + result.append(text.replace("{Dim}",Dim)) -return '\n'.join(result) +return ''.join(result) #+end_src Python script that generated C switch cases that call individual kernel instances. @@ -515,43 +535,43 @@ Python script that generated C switch cases that call individual kernel instance #+begin_src python :noweb yes text=""" case {Dim}: - return qmckl_sherman_morrison_naive_{Dim}(context, + return qmckl_sm_naive_{Dim}(context, N_updates, Updates, Updates_index, breakdown, Slater_inv, - determinant); -""" + determinant);""" result = [] for Dim in <>: Dim=str(Dim) - result.append(text.replace("{Dim}",Dim) ) + result.append(text.replace("{Dim}",Dim)) -return '\n'.join(result) +return ''.join(result) #+end_src #+begin_src c :tangle (eval c) :comments org :noweb yes <> #+end_src -~qmckl_sherman_morrison_naive~ is a generic function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~. +~qmckl_sm_naive~ is a generic function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~. #+begin_src c :tangle (eval c) :comments org :noweb yes -qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { +qmckl_exit_code qmckl_sm_naive(const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_naive", - NULL); + return qmckl_failwith( + context, + QMCKL_NULL_CONTEXT, + "qmckl_sm_naive", + NULL); } #ifdef HAVE_HPC @@ -561,26 +581,28 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, } } else { // Updating smaller sub-matrix - return qmckl_sherman_morrison_naive_hpc(context, - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); + return qmckl_sm_naive_hpc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); } #else - return qmckl_sherman_morrison_naive_doc(context, - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); + return qmckl_sm_naive_doc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); #endif return QMCKL_FAILURE; @@ -589,17 +611,17 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context, *** Fortran interfaces (exposed in qmckl_f.F90) :PROPERTIES: - :Name: qmckl_sherman_morrison_naive + :Name: qmckl_sm_naive :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive_hpc") +#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_sherman_morrison_naive_hpc & + integer(c_int32_t) function qmckl_sm_naive & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & bind(C) use, intrinsic :: iso_c_binding @@ -616,16 +638,16 @@ interface real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) real (c_double ) , intent(inout) :: determinant - end function qmckl_sherman_morrison_naive_hpc + end function qmckl_sm_naive end interface #+end_src -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive_doc") +#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sm_naive_hpc") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & + integer(c_int32_t) function qmckl_sm_naive_hpc & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & bind(C) use, intrinsic :: iso_c_binding @@ -642,16 +664,16 @@ interface real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) real (c_double ) , intent(inout) :: determinant - end function qmckl_sherman_morrison_naive_doc + end function qmckl_sm_naive_hpc end interface #+end_src -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_naive") +#+CALL: generate_f_interface(table=qmckl_sm_naive_args,rettyp=get_value("FRetType"),fname="qmckl_sm_naive_doc") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_sherman_morrison_naive & + integer(c_int32_t) function qmckl_sm_naive_doc & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & bind(C) use, intrinsic :: iso_c_binding @@ -668,7 +690,7 @@ interface real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) real (c_double ) , intent(inout) :: determinant - end function qmckl_sherman_morrison_naive + end function qmckl_sm_naive_doc end interface #+end_src @@ -700,7 +722,7 @@ assert(Slater_inv1 != NULL); // original determinant of Slater1 (before applying updates) double det = 3.407025646103221e-10; -rc = qmckl_sherman_morrison_naive(context, +rc = qmckl_sm_naive(context, LDS, Dim, N_updates1, @@ -736,427 +758,42 @@ assert(rc == QMCKL_SUCCESS); #+end_src -* Sherman-Morrison with update splitting -** ~qmckl_sherman_morrison_splitting~ +* Sherman-Morrison with Slagel Splitting (core) +** ~qmckl_sm_splitting_core~ :PROPERTIES: -:Name: qmckl_sherman_morrison_splitting +:Name: qmckl_sm_splitting_core :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: *** Introduction -This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator $1+v_j^T S^{-1} u_j$ in -the Sherman-Morrison formula is deemed to be too close to zero, the update $u_j$ is split in half: -$u_j \rightarrow \frac{1}{2} u_j$. One half is applied immediately --necessarily increasing the value of the -denominator because of the split-- while the other halve is put in a queue that will be applied when all the -remaining updates have been treated. +~qmckl_sm_splitting_core~ is the inner core part of 'Sherman-Morrison with update splitting' in the next section. +It is not normally used by itself but it is possible to use it nonetheless. -The kernel is executed recursively until the queue is eiter empty and all -updates are applied successfully, or the size of the queue equals the number of initial updates. In the last -case the Slater-matrix that would have resulted from applying the updates is singular and therefore the -kernel exits with an exit code. +It has three extra parameters in its API: +- ~later_updates~ initially empty array that will contain the second halves of updates that were split during kernel execution +- ~later_index~ initially empty array that will contain the row/column numbers of the updates that were split during execution +- ~later~ initially zero integer that records the number of updates that were split during exection. -If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting -from applying the updates to the original matrix. - -*** API -#+NAME: qmckl_sherman_morrison_splitting_args -| Variable | Type | In/Out | Description | -|---------------+-----------------------+--------+------------------------------------------------------| -| context | qmckl_context | in | Global state | -| LDS | uint64_t | in | Leading dimension of Slater_inv | -| Dim | uint64_t | in | Dimension of Slater_inv | -| N_updates | uint64_t | in | Number of rank-1 updates to be applied to Slater_inv | -| Updates | double[N_updates*LDS] | in | Array containing the updates | -| Updates_index | uint64_t[N_updates] | in | Array containing the rank-1 updates | -| breakdown | double | in | Break-down parameter on which to fail or not | -| Slater_inv | double[Dim*LDS] | inout | Array containing the inverse of a Slater-matrix | -| determinant | double | inout | Determinant of the Slater-matrix | - -*** Requirements - * ~context~ is not ~QMCKL_NULL_CONTEXT~ - * ~LDS >= 2~ - * ~Dim >= 2~ - * ~N_updates >= 1~ - * ~Updates~ is allocated with $N_updates \times Dim$ elements - * ~Updates_index~ is allocated with $N_updates$ elements - * ~breakdown~ is a small number such that $0 < breakdown << 1$ - * ~Slater_inv~ is allocated with $Dim \times Dim$ elements - -*** Pedagogical kernel source (in Fortran) -The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is -able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore -not be used in real workloads. - -#+begin_src f90 :tangle (eval f) -integer function qmckl_sherman_morrison_splitting_doc_f(context, & - lds, dim, & - nupdates, & - upds, & - updates_index, & - breakdown, & - s_inv, & - determinant) result(info) - - use qmckl - implicit none - integer*8 , intent(in) :: context - integer*8 , intent(in) :: lds, dim - integer*8 , intent(in) :: nupdates - integer*8 , intent(in) :: updates_index(nupdates) - real*8 , intent(in) :: upds(nupdates * lds) - real*8 , intent(in) :: breakdown - real*8 , intent(inout) :: s_inv(dim * lds) - real*8 , intent(inout) :: determinant - - real*8 , dimension(lds, nupdates) :: Updates - real*8 , dimension(dim, lds) :: Inverse - - info = QMCKL_FAILURE - - ! Convert 'upds' and 's_inv' into the more easily readable Fortran - ! matrices 'Updates' and 'Inverse'. - call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim) - - ! YET TO BE IMPLEMENTED - - ! Copy updated inverse back to s_inv - call copy_back(Inverse, s_inv, lds, dim) - - info = QMCKL_SUCCESS - -end function qmckl_sherman_morrison_splitting_doc_f -#+end_src - -**** C interface to the pedagogical kernel (not directly exposed) -The following Fortran function ~qmckl_slagel_splitting_doc~ makes sure -that the pedagogical kernel ~qmckl_slagel_splitting_doc_f~, written in -Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function -~qmckl_slagel_splitting_doc~ will be exposed in the header file 'qmckl.h' -for C users and in the module file 'qmckl_f.F90' for Fortran users. - -#+CALL: generate_c_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_splitting_doc") - -#+RESULTS: -#+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_sherman_morrison_splitting_doc & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*LDS) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) - real (c_double ) , intent(inout) :: determinant - - integer(c_int32_t), external :: qmckl_sherman_morrison_splitting_doc_f - info = qmckl_sherman_morrison_splitting_doc_f & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) - -end function qmckl_sherman_morrison_splitting_doc -#+end_src - -*** C headers (exposed in qmckl.h) - -#+CALL: generate_c_header(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - -#+RESULTS: -#+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting ( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant ); -#+end_src - -#+CALL: generate_c_header(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_splitting_hpc") - -#+RESULTS: -#+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting_hpc ( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant ); -#+end_src - -#+CALL: generate_c_header(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_splitting_doc") - -#+RESULTS: -#+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting_doc ( - const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant ); -#+end_src - -*** C source -#+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting_hpc(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_splitting", - NULL); - } - - double __attribute__((aligned(8))) later_updates[LDS * N_updates]; - uint64_t later_index[N_updates]; - uint64_t later = 0; - - qmckl_exit_code rc = qmckl_slagel_splitting( - LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, - later_updates, later_index, &later, determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - - if (later > 0) { - qmckl_exit_code rc = qmckl_sherman_morrison_splitting( - context, LDS, Dim, later, later_updates, later_index, breakdown, - Slater_inv, determinant); - if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; - } - - return QMCKL_SUCCESS; -} -#+end_src - -#+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_sherman_morrison_splitting(const qmckl_context context, - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* determinant) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return qmckl_failwith(context, - QMCKL_NULL_CONTEXT, - "qmckl_sherman_morrison_splitting", - NULL); - } - - #ifdef HAS_HPC - return qmckl_sherman_morrison_splitting_hpc(context, - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); - #else - // return qmckl_sherman_morrison_splitting_doc(context, - // LDS, - // Dim, - // N_updates, - // Updates, - // Updates_index, - // breakdown, - // Slater_inv, - // determinant); - return qmckl_sherman_morrison_splitting_hpc(context, - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - determinant); - #endif +It is up to the user to decide what to do with these updates once the kernel returns. Normally ~qmckl_sm_splitting_core~ is +used as the core part of a recursive function, as is done in ~qmckl_sm_splitting~ or as part of a more complex +kernel like ~qmckl_sherman_morrison_smw32s~. - return QMCKL_SUCCESS; -} -#+end_src - -*** Fortran interfaces (exposed in qmckl_f.F90) - :PROPERTIES: - :Name: qmckl_sherman_morrison_naive - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_splitting_hpc") - -#+RESULTS: -#+begin_src f90 :tangle (eval fh_func) :comments org :exports none -interface - integer(c_int32_t) function qmckl_sherman_morrison_splitting_hpc & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*LDS) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_splitting_hpc -end interface -#+end_src - -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_splitting_doc") - -#+RESULTS: -#+begin_src f90 :tangle (eval fh_func) :comments org :exports none -interface - integer(c_int32_t) function qmckl_sherman_morrison_splitting_doc & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*LDS) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_splitting_doc -end interface -#+end_src - -#+CALL: generate_f_interface(table=qmckl_sherman_morrison_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sherman_morrison_splitting") - -#+RESULTS: -#+begin_src f90 :tangle (eval fh_func) :comments org :exports none -interface - integer(c_int32_t) function qmckl_sherman_morrison_splitting & - (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & - bind(C) - use, intrinsic :: iso_c_binding - import - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: LDS - integer (c_int64_t) , intent(in) , value :: Dim - integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*LDS) - integer (c_int64_t) , intent(in) :: Updates_index(N_updates) - real (c_double ) , intent(in) , value :: breakdown - real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) - real (c_double ) , intent(inout) :: determinant - - end function qmckl_sherman_morrison_splitting -end interface -#+end_src - -*** Performance... -This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high. - -*** Test -#+begin_src c :tangle (eval c_test) -assert(Updates3 != NULL); -assert(Updates_index3 != NULL); -assert(Slater_inv3_2 != NULL); -det = -1.23743195512859e-09; -rc = qmckl_sherman_morrison_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det); -assert(fabs(det - 1.602708950725074e-10) < 1e-15); -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - res[i * Dim + j] = 0; - for (unsigned int k = 0; k < Dim; k++) { - res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j]; - } - } -} -rc = QMCKL_SUCCESS; -for (unsigned int i = 0; i < Dim; i++) { - for (unsigned int j = 0; j < Dim; j++) { - if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { - rc = QMCKL_FAILURE; - } - if (i != j && fabs(res[i * Dim + j]) > tolerance) { - rc = QMCKL_FAILURE; - } - } -} -assert(rc == QMCKL_SUCCESS); -#+end_src - - -* Slagel Splitting -** ~qmckl_slagel_splitting~ -:PROPERTIES: -:Name: qmckl_slagel_splitting -:CRetType: qmckl_exit_code -:FRetType: qmckl_exit_code -:END: - -*** Introduction -~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel. -It is used internally to apply a collection of $N$ rank-1 updates to the inverse Slater-matrix $S^{-1}$ and -splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update, -while putting the second half in a waiting queue to be applied at the end. - -Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined -as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors -$u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}. - -If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting -from applying the updates to the original matrix. +If the determinant is passed it will only be partially updated if there were any update splits. *** API -#+NAME: qmckl_slagel_splitting_args +#+NAME: qmckl_sm_splitting_core_args | Variable | Type | In/Out | Description | |-----------------+-------------------------+--------+---------------------------------------------------------------| +| ~context~ | ~qmckl_context~ | in | Global state | | ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv | | ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv | | ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv | -| ~Updates~ | ~double[N_updates*LDS]~ | in | Array containing the rank-1 updates | +| ~Updates~ | ~double[LDS*N_updates]~ | in | Array containing the rank-1 updates | | ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing positions of the rank-1 updates | | ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not | | ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse Slater-matrix | -| ~later_updates~ | ~double[N_updates*LDS]~ | inout | Array containing the split updates for later | +| ~later_updates~ | ~double[LDS*N_updates]~ | inout | Array containing the split updates for later | | ~later_index~ | ~uint64_t[N_updates]~ | inout | Array containing the positions of the split updates for later | | ~later~ | ~uint64_t~ | inout | Number of split updates for later | | ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix | @@ -1179,63 +816,130 @@ able to do numerically correct computations, it does not do it in the most effic not be used in real workloads. #+begin_src f90 :tangle (eval f) -integer function qmckl_slagel_splitting_doc_f( & +integer function qmckl_sm_splitting_core_doc_f( & + context, & lds, dim, & nupdates, & upds, & updates_index, & breakdown, & s_inv, & - later_updates, & - later_index, & - later, & + later_upds, & + Later_index, & + Later, & determinant) result(info) use qmckl implicit none - integer*8 , intent(in) :: lds, dim - integer*8 , intent(in) :: nupdates - integer*8 , intent(in) :: updates_index(nupdates) - real*8 , intent(in) :: upds(nupdates * lds) - real*8 , intent(in) :: breakdown - real*8 , intent(inout) :: s_inv(dim * lds) - real*8 , intent(inout) :: later_updates(nupdates * lds) - integer*8 , intent(inout) :: later_index(nupdates) - integer*8 , intent(inout) :: later - real*8 , intent(inout) :: determinant + integer*8 , intent(in) :: context + integer*8 , intent(in) :: lds, dim + integer*8 , intent(in) :: nupdates + integer*8 , intent(in) :: updates_index(nupdates) + real*8 , intent(in) :: upds(lds * nupdates) + real*8 , intent(in) :: breakdown + real*8 , intent(inout) :: s_inv(dim * lds) + real*8 , intent(inout) :: determinant + integer*8 , intent(inout) :: Later + integer*8 , intent(inout) :: Later_index(nupdates) + real*8 , intent(inout) :: later_upds(lds * nupdates) real*8 , dimension(lds, nupdates) :: Updates + real*8 , dimension(lds, nupdates) :: Later_updates real*8 , dimension(dim, lds) :: Inverse + real*8 , dimension(dim) :: C + real*8 , dimension(lds) :: D + real*8 :: denominator, idenominator, update + integer*8 :: i, j, l, row + + write(*,*) "Entering 'qmckl_sm_splittinig_core_doc_f'" info = QMCKL_FAILURE + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + ! Convert 'upds' and 's_inv' into the more easily readable Fortran ! matrices 'Updates' and 'Inverse'. call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim) - ! YET TO BE IMPLEMENTED - ! Copy updated inverse back to s_inv - call copy_back(Inverse, s_inv, lds, dim) + l = 1; + ! For each update do... + do while (l < nupdates + 1) + + ! Compute C = S^{-1}U(l) + do i = 1, dim + C(i) = 0 + do j = 1, dim + C(i) = C(i) + Inverse(i, j) * Updates(j, l) + end do + end do + + ! Compute denominator = 1 + V(l)^TC + row = updates_index(l) + denominator = 1 + C(row) + + ! If denominator is too close to zero: + ! - Split update in 2 before storing in Later_updates + ! - Split previously computed vector C in 2 + ! - Recompute the denominator + if (abs(denominator) < breakdown) then + do i = 1, dim + Later_updates(i, l) = Updates(i, l) / 2 + C(i) = C(i) / 2 + end do + Later_index(Later + 1) = updates_index(l) + Later = Later + 1 + denominator = 1 + C(row) + end if + + idenominator = 1 / denominator + + ! Update det(S) + determinant = determinant * denominator + + ! selecting column: v_l^T * S_inv + D = Inverse(row, :) + + ! A^{-1} = A^{-1} - C x D / denominator + do i = 1, dim + do j = 1, dim + update = C(i) * D(j) * idenominator + Inverse(i, j) = Inverse(i, j) - update + end do + end do + + l = l + 1 + end do + + ! Copy updated inverse and later updates + ! back to s_inv and later_upds + call copy_back_inv(Inverse, s_inv, lds, dim) + call copy_back_lu(Later_Updates, later_upds, lds, nupdates) info = QMCKL_SUCCESS -end function qmckl_slagel_splitting_doc_f + write(*,*) "Leaving 'qmckl_sm_splittinig_core_doc_f'" + +end function qmckl_sm_splitting_core_doc_f #+end_src **** C interface to the pedagogical kernel (not directly exposed) -The following Fortran function ~qmckl_slagel_splitting_doc~ makes sure -that the pedagogical kernel ~qmckl_slagel_splitting_doc_f~, written in -Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function -~qmckl_slagel_splitting_doc~ will be exposed in the header file 'qmckl.h' -for C users and in the module file 'qmckl_f.F90' for Fortran users. +The function ~qmckl_sm_splitting_core_doc~ makes sure that +~qmckl_sm_splitting_core_doc_f~ can be called from C using the +~ISO_C_BINDING~. Function ~qmckl_sm_splitting_core_doc~ will be +exposed in ~qmckl.h~ and ~qmckl_f.F90~, but +~qmckl_sm_splitting_core_doc_f~ will not. -#+CALL: generate_c_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_slagel_splitting_doc") +#+CALL: generate_c_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_slagel_splitting_doc & - (LDS, & +integer(c_int32_t) function qmckl_sm_splitting_core_doc & + (context, & + LDS, & Dim, & N_updates, & Updates, & @@ -1251,21 +955,23 @@ integer(c_int32_t) function qmckl_slagel_splitting_doc & use, intrinsic :: iso_c_binding implicit none + integer (c_int64_t) , intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: LDS integer (c_int64_t) , intent(in) , value :: Dim integer (c_int64_t) , intent(in) , value :: N_updates - real (c_double ) , intent(in) :: Updates(N_updates*LDS) + real (c_double ) , intent(in) :: Updates(LDS*N_updates) integer (c_int64_t) , intent(in) :: Updates_index(N_updates) real (c_double ) , intent(in) , value :: breakdown real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) - real (c_double ) , intent(inout) :: later_updates(N_updates*LDS) + real (c_double ) , intent(inout) :: later_updates(LDS*N_updates) integer (c_int64_t) , intent(inout) :: later_index(N_updates) integer (c_int64_t) , intent(inout) :: later real (c_double ) , intent(inout) :: determinant - integer(c_int32_t), external :: qmckl_slagel_splitting_doc_f - info = qmckl_slagel_splitting_doc_f & - (LDS, & + integer(c_int32_t), external :: qmckl_sm_splitting_core_doc_f + info = qmckl_sm_splitting_core_doc_f & + (context, & + LDS, & Dim, & N_updates, & Updates, & @@ -1277,15 +983,54 @@ integer(c_int32_t) function qmckl_slagel_splitting_doc & later, & determinant) -end function qmckl_slagel_splitting_doc +end function qmckl_sm_splitting_core_doc #+end_src *** C headers (exposed in qmckl.h) -#+CALL: generate_c_header(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org -qmckl_exit_code qmckl_slagel_splitting ( +qmckl_exit_code qmckl_sm_splitting_core ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* later_updates, + uint64_t* later_index, + uint64_t* later, + double* determinant ); +#+end_src + +#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_hpc") + +#+RESULTS: +#+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_sm_splitting_core_hpc ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* later_updates, + uint64_t* later_index, + uint64_t* later, + double* determinant ); +#+end_src + +#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_doc") + +#+RESULTS: +#+begin_src c :tangle (eval h_func) :no-expand comments org +qmckl_exit_code qmckl_sm_splitting_core_doc ( + const qmckl_context context, const uint64_t LDS, const uint64_t Dim, const uint64_t N_updates, @@ -1301,18 +1046,27 @@ qmckl_exit_code qmckl_slagel_splitting ( *** C sources #+begin_src c :tangle (eval c) :comments org -qmckl_exit_code qmckl_slagel_splitting_hpc( - uint64_t LDS, - uint64_t Dim, - uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict later_updates, - uint64_t* __restrict later_index, - uint64_t* __restrict later, - double* __restrict determinant) { +qmckl_exit_code qmckl_sm_splitting_core_hpc( + const qmckl_context context, + uint64_t LDS, + uint64_t Dim, + uint64_t N_updates, + const double* __restrict Updates, + const uint64_t* __restrict Updates_index, + const double breakdown, + double* __restrict Slater_inv, + double* __restrict later_updates, + uint64_t* __restrict later_index, + uint64_t* __restrict later, + double* __restrict determinant) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( + context, + QMCKL_NULL_CONTEXT, + "qmckl_sm_splitting_core_hpc", + NULL); + } double __attribute__((aligned(8))) C[LDS]; double __attribute__((aligned(8))) D[LDS]; @@ -1378,16 +1132,25 @@ qmckl_exit_code qmckl_slagel_splitting_hpc( #+NAME:slagel_splitting_template_code #+begin_src c -static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}( - uint64_t N_updates, - const double* __restrict Updates, - const uint64_t* __restrict Updates_index, - const double breakdown, - double* __restrict Slater_inv, - double* __restrict later_updates, - uint64_t* __restrict later_index, - uint64_t* __restrict later, - double* __restrict determinant) { +static inline qmckl_exit_code qmckl_sm_splitting_core_{Dim}( + const qmckl_context context, + uint64_t N_updates, + const double* __restrict Updates, + const uint64_t* __restrict Updates_index, + const double breakdown, + double* __restrict Slater_inv, + double* __restrict later_updates, + uint64_t* __restrict later_index, + uint64_t* __restrict later, + double* __restrict determinant) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( + context, + QMCKL_NULL_CONTEXT, + "qmckl_sm_splitting_core_{Dim}", + NULL); + } double __attribute__((aligned(8))) C[D{Dim}_P]; double __attribute__((aligned(8))) D[D{Dim}_P]; @@ -1461,30 +1224,32 @@ for Dim in <>: Dim=str(Dim) result.append(text.replace("{Dim}",Dim) ) -return '\n'.join(result) +return ''.join(result) #+end_src #+NAME:slagel_splitting_switch-case_generator #+begin_src python :noweb yes text=""" -case {Dim}: - return qmckl_slagel_splitting_{Dim}( - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - later_updates, - later_index, - later, - determinant); -""" +case {Dim}: { + return qmckl_sm_splitting_core_{Dim}( + context, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + later_updates, + later_index, + later, + determinant); + break; +}""" result = [] for Dim in <>: Dim=str(Dim) result.append(text.replace("{Dim}",Dim) ) -return '\n'.join(result) +return ''.join(result) #+end_src #+begin_src c :tangle (eval c) :comments org :noweb yes @@ -1492,64 +1257,59 @@ return '\n'.join(result) #+end_src #+begin_src c :tangle (eval c) :comments org :noweb yes -qmckl_exit_code qmckl_slagel_splitting( - const uint64_t LDS, - const uint64_t Dim, - const uint64_t N_updates, - const double* Updates, - const uint64_t* Updates_index, - const double breakdown, - double* Slater_inv, - double* later_updates, - uint64_t* later_index, - uint64_t* later, - double* determinant) { +qmckl_exit_code qmckl_sm_splitting_core( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* later_updates, + uint64_t* later_index, + uint64_t* later, + double* determinant) { #ifdef HAVE_HPC if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases switch (Dim) { <> + default: { + assert(0 == 1 && "TEMPLATE NOT IMPLEMENTED!"); + break; + } } } else { // Updating smaller sub-matrix - return qmckl_slagel_splitting_hpc( - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - later_updates, - later_index, - later, - determinant); + return qmckl_sm_splitting_core_hpc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + later_updates, + later_index, + later, + determinant); } #else - // return qmckl_slagel_splitting_doc( - // LDS, - // Dim, - // N_updates, - // Updates, - // Updates_index, - // breakdown, - // Slater_inv, - // later_updates, - // later_index, - // later, - // determinant); - return qmckl_slagel_splitting_hpc( - LDS, - Dim, - N_updates, - Updates, - Updates_index, - breakdown, - Slater_inv, - later_updates, - later_index, - later, - determinant); + return qmckl_sm_splitting_core_doc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + later_updates, + later_index, + later, + determinant); #endif return QMCKL_FAILURE; @@ -1558,23 +1318,25 @@ qmckl_exit_code qmckl_slagel_splitting( *** Fortran interfaces (exposed in qmckl_f.F90) :PROPERTIES: -:Name: qmckl_slagel_splitting +:Name: qmckl_sm_splitting_core :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: -#+CALL: generate_f_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_slagel_splitting_hpc") +#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core_hpc") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_slagel_splitting_hpc & - (LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, later_updates, later_index, later, determinant) & + integer(c_int32_t) function qmckl_sm_splitting_core_hpc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, & + Slater_inv, later_updates, later_index, later, determinant) & bind(C) use, intrinsic :: iso_c_binding import implicit none + integer (c_int64_t) , intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: LDS integer (c_int64_t) , intent(in) , value :: Dim integer (c_int64_t) , intent(in) , value :: N_updates @@ -1587,22 +1349,24 @@ interface integer (c_int64_t) , intent(inout) :: later real (c_double ) , intent(inout) :: determinant - end function qmckl_slagel_splitting_hpc + end function qmckl_sm_splitting_core_hpc end interface #+end_src -#+CALL: generate_f_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_slagel_splitting_doc") +#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core_doc") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_slagel_splitting_doc & - (LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, later_updates, later_index, later, determinant) & + integer(c_int32_t) function qmckl_sm_splitting_core_doc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, & + Slater_inv, later_updates, later_index, later, determinant) & bind(C) use, intrinsic :: iso_c_binding import implicit none + integer (c_int64_t) , intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: LDS integer (c_int64_t) , intent(in) , value :: Dim integer (c_int64_t) , intent(in) , value :: N_updates @@ -1615,22 +1379,24 @@ interface integer (c_int64_t) , intent(inout) :: later real (c_double ) , intent(inout) :: determinant - end function qmckl_slagel_splitting_doc + end function qmckl_sm_splitting_core_doc end interface #+end_src -#+CALL: generate_f_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_slagel_splitting") +#+CALL: generate_f_interface(table=qmckl_sm_splitting_core_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_core") #+RESULTS: #+begin_src f90 :tangle (eval fh_func) :comments org :exports none interface - integer(c_int32_t) function qmckl_slagel_splitting & - (LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, later_updates, later_index, later, determinant) & + integer(c_int32_t) function qmckl_sm_splitting_core & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, & + Slater_inv, later_updates, later_index, later, determinant) & bind(C) use, intrinsic :: iso_c_binding import implicit none + integer (c_int64_t) , intent(in) , value :: context integer (c_int64_t) , intent(in) , value :: LDS integer (c_int64_t) , intent(in) , value :: Dim integer (c_int64_t) , intent(in) , value :: N_updates @@ -1643,7 +1409,7 @@ interface integer (c_int64_t) , intent(inout) :: later real (c_double ) , intent(inout) :: determinant - end function qmckl_slagel_splitting + end function qmckl_sm_splitting_core end interface #+end_src @@ -1652,6 +1418,470 @@ This function cannot be used by itself and is used in Sherman-Morrison with upda with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels. +* Sherman-Morrison with Slagel Splitting +** ~qmckl_sm_splitting~ +:PROPERTIES: +:Name: qmckl_sm_splitting +:CRetType: qmckl_exit_code +:FRetType: qmckl_exit_code +:END: + +*** Introduction +This is a variation on the 'Naive' Sherman-Morrison kernel. Whenever the denominator $1+v_j^T S^{-1} u_j$ in +the Sherman-Morrison formula is deemed to be too close to zero, the update $u_j$ is split in half: +$u_j \rightarrow \frac{1}{2} u_j$. One half is applied immediately --necessarily increasing the value of the +denominator because of the split-- while the other halve is put in a queue that will be applied when all the +remaining updates have been treated. + +The kernel is executed recursively until the queue is eiter empty and all +updates are applied successfully, or the size of the queue equals the number of initial updates. In the last +case the Slater-matrix that would have resulted from applying the updates is singular and therefore the +kernel exits with an exit code. + +If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting +from applying the updates to the original matrix. + +*** API +#+NAME: qmckl_sm_splitting_args +| Variable | Type | In/Out | Description | +|---------------+-----------------------+--------+------------------------------------------------------| +| context | qmckl_context | in | Global state | +| LDS | uint64_t | in | Leading dimension of Slater_inv | +| Dim | uint64_t | in | Dimension of Slater_inv | +| N_updates | uint64_t | in | Number of rank-1 updates to be applied to Slater_inv | +| Updates | double[N_updates*LDS] | in | Array containing the updates | +| Updates_index | uint64_t[N_updates] | in | Array containing the rank-1 updates | +| breakdown | double | in | Break-down parameter on which to fail or not | +| Slater_inv | double[Dim*LDS] | inout | Array containing the inverse of a Slater-matrix | +| determinant | double | inout | Determinant of the Slater-matrix | + +*** Requirements + * ~context~ is not ~QMCKL_NULL_CONTEXT~ + * ~LDS >= 2~ + * ~Dim >= 2~ + * ~N_updates >= 1~ + * ~Updates~ is allocated with $N_updates \times Dim$ elements + * ~Updates_index~ is allocated with $N_updates$ elements + * ~breakdown~ is a small number such that $0 < breakdown << 1$ + * ~Slater_inv~ is allocated with $Dim \times Dim$ elements + +*** Pedagogical kernel source (in Fortran) +The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is +able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore +not be used in real workloads. + +#+begin_src f90 :tangle (eval f) +integer recursive function qmckl_sm_splitting_doc_f( & + context, & + lds, dim, & + nupdates, & + upds, & + updates_index, & + breakdown, & + s_inv, & + determinant) result(info) + + use qmckl + implicit none + integer*8 , intent(in) :: context + integer*8 , intent(in) :: lds, dim + integer*8 , intent(in) :: nupdates + integer*8 , intent(in) :: updates_index(nupdates) + real*8 , intent(in) :: upds(lds * nupdates) + real*8 , intent(in) :: breakdown + real*8 , intent(inout) :: s_inv(dim * lds) + real*8 , intent(inout) :: determinant + + integer , external :: qmckl_sm_splitting_core_doc_f + + integer*8 :: Later + integer*8 , dimension(nupdates) :: Later_index + real*8 , dimension(lds * nupdates) :: Later_updates + + write(*,*) "Entering 'qmckl_sm_splitting_doc_f'" + + info = QMCKL_FAILURE + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + Later = 0 + Later_index = 0 + Later_updates = 0 + + info = qmckl_sm_splitting_core_doc_f( & + context, & + lds, dim, & + nupdates, & + upds, & + updates_index, & + breakdown, & + s_inv, & + Later_updates, & + Later_index, & + Later, & + determinant) + + if (Later > 0) then + info = qmckl_sm_splitting_doc_f( & + context, & + lds, dim, & + Later, & + Later_updates, & + Later_index, & + breakdown, & + s_inv, & + determinant) + end if + + info = QMCKL_SUCCESS + + write(*,*) "Leaving 'qmckl_sm_splitting_doc_f'" + +end function qmckl_sm_splitting_doc_f +#+end_src + +**** C interface to the pedagogical kernel (not directly exposed) +The following Fortran function ~qmckl_sm_splitting_core_doc~ makes sure +that the pedagogical kernel ~qmckl_sm_splitting_core_doc_f~, written in +Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function +~qmckl_sm_splitting_core_doc~ will be exposed in the header file 'qmckl.h' +for C users and in the module file 'qmckl_f.F90' for Fortran users. + +#+CALL: generate_c_interface(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_doc") + +#+RESULTS: +#+begin_src f90 :tangle (eval f) :comments org :exports none +integer(c_int32_t) function qmckl_sm_splitting_doc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(LDS*N_updates) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + integer(c_int32_t), external :: qmckl_sm_splitting_doc_f + + write(*,*) "Entering 'qmckl_sm_splitting_doc'" + + info = qmckl_sm_splitting_doc_f & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) + + write(*,*) "Leaving 'qmckl_sm_splitting_doc'" + +end function qmckl_sm_splitting_doc +#+end_src + +*** C headers (exposed in qmckl.h) + +#+CALL: generate_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + +#+RESULTS: +#+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_sm_splitting ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src + +#+CALL: generate_private_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_hpc") + +#+RESULTS: +#+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_sm_splitting_hpc ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src + +#+CALL: generate_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_doc") + +#+RESULTS: +#+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_sm_splitting_doc ( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant ); +#+end_src + +*** C source +#+NAME:splitting_switch-case_generator +#+begin_src python :noweb yes +text=""" +case {Dim}: { + rc = qmckl_sm_splitting_core_{Dim}( + context, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + later_updates, + later_index, &later, determinant); + break; +} +""" +result = [] +for Dim in <>: + Dim=str(Dim) + result.append(text.replace("{Dim}",Dim) ) + +return '\n'.join(result) +#+end_src + +#+begin_src c :tangle (eval c) :comments org :noweb yes +qmckl_exit_code qmckl_sm_splitting_hpc( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( + context, + QMCKL_NULL_CONTEXT, + "qmckl_sm_splitting_hpc", + NULL); + } + + double __attribute__((aligned(8))) later_updates[LDS * N_updates]; + uint64_t later_index[N_updates]; + uint64_t later = 0; + + qmckl_exit_code rc; + if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { + switch (Dim) { + <> + default: { + assert(0 == 1 && "TEMPLATE NOT IMPLEMENTED!"); + break; + } + } + } else { + rc = qmckl_sm_splitting_core_hpc( + context, LDS, Dim, N_updates, Updates, Updates_index, + breakdown, Slater_inv, later_updates, + later_index, &later, determinant); + } + if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; + + if (later > 0) { + qmckl_exit_code rc = qmckl_sm_splitting_hpc( + context, LDS, Dim, later, + later_updates, later_index, + breakdown, Slater_inv, determinant); + if (rc != QMCKL_SUCCESS) return QMCKL_FAILURE; + } + + return QMCKL_SUCCESS; +} +#+end_src + +#+begin_src c :tangle (eval c) :comment org +qmckl_exit_code qmckl_sm_splitting( + const qmckl_context context, + const uint64_t LDS, + const uint64_t Dim, + const uint64_t N_updates, + const double* Updates, + const uint64_t* Updates_index, + const double breakdown, + double* Slater_inv, + double* determinant) { + + printf("Entering 'qmckl_sm_splitting'\n"); + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( + context, + QMCKL_NULL_CONTEXT, + "qmckl_sm_splitting", + NULL); + } + #ifdef HAVE_HPC + return qmckl_sm_splitting_hpc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); + #else + return qmckl_sm_splitting_doc( + context, + LDS, + Dim, + N_updates, + Updates, + Updates_index, + breakdown, + Slater_inv, + determinant); + #endif + + printf("Leaving 'qmckl_sm_splitting'\n"); + + return QMCKL_SUCCESS; +} +#+end_src + +*** Fortran interfaces (exposed in qmckl_f.F90) + :PROPERTIES: + :Name: qmckl_sm_naive + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + +#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_hpc") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sm_splitting_hpc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sm_splitting_hpc +end interface +#+end_src + +#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting_doc") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sm_splitting_doc & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sm_splitting_doc +end interface +#+end_src + +#+CALL: generate_f_interface(table=qmckl_sm_splitting_args,rettyp=get_value("FRetType"),fname="qmckl_sm_splitting") + +#+RESULTS: +#+begin_src f90 :tangle (eval fh_func) :comments org :exports none +interface + integer(c_int32_t) function qmckl_sm_splitting & + (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & + bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: LDS + integer (c_int64_t) , intent(in) , value :: Dim + integer (c_int64_t) , intent(in) , value :: N_updates + real (c_double ) , intent(in) :: Updates(N_updates*LDS) + integer (c_int64_t) , intent(in) :: Updates_index(N_updates) + real (c_double ) , intent(in) , value :: breakdown + real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS) + real (c_double ) , intent(inout) :: determinant + + end function qmckl_sm_splitting +end interface +#+end_src + +*** Performance... +This kernel performs best when there are 2 or more rank-1 update cycles and fail-rate is high. + +*** Test +#+begin_src c :tangle (eval c_test) +assert(Updates3 != NULL); +assert(Updates_index3 != NULL); +assert(Slater_inv3_2 != NULL); +det = -1.23743195512859e-09; +rc = qmckl_sm_splitting(context, LDS, Dim, N_updates3, Updates3, Updates_index3, breakdown, Slater_inv3_2, &det); +assert(fabs(det - 1.602708950725074e-10) < 1e-15); +for (unsigned int i = 0; i < Dim; i++) { + for (unsigned int j = 0; j < Dim; j++) { + res[i * Dim + j] = 0; + for (unsigned int k = 0; k < Dim; k++) { + res[i * Dim + j] += Slater3[i * Dim + k] * Slater_inv3_2[k * LDS + j]; + } + } +} +rc = QMCKL_SUCCESS; +for (unsigned int i = 0; i < Dim; i++) { + for (unsigned int j = 0; j < Dim; j++) { + if (i == j && fabs(res[i * Dim + j] - 1) > tolerance) { + rc = QMCKL_FAILURE; + } + if (i != j && fabs(res[i * Dim + j]) > tolerance) { + rc = QMCKL_FAILURE; + } + } +} +assert(rc == QMCKL_SUCCESS); +#+end_src + + + * End of files #+begin_src c :comments link :tangle (eval c_test)