From c07553480cd96cdf80b8fce771780082ea1cbb39 Mon Sep 17 00:00:00 2001 From: Francois Coppens Date: Wed, 15 Feb 2023 11:46:48 +0100 Subject: [PATCH] Pedagogical Naive kernel works. --- org/qmckl_sherman_morrison_woodbury.org | 139 +++++++++++++++++++++--- 1 file changed, 122 insertions(+), 17 deletions(-) diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 858da0f..b81128b 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -26,6 +26,7 @@ int main() { qmckl_exit_code rc; #+end_src +This is the range that determines the how many high performance kernel instantces will be generated, using the C-function templates defined in the sections below. If the name of the C-function template is called ~qmckl_kernel_{Dim}~, then ~range(K, L+1)~ will results in kernel instances from ~qmckl_kernel_K~ to ~qmckl_kernel_L~. #+NAME:kernel_generator_range #+begin_src python :noweb yes :exports none range(2, 22) @@ -43,6 +44,10 @@ This is the simplest of the available Sherman-Morrison-Woodbury kernels. It appl the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero. +#+TODO +Change the math notation so that the update vectors appear as row in the math +so that it is consistent with the representation in C (memory) + The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is \[ (S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u} @@ -85,48 +90,143 @@ The following source code written in Fortran is inteded to illustrate how the ke able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore not be used in real workloads. +#+begin_src f90 :tangle (eval f) :comment org :export none +subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim) + implicit none + integer*8 , intent(in) :: lds, dim, nupdates + real*8 , intent(in) :: upds(nupdates * lds) + real*8 , intent(in) :: s_inv(dim * lds) + real*8 , intent(out) , dimension(lds, nupdates) :: Updates + real*8 , intent(out) , dimension(dim, lds) :: Inverse + + integer*8 :: i, j + + ! Construct Updates: lds x nupdates + do i = 1, nupdates + do j = 1, lds + Updates(j, i) = upds((i - 1) * lds + j) + end do + end do + + ! Construct Inverse: dim x lds + do i = 1, dim + do j = 1, lds + Inverse(i, j) = s_inv((i - 1) * lds + j) + end do + end do +end subroutine convert +#+end_src + +#+begin_src f90 :tangle (eval f) :comment org :export none +subroutine copy_back(Inverse, s_inv, lds, dim) + implicit none + integer*8 , intent(in) :: lds, dim + real*8 , intent(in) , dimension(dim, lds) :: Inverse + real*8 , intent(out) :: s_inv(dim * lds) + + integer*8 :: i, j + + ! Copy updated inverse back to s_inv + do i = 1, dim + do j = 1, lds + s_inv((i - 1) * lds + j) = Inverse(i, j) + end do + end do +end subroutine copy_back +#+end_src + #+begin_src f90 :tangle (eval f) integer function qmckl_sherman_morrison_naive_doc_f(context, & - LDS, Dim, & - N_updates, & - Updates, & - Updates_index, & + lds, dim, & + nupdates, & + upds, & + updates_index, & breakdown, & - Slater_inv, & + s_inv, & determinant) result(info) use qmckl implicit none integer*8 , intent(in) :: context - integer*8 , intent(in) :: LDS, Dim - integer*8 , intent(in) :: N_updates - integer*8 , intent(in) :: Updates_index(N_updates) - real*8 , intent(in) :: Updates(N_updates*LDS) + integer*8 , intent(in) :: lds, dim + integer*8 , intent(in) :: nupdates + integer*8 , intent(in) :: updates_index(nupdates) + real*8 , intent(in) :: upds(nupdates * lds) real*8 , intent(in) :: breakdown - real*8 , intent(inout) :: Slater_inv(Dim*LDS) + real*8 , intent(inout) :: s_inv(dim * lds) real*8 , intent(inout) :: determinant - info = 0 + real*8 , dimension(lds, nupdates) :: Updates + real*8 , dimension(dim, lds) :: Inverse + real*8 , dimension(dim) :: C + real*8 , dimension(lds) :: D + real*8 :: denominator, idenominator, update + integer*8 :: i, j, l, row + + info = QMCKL_FAILURE if (context == QMCKL_NULL_CONTEXT) then info = QMCKL_INVALID_CONTEXT return endif - write(*,*) "Function 'qmckl_sherman_morrison_naive_doc_f' does nothing for now..." + call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim) + + l = 1; + ! For each update do... + do while (l < nupdates + 1) + + ! Compute C = S^{-1}U(l) + do i = 1, dim + C(i) = 0 + do j = 1, dim + C(i) = C(i) + Inverse(i, j) * Updates(j, l) + end do + end do + + ! Compute denominator = 1 + V(l)^TC + row = updates_index(l) + denominator = 1 + C(row) + + ! Return early if denominator is too small + if (abs(denominator) < breakdown) return + idenominator = 1 / denominator + + ! Update det(S) + determinant = determinant * denominator + + ! selecting column: v_l^T * S_inv + D = Inverse(row, :) + + ! A^{-1} = A^{-1} - C x D / denominator + do i = 1, dim + do j = 1, dim + update = C(i) * D(j) * idenominator + Inverse(i, j) = Inverse(i, j) - update + end do + end do + + l = l + 1 + end do + + ! Copy updated inverse back to s_inv + call copy_back(Inverse, s_inv, lds, dim) info = QMCKL_SUCCESS end function qmckl_sherman_morrison_naive_doc_f #+end_src -*** C interface to the pedagogical kernel -The following interface block in Fortran makes sure that the pedagogical kernel, -written in Fortran, can be called from C using the ~ISO_C_BINDING~. +*** C interface to the pedagogical kernel (not directly exposed) +The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure +that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in +Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h' +for C users and in the module file 'qmckl_f.F90' for Fortran users. #+CALL: generate_c_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") -#+begin_src f90 :tangle (eval f) +#+RESULTS: +#+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & bind(C) result(info) @@ -151,6 +251,7 @@ integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & end function qmckl_sherman_morrison_naive_doc #+end_src + ** Requirements * ~context~ is not ~QMCKL_NULL_CONTEXT~ @@ -318,6 +419,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc( } #+end_src +~qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}~ is a C function-template that is used to genereate instances of C fucntions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop-bounds are known at compile time, allowing the compiler to optimize more aggressively. #+NAME:naive_template_code #+begin_src c static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}( @@ -391,6 +493,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc( } #+end_src +This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances. #+NAME:naive_kernel_generator #+begin_src python :noweb yes :exports none text=""" @@ -404,6 +507,7 @@ for Dim in <>: return '\n'.join(result) #+end_src +Python script that generated C switch cases that call individual kernel instances. #+NAME:naive_switch-case_generator #+begin_src python :noweb yes :exports none text=""" @@ -419,11 +523,12 @@ case {Dim}: result = [] for Dim in <>: Dim=str(Dim) -result.append(text.replace("{Dim}",Dim) ) + result.append(text.replace("{Dim}",Dim) ) return '\n'.join(result) #+end_src +~qmckl_sherman_morrison_naive~ is the general function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~. #+begin_src c :tangle (eval c) :comments org :noweb yes <>