1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-05 11:00:36 +01:00

Pedagogical Naive kernel works.

This commit is contained in:
Francois Coppens 2023-02-15 11:46:48 +01:00
parent 87d6acb49a
commit c07553480c

View File

@ -26,6 +26,7 @@ int main() {
qmckl_exit_code rc; qmckl_exit_code rc;
#+end_src #+end_src
This is the range that determines the how many high performance kernel instantces will be generated, using the C-function templates defined in the sections below. If the name of the C-function template is called ~qmckl_kernel_{Dim}~, then ~range(K, L+1)~ will results in kernel instances from ~qmckl_kernel_K~ to ~qmckl_kernel_L~.
#+NAME:kernel_generator_range #+NAME:kernel_generator_range
#+begin_src python :noweb yes :exports none #+begin_src python :noweb yes :exports none
range(2, 22) range(2, 22)
@ -43,6 +44,10 @@ This is the simplest of the available Sherman-Morrison-Woodbury kernels. It appl
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero. zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
#+TODO
Change the math notation so that the update vectors appear as row in the math
so that it is consistent with the representation in C (memory)
The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is The formula for any update $u_j$ (index $j$ is suppresed for clarity) that is applied is
\[ \[
(S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u} (S + uv^T)^{-1} = S^{-1} - \frac{S^{-1} uv^T S^{-1}}{1 + v^T S^{-1} u}
@ -85,48 +90,143 @@ The following source code written in Fortran is inteded to illustrate how the ke
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
not be used in real workloads. not be used in real workloads.
#+begin_src f90 :tangle (eval f) :comment org :export none
subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
implicit none
integer*8 , intent(in) :: lds, dim, nupdates
real*8 , intent(in) :: upds(nupdates * lds)
real*8 , intent(in) :: s_inv(dim * lds)
real*8 , intent(out) , dimension(lds, nupdates) :: Updates
real*8 , intent(out) , dimension(dim, lds) :: Inverse
integer*8 :: i, j
! Construct Updates: lds x nupdates
do i = 1, nupdates
do j = 1, lds
Updates(j, i) = upds((i - 1) * lds + j)
end do
end do
! Construct Inverse: dim x lds
do i = 1, dim
do j = 1, lds
Inverse(i, j) = s_inv((i - 1) * lds + j)
end do
end do
end subroutine convert
#+end_src
#+begin_src f90 :tangle (eval f) :comment org :export none
subroutine copy_back(Inverse, s_inv, lds, dim)
implicit none
integer*8 , intent(in) :: lds, dim
real*8 , intent(in) , dimension(dim, lds) :: Inverse
real*8 , intent(out) :: s_inv(dim * lds)
integer*8 :: i, j
! Copy updated inverse back to s_inv
do i = 1, dim
do j = 1, lds
s_inv((i - 1) * lds + j) = Inverse(i, j)
end do
end do
end subroutine copy_back
#+end_src
#+begin_src f90 :tangle (eval f) #+begin_src f90 :tangle (eval f)
integer function qmckl_sherman_morrison_naive_doc_f(context, & integer function qmckl_sherman_morrison_naive_doc_f(context, &
LDS, Dim, & lds, dim, &
N_updates, & nupdates, &
Updates, & upds, &
Updates_index, & updates_index, &
breakdown, & breakdown, &
Slater_inv, & s_inv, &
determinant) result(info) determinant) result(info)
use qmckl use qmckl
implicit none implicit none
integer*8 , intent(in) :: context integer*8 , intent(in) :: context
integer*8 , intent(in) :: LDS, Dim integer*8 , intent(in) :: lds, dim
integer*8 , intent(in) :: N_updates integer*8 , intent(in) :: nupdates
integer*8 , intent(in) :: Updates_index(N_updates) integer*8 , intent(in) :: updates_index(nupdates)
real*8 , intent(in) :: Updates(N_updates*LDS) real*8 , intent(in) :: upds(nupdates * lds)
real*8 , intent(in) :: breakdown real*8 , intent(in) :: breakdown
real*8 , intent(inout) :: Slater_inv(Dim*LDS) real*8 , intent(inout) :: s_inv(dim * lds)
real*8 , intent(inout) :: determinant real*8 , intent(inout) :: determinant
info = 0 real*8 , dimension(lds, nupdates) :: Updates
real*8 , dimension(dim, lds) :: Inverse
real*8 , dimension(dim) :: C
real*8 , dimension(lds) :: D
real*8 :: denominator, idenominator, update
integer*8 :: i, j, l, row
info = QMCKL_FAILURE
if (context == QMCKL_NULL_CONTEXT) then if (context == QMCKL_NULL_CONTEXT) then
info = QMCKL_INVALID_CONTEXT info = QMCKL_INVALID_CONTEXT
return return
endif endif
write(*,*) "Function 'qmckl_sherman_morrison_naive_doc_f' does nothing for now..." call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
l = 1;
! For each update do...
do while (l < nupdates + 1)
! Compute C = S^{-1}U(l)
do i = 1, dim
C(i) = 0
do j = 1, dim
C(i) = C(i) + Inverse(i, j) * Updates(j, l)
end do
end do
! Compute denominator = 1 + V(l)^TC
row = updates_index(l)
denominator = 1 + C(row)
! Return early if denominator is too small
if (abs(denominator) < breakdown) return
idenominator = 1 / denominator
! Update det(S)
determinant = determinant * denominator
! selecting column: v_l^T * S_inv
D = Inverse(row, :)
! A^{-1} = A^{-1} - C x D / denominator
do i = 1, dim
do j = 1, dim
update = C(i) * D(j) * idenominator
Inverse(i, j) = Inverse(i, j) - update
end do
end do
l = l + 1
end do
! Copy updated inverse back to s_inv
call copy_back(Inverse, s_inv, lds, dim)
info = QMCKL_SUCCESS info = QMCKL_SUCCESS
end function qmckl_sherman_morrison_naive_doc_f end function qmckl_sherman_morrison_naive_doc_f
#+end_src #+end_src
*** C interface to the pedagogical kernel *** C interface to the pedagogical kernel (not directly exposed)
The following interface block in Fortran makes sure that the pedagogical kernel, The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure
written in Fortran, can be called from C using the ~ISO_C_BINDING~. that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h'
for C users and in the module file 'qmckl_f.F90' for Fortran users.
#+CALL: generate_c_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc") #+CALL: generate_c_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sherman_morrison_naive_doc")
#+begin_src f90 :tangle (eval f) #+RESULTS:
#+begin_src f90 :tangle (eval f) :comments org :exports none
integer(c_int32_t) function qmckl_sherman_morrison_naive_doc & integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) & (context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C) result(info) bind(C) result(info)
@ -151,6 +251,7 @@ integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
end function qmckl_sherman_morrison_naive_doc end function qmckl_sherman_morrison_naive_doc
#+end_src #+end_src
** Requirements ** Requirements
* ~context~ is not ~QMCKL_NULL_CONTEXT~ * ~context~ is not ~QMCKL_NULL_CONTEXT~
@ -318,6 +419,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
} }
#+end_src #+end_src
~qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}~ is a C function-template that is used to genereate instances of C fucntions based on the range given above. The advantage of this method is that for each of these instances all the dimensions and loop-bounds are known at compile time, allowing the compiler to optimize more aggressively.
#+NAME:naive_template_code #+NAME:naive_template_code
#+begin_src c #+begin_src c
static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}( static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}(
@ -391,6 +493,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
} }
#+end_src #+end_src
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
#+NAME:naive_kernel_generator #+NAME:naive_kernel_generator
#+begin_src python :noweb yes :exports none #+begin_src python :noweb yes :exports none
text=""" text="""
@ -404,6 +507,7 @@ for Dim in <<kernel_generator_range>>:
return '\n'.join(result) return '\n'.join(result)
#+end_src #+end_src
Python script that generated C switch cases that call individual kernel instances.
#+NAME:naive_switch-case_generator #+NAME:naive_switch-case_generator
#+begin_src python :noweb yes :exports none #+begin_src python :noweb yes :exports none
text=""" text="""
@ -424,6 +528,7 @@ result.append(text.replace("{Dim}",Dim) )
return '\n'.join(result) return '\n'.join(result)
#+end_src #+end_src
~qmckl_sherman_morrison_naive~ is the general function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
#+begin_src c :tangle (eval c) :comments org :noweb yes #+begin_src c :tangle (eval c) :comments org :noweb yes
<<naive_kernel_generator()>> <<naive_kernel_generator()>>