mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-05 11:00:36 +01:00
Added Slagel splitting back + pedagogical skeleton function and interface.
This commit is contained in:
parent
c07553480c
commit
54a51b6ecc
@ -34,12 +34,14 @@ range(2, 22)
|
|||||||
|
|
||||||
|
|
||||||
* Naïve Sherman-Morrison
|
* Naïve Sherman-Morrison
|
||||||
|
** ~qmckl_sherman_morrison_naive~
|
||||||
:PROPERTIES:
|
:PROPERTIES:
|
||||||
:Name: qmckl_sherman_morrison_naive
|
:Name: qmckl_sherman_morrison_naive
|
||||||
:CRetType: qmckl_exit_code
|
:CRetType: qmckl_exit_code
|
||||||
:FRetType: qmckl_exit_code
|
:FRetType: qmckl_exit_code
|
||||||
:END:
|
:END:
|
||||||
|
|
||||||
|
*** Introduction
|
||||||
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
||||||
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
||||||
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
||||||
@ -72,6 +74,7 @@ If $1+v_j^TS^{-1}u_j \leq \epsilon$ the update is rejected and the kernel exits
|
|||||||
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||||
from applying the updates to the original matrix.
|
from applying the updates to the original matrix.
|
||||||
|
|
||||||
|
*** API
|
||||||
#+NAME: qmckl_sherman_morrison_naive_args
|
#+NAME: qmckl_sherman_morrison_naive_args
|
||||||
| Variable | Type | In/Out | Description |
|
| Variable | Type | In/Out | Description |
|
||||||
|-----------------+-------------------------+--------+------------------------------------------------------|
|
|-----------------+-------------------------+--------+------------------------------------------------------|
|
||||||
@ -85,12 +88,12 @@ from applying the updates to the original matrix.
|
|||||||
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse of a Slater-matrix |
|
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse of a Slater-matrix |
|
||||||
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
||||||
|
|
||||||
** Pedagogical kernel source (in Fortran)
|
*** Pedagogical kernel source (in Fortran)
|
||||||
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
||||||
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
||||||
not be used in real workloads.
|
not be used in real workloads.
|
||||||
|
|
||||||
#+begin_src f90 :tangle (eval f) :comment org :export none
|
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
||||||
subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||||
implicit none
|
implicit none
|
||||||
integer*8 , intent(in) :: lds, dim, nupdates
|
integer*8 , intent(in) :: lds, dim, nupdates
|
||||||
@ -117,7 +120,7 @@ subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
|||||||
end subroutine convert
|
end subroutine convert
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
#+begin_src f90 :tangle (eval f) :comment org :export none
|
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
||||||
subroutine copy_back(Inverse, s_inv, lds, dim)
|
subroutine copy_back(Inverse, s_inv, lds, dim)
|
||||||
implicit none
|
implicit none
|
||||||
integer*8 , intent(in) :: lds, dim
|
integer*8 , intent(in) :: lds, dim
|
||||||
@ -170,6 +173,8 @@ integer function qmckl_sherman_morrison_naive_doc_f(context, &
|
|||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
||||||
|
! matrices 'Updates' and 'Inverse'.
|
||||||
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||||
|
|
||||||
l = 1;
|
l = 1;
|
||||||
@ -217,7 +222,7 @@ integer function qmckl_sherman_morrison_naive_doc_f(context, &
|
|||||||
end function qmckl_sherman_morrison_naive_doc_f
|
end function qmckl_sherman_morrison_naive_doc_f
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
*** C interface to the pedagogical kernel (not directly exposed)
|
**** C interface to the pedagogical kernel (not directly exposed)
|
||||||
The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure
|
The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure
|
||||||
that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in
|
that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in
|
||||||
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h'
|
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h'
|
||||||
@ -251,8 +256,7 @@ integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
|
|||||||
end function qmckl_sherman_morrison_naive_doc
|
end function qmckl_sherman_morrison_naive_doc
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
*** Requirements
|
||||||
** Requirements
|
|
||||||
|
|
||||||
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||||
* ~LDS >= 2~
|
* ~LDS >= 2~
|
||||||
@ -264,7 +268,7 @@ end function qmckl_sherman_morrison_naive_doc
|
|||||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||||
* ~determinant > 0~
|
* ~determinant > 0~
|
||||||
|
|
||||||
** C headers (exposed in qmckl.h)
|
*** C headers (exposed in qmckl.h)
|
||||||
|
|
||||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
@ -314,7 +318,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_doc (
|
|||||||
double* determinant );
|
double* determinant );
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** C sources
|
*** C sources
|
||||||
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
||||||
#+begin_src c :tangle (eval c) :comments org
|
#+begin_src c :tangle (eval c) :comments org
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
@ -495,7 +499,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
|
|||||||
|
|
||||||
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
||||||
#+NAME:naive_kernel_generator
|
#+NAME:naive_kernel_generator
|
||||||
#+begin_src python :noweb yes :exports none
|
#+begin_src python :noweb yes
|
||||||
text="""
|
text="""
|
||||||
<<naive_template_code>>
|
<<naive_template_code>>
|
||||||
"""
|
"""
|
||||||
@ -509,7 +513,7 @@ return '\n'.join(result)
|
|||||||
|
|
||||||
Python script that generated C switch cases that call individual kernel instances.
|
Python script that generated C switch cases that call individual kernel instances.
|
||||||
#+NAME:naive_switch-case_generator
|
#+NAME:naive_switch-case_generator
|
||||||
#+begin_src python :noweb yes :exports none
|
#+begin_src python :noweb yes
|
||||||
text="""
|
text="""
|
||||||
case {Dim}:
|
case {Dim}:
|
||||||
return qmckl_sherman_morrison_naive_{Dim}(context,
|
return qmckl_sherman_morrison_naive_{Dim}(context,
|
||||||
@ -528,10 +532,12 @@ for Dim in <<kernel_generator_range>>:
|
|||||||
return '\n'.join(result)
|
return '\n'.join(result)
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
~qmckl_sherman_morrison_naive~ is the general function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
|
|
||||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
<<naive_kernel_generator()>>
|
<<naive_kernel_generator()>>
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
~qmckl_sherman_morrison_naive~ is a generic function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
|
||||||
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
||||||
const uint64_t LDS,
|
const uint64_t LDS,
|
||||||
const uint64_t Dim,
|
const uint64_t Dim,
|
||||||
@ -583,7 +589,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
|||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Fortran interfaces (exposed in qmckl_f.F90)
|
*** Fortran interfaces (exposed in qmckl_f.F90)
|
||||||
:PROPERTIES:
|
:PROPERTIES:
|
||||||
:Name: qmckl_sherman_morrison_naive
|
:Name: qmckl_sherman_morrison_naive
|
||||||
:CRetType: qmckl_exit_code
|
:CRetType: qmckl_exit_code
|
||||||
@ -668,7 +674,11 @@ interface
|
|||||||
end interface
|
end interface
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Tests
|
*** Performance
|
||||||
|
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
||||||
|
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
||||||
|
can never resolve a situation where applying the update causes singular behaviour.
|
||||||
|
*** Tests
|
||||||
The tests for the kernels are executed on datasets that are extracted from a run of
|
The tests for the kernels are executed on datasets that are extracted from a run of
|
||||||
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
||||||
The tests are run such that the kernels reject the computed inverse whenever the computed
|
The tests are run such that the kernels reject the computed inverse whenever the computed
|
||||||
@ -728,6 +738,419 @@ assert(rc == QMCKL_SUCCESS);
|
|||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
|
* Helper Functions
|
||||||
|
Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels.
|
||||||
|
These functions can only be used internally by the kernels in this module.
|
||||||
|
|
||||||
|
** ~qmckl_slagel_splitting~
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_slagel_splitting
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
*** Introduction
|
||||||
|
~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel.
|
||||||
|
It is used internally to apply a collection of $N$ rank-1 updates to the inverse Slater-matrix $S^{-1}$ and
|
||||||
|
splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update,
|
||||||
|
while putting the second half in a waiting queue to be applied at the end.
|
||||||
|
|
||||||
|
Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined
|
||||||
|
as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors
|
||||||
|
$u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}.
|
||||||
|
|
||||||
|
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||||
|
from applying the updates to the original matrix.
|
||||||
|
|
||||||
|
*** API
|
||||||
|
#+NAME: qmckl_slagel_splitting_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|-----------------+-------------------------+--------+---------------------------------------------------------------|
|
||||||
|
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
||||||
|
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
||||||
|
| ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv |
|
||||||
|
| ~Updates~ | ~double[N_updates*LDS]~ | in | Array containing the rank-1 updates |
|
||||||
|
| ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing positions of the rank-1 updates |
|
||||||
|
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
||||||
|
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse Slater-matrix |
|
||||||
|
| ~later_updates~ | ~double[N_updates*LDS]~ | inout | Array containing the split updates for later |
|
||||||
|
| ~later_index~ | ~uint64_t[N_updates]~ | inout | Array containing the positions of the split updates for later |
|
||||||
|
| ~later~ | ~uint64_t~ | inout | Number of split updates for later |
|
||||||
|
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
||||||
|
|
||||||
|
*** Pedagogical kernel source (in Fortran)
|
||||||
|
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
||||||
|
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
||||||
|
not be used in real workloads.
|
||||||
|
|
||||||
|
#+begin_src f90 :tangle (eval f)
|
||||||
|
integer function qmckl_slagel_splitting_doc_f( &
|
||||||
|
lds, dim, &
|
||||||
|
nupdates, &
|
||||||
|
upds, &
|
||||||
|
updates_index, &
|
||||||
|
breakdown, &
|
||||||
|
s_inv, &
|
||||||
|
later_updates, &
|
||||||
|
later_index, &
|
||||||
|
later, &
|
||||||
|
determinant) result(info)
|
||||||
|
|
||||||
|
use qmckl
|
||||||
|
implicit none
|
||||||
|
integer*8 , intent(in) :: lds, dim
|
||||||
|
integer*8 , intent(in) :: nupdates
|
||||||
|
integer*8 , intent(in) :: updates_index(nupdates)
|
||||||
|
real*8 , intent(in) :: upds(nupdates * lds)
|
||||||
|
real*8 , intent(in) :: breakdown
|
||||||
|
real*8 , intent(inout) :: s_inv(dim * lds)
|
||||||
|
real*8 , intent(inout) :: later_updates(nupdates * lds)
|
||||||
|
integer*8 , intent(inout) :: later_index(nupdates)
|
||||||
|
integer*8 , intent(inout) :: later
|
||||||
|
real*8 , intent(inout) :: determinant
|
||||||
|
|
||||||
|
real*8 , dimension(lds, nupdates) :: Updates
|
||||||
|
real*8 , dimension(dim, lds) :: Inverse
|
||||||
|
|
||||||
|
info = QMCKL_FAILURE
|
||||||
|
|
||||||
|
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
||||||
|
! matrices 'Updates' and 'Inverse'.
|
||||||
|
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||||
|
|
||||||
|
! YET TO BE IMPLEMENTED
|
||||||
|
|
||||||
|
! Copy updated inverse back to s_inv
|
||||||
|
call copy_back(Inverse, s_inv, lds, dim)
|
||||||
|
|
||||||
|
info = QMCKL_SUCCESS
|
||||||
|
|
||||||
|
end function qmckl_slagel_splitting_doc_f
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
**** C interface to the pedagogical kernel (not directly exposed)
|
||||||
|
The following Fortran function ~qmckl_slagel_splitting_doc~ makes sure
|
||||||
|
that the pedagogical kernel ~qmckl_slagel_splitting_doc_f~, written in
|
||||||
|
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function
|
||||||
|
~qmckl_slagel_splitting_doc~ will be exposed in the header file 'qmckl.h'
|
||||||
|
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
||||||
|
|
||||||
|
#+CALL: generate_c_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_slagel_splitting_doc")
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
||||||
|
integer(c_int32_t) function qmckl_slagel_splitting_doc &
|
||||||
|
(LDS, &
|
||||||
|
Dim, &
|
||||||
|
N_updates, &
|
||||||
|
Updates, &
|
||||||
|
Updates_index, &
|
||||||
|
breakdown, &
|
||||||
|
Slater_inv, &
|
||||||
|
later_updates, &
|
||||||
|
later_index, &
|
||||||
|
later, &
|
||||||
|
determinant) &
|
||||||
|
bind(C) result(info)
|
||||||
|
|
||||||
|
use, intrinsic :: iso_c_binding
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer (c_int64_t) , intent(in) , value :: LDS
|
||||||
|
integer (c_int64_t) , intent(in) , value :: Dim
|
||||||
|
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||||
|
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
||||||
|
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||||
|
real (c_double ) , intent(in) , value :: breakdown
|
||||||
|
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
||||||
|
real (c_double ) , intent(inout) :: later_updates(N_updates*LDS)
|
||||||
|
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
||||||
|
integer (c_int64_t) , intent(inout) :: later
|
||||||
|
real (c_double ) , intent(inout) :: determinant
|
||||||
|
|
||||||
|
integer(c_int32_t), external :: qmckl_slagel_splitting_doc_f
|
||||||
|
info = qmckl_slagel_splitting_doc_f &
|
||||||
|
(LDS, &
|
||||||
|
Dim, &
|
||||||
|
N_updates, &
|
||||||
|
Updates, &
|
||||||
|
Updates_index, &
|
||||||
|
breakdown, &
|
||||||
|
Slater_inv, &
|
||||||
|
later_updates, &
|
||||||
|
later_index, &
|
||||||
|
later, &
|
||||||
|
determinant)
|
||||||
|
|
||||||
|
end function qmckl_slagel_splitting_doc
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** Requirements
|
||||||
|
* ~LDS >= 2~
|
||||||
|
* ~Dim >= 2~
|
||||||
|
* ~N_updates >= 1~
|
||||||
|
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||||
|
* ~Updates_index~ is allocated with $N_updates$ elements
|
||||||
|
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||||
|
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||||
|
* ~later_updates~ is allocated with $later \times Dim$ elements
|
||||||
|
* ~later_index~ is allocated with $N_updates$ elements
|
||||||
|
* ~later >= 0~
|
||||||
|
|
||||||
|
*** C headers (exposed in qmckl.h)
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
qmckl_exit_code qmckl_slagel_splitting (
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const uint64_t N_updates,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* later_updates,
|
||||||
|
uint64_t* later_index,
|
||||||
|
uint64_t* later,
|
||||||
|
double* determinant );
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** C source
|
||||||
|
#+begin_src c :tangle (eval c) :comments org
|
||||||
|
qmckl_exit_code qmckl_slagel_splitting_hpc(
|
||||||
|
uint64_t LDS,
|
||||||
|
uint64_t Dim,
|
||||||
|
uint64_t N_updates,
|
||||||
|
const double* __restrict Updates,
|
||||||
|
const uint64_t* __restrict Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* __restrict Slater_inv,
|
||||||
|
double* __restrict later_updates,
|
||||||
|
uint64_t* __restrict later_index,
|
||||||
|
uint64_t* __restrict later,
|
||||||
|
double* __restrict determinant) {
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) C[LDS];
|
||||||
|
double __attribute__((aligned(8))) D[LDS];
|
||||||
|
|
||||||
|
uint64_t l = 0;
|
||||||
|
// For each update
|
||||||
|
while (l < N_updates) {
|
||||||
|
// C = S^{-1} x U_l
|
||||||
|
for (uint64_t i = 0; i < Dim; i++) {
|
||||||
|
C[i] = 0.0f;
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < LDS; j++) {
|
||||||
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Denominator
|
||||||
|
const int cui = Updates_index[l] - 1;
|
||||||
|
double den = 1.0f + C[cui];
|
||||||
|
if (fabs(den) < breakdown) {
|
||||||
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||||
|
// second halve in later_updates
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t i = 0; i < LDS; i++) {
|
||||||
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
||||||
|
C[i] *= 0.5f;
|
||||||
|
}
|
||||||
|
later_index[*later] = Updates_index[l];
|
||||||
|
(*later)++;
|
||||||
|
|
||||||
|
den = 1.0f + C[cui];
|
||||||
|
} // From here onwards we continue with applying the first halve of the
|
||||||
|
// update to Slater_inv
|
||||||
|
double iden = 1.0f / den;
|
||||||
|
|
||||||
|
if (determinant)
|
||||||
|
*determinant *= den;
|
||||||
|
|
||||||
|
// D = v^T x S^{-1} : 1 x LDS
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < LDS; j++) {
|
||||||
|
D[j] = Slater_inv[cui * LDS + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// S^{-1} = S^{-1} - C x D / den
|
||||||
|
for (uint64_t i = 0; i < Dim; i++) {
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < LDS; j++) {
|
||||||
|
const double update = C[i] * D[j] * iden;
|
||||||
|
Slater_inv[i * LDS + j] -= update;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:slagel_splitting_template_code
|
||||||
|
#+begin_src c
|
||||||
|
static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}(
|
||||||
|
uint64_t N_updates,
|
||||||
|
const double* __restrict Updates,
|
||||||
|
const uint64_t* __restrict Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* __restrict Slater_inv,
|
||||||
|
double* __restrict later_updates,
|
||||||
|
uint64_t* __restrict later_index,
|
||||||
|
uint64_t* __restrict later,
|
||||||
|
double* __restrict determinant) {
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) C[D{Dim}_P];
|
||||||
|
double __attribute__((aligned(8))) D[D{Dim}_P];
|
||||||
|
|
||||||
|
uint64_t l = 0;
|
||||||
|
// For each update
|
||||||
|
while (l < N_updates) {
|
||||||
|
// C = S^{-1} x U_l
|
||||||
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||||
|
C[i] = 0.0f;
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||||
|
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Denominator
|
||||||
|
const int cui = Updates_index[l] - 1;
|
||||||
|
double den = 1.0f + C[cui];
|
||||||
|
if (fabs(den) < breakdown) {
|
||||||
|
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||||
|
// second halve in later_updates
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t i = 0; i < D{Dim}_P; i++) {
|
||||||
|
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
||||||
|
C[i] *= 0.5f;
|
||||||
|
}
|
||||||
|
later_index[*later] = Updates_index[l];
|
||||||
|
(*later)++;
|
||||||
|
|
||||||
|
den = 1.0f + C[cui];
|
||||||
|
} // From here onwards we continue with applying the first halve of the
|
||||||
|
// update to Slater_inv
|
||||||
|
double iden = 1.0f / den;
|
||||||
|
|
||||||
|
if (determinant)
|
||||||
|
*determinant *= den;
|
||||||
|
|
||||||
|
// D = v^T x S^{-1} : 1 x D{Dim}_P
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||||
|
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// S^{-1} = S^{-1} - C x D / den
|
||||||
|
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||||
|
IVDEP
|
||||||
|
ALIGNED
|
||||||
|
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||||
|
const double update = C[i] * D[j] * iden;
|
||||||
|
Slater_inv[i * D{Dim}_P + j] -= update;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:slagel_splitting_kernel_generator
|
||||||
|
#+begin_src python :noweb yes :exports none
|
||||||
|
text="""
|
||||||
|
<<slagel_splitting_template_code>>
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
for Dim in <<kernel_generator_range>>:
|
||||||
|
Dim=str(Dim)
|
||||||
|
result.append(text.replace("{Dim}",Dim) )
|
||||||
|
|
||||||
|
return '\n'.join(result)
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME:slagel_splitting_switch-case_generator
|
||||||
|
#+begin_src python :noweb yes :exports none
|
||||||
|
text="""
|
||||||
|
case {Dim}:
|
||||||
|
return qmckl_slagel_splitting_{Dim}(
|
||||||
|
N_updates,
|
||||||
|
Updates,
|
||||||
|
Updates_index,
|
||||||
|
breakdown,
|
||||||
|
Slater_inv,
|
||||||
|
later_updates,
|
||||||
|
later_index,
|
||||||
|
later,
|
||||||
|
determinant);
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
for Dim in <<kernel_generator_range>>:
|
||||||
|
Dim=str(Dim)
|
||||||
|
result.append(text.replace("{Dim}",Dim) )
|
||||||
|
|
||||||
|
return '\n'.join(result)
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
|
<<slagel_splitting_kernel_generator()>>
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||||
|
qmckl_exit_code qmckl_slagel_splitting(
|
||||||
|
const uint64_t LDS,
|
||||||
|
const uint64_t Dim,
|
||||||
|
const uint64_t N_updates,
|
||||||
|
const double* Updates,
|
||||||
|
const uint64_t* Updates_index,
|
||||||
|
const double breakdown,
|
||||||
|
double* Slater_inv,
|
||||||
|
double* later_updates,
|
||||||
|
uint64_t* later_index,
|
||||||
|
uint64_t* later,
|
||||||
|
double* determinant) {
|
||||||
|
|
||||||
|
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||||
|
switch (Dim) {
|
||||||
|
<<slagel_splitting_switch-case_generator()>>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||||
|
return qmckl_slagel_splitting_hpc(
|
||||||
|
LDS,
|
||||||
|
Dim,
|
||||||
|
N_updates,
|
||||||
|
Updates,
|
||||||
|
Updates_index,
|
||||||
|
breakdown,
|
||||||
|
Slater_inv,
|
||||||
|
later_updates,
|
||||||
|
later_index,
|
||||||
|
later,
|
||||||
|
determinant);
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_FAILURE;
|
||||||
|
}
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** Performance
|
||||||
|
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
||||||
|
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
|
||||||
|
|
||||||
* End of files
|
* End of files
|
||||||
|
|
||||||
#+begin_src c :comments link :tangle (eval c_test)
|
#+begin_src c :comments link :tangle (eval c_test)
|
||||||
|
Loading…
Reference in New Issue
Block a user