mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 10:06:09 +01:00
Added Slagel splitting back + pedagogical skeleton function and interface.
This commit is contained in:
parent
c07553480c
commit
54a51b6ecc
@ -34,12 +34,14 @@ range(2, 22)
|
||||
|
||||
|
||||
* Naïve Sherman-Morrison
|
||||
** ~qmckl_sherman_morrison_naive~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison_naive
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
*** Introduction
|
||||
This is the simplest of the available Sherman-Morrison-Woodbury kernels. It applies rank-1 updates one by one in
|
||||
the order that is given. It only checks if the denominator in the Sherman-Morrison formula is not too close to
|
||||
zero when an update is evaluated. It will exit with an error code of the denominator is too close to zero.
|
||||
@ -72,6 +74,7 @@ If $1+v_j^TS^{-1}u_j \leq \epsilon$ the update is rejected and the kernel exits
|
||||
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||
from applying the updates to the original matrix.
|
||||
|
||||
*** API
|
||||
#+NAME: qmckl_sherman_morrison_naive_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|-----------------+-------------------------+--------+------------------------------------------------------|
|
||||
@ -85,12 +88,12 @@ from applying the updates to the original matrix.
|
||||
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse of a Slater-matrix |
|
||||
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
||||
|
||||
** Pedagogical kernel source (in Fortran)
|
||||
*** Pedagogical kernel source (in Fortran)
|
||||
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
||||
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
||||
not be used in real workloads.
|
||||
|
||||
#+begin_src f90 :tangle (eval f) :comment org :export none
|
||||
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
||||
subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, dim, nupdates
|
||||
@ -117,7 +120,7 @@ subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||
end subroutine convert
|
||||
#+end_src
|
||||
|
||||
#+begin_src f90 :tangle (eval f) :comment org :export none
|
||||
#+begin_src f90 :tangle (eval f) :comment org :exports none
|
||||
subroutine copy_back(Inverse, s_inv, lds, dim)
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, dim
|
||||
@ -170,6 +173,8 @@ integer function qmckl_sherman_morrison_naive_doc_f(context, &
|
||||
return
|
||||
endif
|
||||
|
||||
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
||||
! matrices 'Updates' and 'Inverse'.
|
||||
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||
|
||||
l = 1;
|
||||
@ -217,7 +222,7 @@ integer function qmckl_sherman_morrison_naive_doc_f(context, &
|
||||
end function qmckl_sherman_morrison_naive_doc_f
|
||||
#+end_src
|
||||
|
||||
*** C interface to the pedagogical kernel (not directly exposed)
|
||||
**** C interface to the pedagogical kernel (not directly exposed)
|
||||
The following Fortran function ~qmckl_sherman_morrison_naive_doc~ makes sure
|
||||
that the pedagogical kernel ~qmckl_sherman_morrison_naive_doc_f~, written in
|
||||
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function ~qmckl_sherman_morrison_naive_doc~ will be exposed in the header file 'qmckl.h'
|
||||
@ -251,8 +256,7 @@ integer(c_int32_t) function qmckl_sherman_morrison_naive_doc &
|
||||
end function qmckl_sherman_morrison_naive_doc
|
||||
#+end_src
|
||||
|
||||
|
||||
** Requirements
|
||||
*** Requirements
|
||||
|
||||
* ~context~ is not ~QMCKL_NULL_CONTEXT~
|
||||
* ~LDS >= 2~
|
||||
@ -264,7 +268,7 @@ end function qmckl_sherman_morrison_naive_doc
|
||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
* ~determinant > 0~
|
||||
|
||||
** C headers (exposed in qmckl.h)
|
||||
*** C headers (exposed in qmckl.h)
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
@ -314,7 +318,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_doc (
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
** C sources
|
||||
*** C sources
|
||||
Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
#include <stdbool.h>
|
||||
@ -495,7 +499,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
|
||||
|
||||
This is the kernel generator written in Python. It uses the kernel generator range and templates defined above to generate the C kernel instances.
|
||||
#+NAME:naive_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
#+begin_src python :noweb yes
|
||||
text="""
|
||||
<<naive_template_code>>
|
||||
"""
|
||||
@ -509,7 +513,7 @@ return '\n'.join(result)
|
||||
|
||||
Python script that generated C switch cases that call individual kernel instances.
|
||||
#+NAME:naive_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
#+begin_src python :noweb yes
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_sherman_morrison_naive_{Dim}(context,
|
||||
@ -528,10 +532,12 @@ for Dim in <<kernel_generator_range>>:
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
~qmckl_sherman_morrison_naive~ is the general function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<naive_kernel_generator()>>
|
||||
#+end_src
|
||||
|
||||
~qmckl_sherman_morrison_naive~ is a generic function that contains decision making logic that calls the proper kernel based on the used library configuration (~--enable-doc~ and ~--enable-hpc~) and the passed array dimensions ~LDS~ and ~Dim~.
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
@ -583,7 +589,7 @@ qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
||||
}
|
||||
#+end_src
|
||||
|
||||
** Fortran interfaces (exposed in qmckl_f.F90)
|
||||
*** Fortran interfaces (exposed in qmckl_f.F90)
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison_naive
|
||||
:CRetType: qmckl_exit_code
|
||||
@ -668,7 +674,11 @@ interface
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
** Tests
|
||||
*** Performance
|
||||
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
||||
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
||||
can never resolve a situation where applying the update causes singular behaviour.
|
||||
*** Tests
|
||||
The tests for the kernels are executed on datasets that are extracted from a run of
|
||||
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
||||
The tests are run such that the kernels reject the computed inverse whenever the computed
|
||||
@ -728,6 +738,419 @@ assert(rc == QMCKL_SUCCESS);
|
||||
#+end_src
|
||||
|
||||
|
||||
* Helper Functions
|
||||
Private helper-functions that are used by the Sherman-Morrison-Woodbury kernels.
|
||||
These functions can only be used internally by the kernels in this module.
|
||||
|
||||
** ~qmckl_slagel_splitting~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_slagel_splitting
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
*** Introduction
|
||||
~qmckl_slagel_splitting~ is the non-recursive, inner part of the 'Sherman-Morrison with update splitting'-kernel.
|
||||
It is used internally to apply a collection of $N$ rank-1 updates to the inverse Slater-matrix $S^{-1}$ and
|
||||
splitting an update in two equal pieces if necessary. In case of a split, it applies the first half of the update,
|
||||
while putting the second half in a waiting queue to be applied at the end.
|
||||
|
||||
Therefore, when $1+v_j^TS^{-1}u_j \geq \epsilon$, the update is applied as usual. Otherwise, $u_j$ will be redefined
|
||||
as $\frac{1}{2}u_j$. One half is applied immediately, the other half will be applied at the end of the algorithm, using vectors
|
||||
$u_{j'}=\frac{1}{2}u_j$ and $v_{j'}^T=v_{j}^T$, which are stored in the array \texttt{later_updates}.
|
||||
|
||||
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||
from applying the updates to the original matrix.
|
||||
|
||||
*** API
|
||||
#+NAME: qmckl_slagel_splitting_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|-----------------+-------------------------+--------+---------------------------------------------------------------|
|
||||
| ~LDS~ | ~uint64_t~ | in | Leading dimension of Slater_inv |
|
||||
| ~Dim~ | ~uint64_t~ | in | Dimension of Slater_inv |
|
||||
| ~N_updates~ | ~uint64_t~ | in | Number of rank-1 updates to be applied to Slater_inv |
|
||||
| ~Updates~ | ~double[N_updates*LDS]~ | in | Array containing the rank-1 updates |
|
||||
| ~Updates_index~ | ~uint64_t[N_updates]~ | in | Array containing positions of the rank-1 updates |
|
||||
| ~breakdown~ | ~double~ | in | Break-down parameter on which to fail or not |
|
||||
| ~Slater_inv~ | ~double[Dim*LDS]~ | inout | Array containing the inverse Slater-matrix |
|
||||
| ~later_updates~ | ~double[N_updates*LDS]~ | inout | Array containing the split updates for later |
|
||||
| ~later_index~ | ~uint64_t[N_updates]~ | inout | Array containing the positions of the split updates for later |
|
||||
| ~later~ | ~uint64_t~ | inout | Number of split updates for later |
|
||||
| ~determinant~ | ~double~ | inout | Determinant of the Slater-matrix |
|
||||
|
||||
*** Pedagogical kernel source (in Fortran)
|
||||
The following source code written in Fortran is inteded to illustrate how the kernel works. Even though the kernel is
|
||||
able to do numerically correct computations, it does not do it in the most efficient way possible. It should therefore
|
||||
not be used in real workloads.
|
||||
|
||||
#+begin_src f90 :tangle (eval f)
|
||||
integer function qmckl_slagel_splitting_doc_f( &
|
||||
lds, dim, &
|
||||
nupdates, &
|
||||
upds, &
|
||||
updates_index, &
|
||||
breakdown, &
|
||||
s_inv, &
|
||||
later_updates, &
|
||||
later_index, &
|
||||
later, &
|
||||
determinant) result(info)
|
||||
|
||||
use qmckl
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, dim
|
||||
integer*8 , intent(in) :: nupdates
|
||||
integer*8 , intent(in) :: updates_index(nupdates)
|
||||
real*8 , intent(in) :: upds(nupdates * lds)
|
||||
real*8 , intent(in) :: breakdown
|
||||
real*8 , intent(inout) :: s_inv(dim * lds)
|
||||
real*8 , intent(inout) :: later_updates(nupdates * lds)
|
||||
integer*8 , intent(inout) :: later_index(nupdates)
|
||||
integer*8 , intent(inout) :: later
|
||||
real*8 , intent(inout) :: determinant
|
||||
|
||||
real*8 , dimension(lds, nupdates) :: Updates
|
||||
real*8 , dimension(dim, lds) :: Inverse
|
||||
|
||||
info = QMCKL_FAILURE
|
||||
|
||||
! Convert 'upds' and 's_inv' into the more easily readable Fortran
|
||||
! matrices 'Updates' and 'Inverse'.
|
||||
call convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||
|
||||
! YET TO BE IMPLEMENTED
|
||||
|
||||
! Copy updated inverse back to s_inv
|
||||
call copy_back(Inverse, s_inv, lds, dim)
|
||||
|
||||
info = QMCKL_SUCCESS
|
||||
|
||||
end function qmckl_slagel_splitting_doc_f
|
||||
#+end_src
|
||||
|
||||
**** C interface to the pedagogical kernel (not directly exposed)
|
||||
The following Fortran function ~qmckl_slagel_splitting_doc~ makes sure
|
||||
that the pedagogical kernel ~qmckl_slagel_splitting_doc_f~, written in
|
||||
Fortran, can be called from C using the ~ISO_C_BINDING~. The Fortran function
|
||||
~qmckl_slagel_splitting_doc~ will be exposed in the header file 'qmckl.h'
|
||||
for C users and in the module file 'qmckl_f.F90' for Fortran users.
|
||||
|
||||
#+CALL: generate_c_interface(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_slagel_splitting_doc")
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval f) :comments org :exports none
|
||||
integer(c_int32_t) function qmckl_slagel_splitting_doc &
|
||||
(LDS, &
|
||||
Dim, &
|
||||
N_updates, &
|
||||
Updates, &
|
||||
Updates_index, &
|
||||
breakdown, &
|
||||
Slater_inv, &
|
||||
later_updates, &
|
||||
later_index, &
|
||||
later, &
|
||||
determinant) &
|
||||
bind(C) result(info)
|
||||
|
||||
use, intrinsic :: iso_c_binding
|
||||
implicit none
|
||||
|
||||
integer (c_int64_t) , intent(in) , value :: LDS
|
||||
integer (c_int64_t) , intent(in) , value :: Dim
|
||||
integer (c_int64_t) , intent(in) , value :: N_updates
|
||||
real (c_double ) , intent(in) :: Updates(N_updates*LDS)
|
||||
integer (c_int64_t) , intent(in) :: Updates_index(N_updates)
|
||||
real (c_double ) , intent(in) , value :: breakdown
|
||||
real (c_double ) , intent(inout) :: Slater_inv(Dim*LDS)
|
||||
real (c_double ) , intent(inout) :: later_updates(N_updates*LDS)
|
||||
integer (c_int64_t) , intent(inout) :: later_index(N_updates)
|
||||
integer (c_int64_t) , intent(inout) :: later
|
||||
real (c_double ) , intent(inout) :: determinant
|
||||
|
||||
integer(c_int32_t), external :: qmckl_slagel_splitting_doc_f
|
||||
info = qmckl_slagel_splitting_doc_f &
|
||||
(LDS, &
|
||||
Dim, &
|
||||
N_updates, &
|
||||
Updates, &
|
||||
Updates_index, &
|
||||
breakdown, &
|
||||
Slater_inv, &
|
||||
later_updates, &
|
||||
later_index, &
|
||||
later, &
|
||||
determinant)
|
||||
|
||||
end function qmckl_slagel_splitting_doc
|
||||
#+end_src
|
||||
|
||||
*** Requirements
|
||||
* ~LDS >= 2~
|
||||
* ~Dim >= 2~
|
||||
* ~N_updates >= 1~
|
||||
* ~Updates~ is allocated with $N_updates \times Dim$ elements
|
||||
* ~Updates_index~ is allocated with $N_updates$ elements
|
||||
* ~breakdown~ is a small number such that $0 < breakdown << 1$
|
||||
* ~Slater_inv~ is allocated with $Dim \times Dim$ elements
|
||||
* ~later_updates~ is allocated with $later \times Dim$ elements
|
||||
* ~later_index~ is allocated with $N_updates$ elements
|
||||
* ~later >= 0~
|
||||
|
||||
*** C headers (exposed in qmckl.h)
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_slagel_splitting_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_slagel_splitting (
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
qmckl_exit_code qmckl_slagel_splitting_hpc(
|
||||
uint64_t LDS,
|
||||
uint64_t Dim,
|
||||
uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant) {
|
||||
|
||||
double __attribute__((aligned(8))) C[LDS];
|
||||
double __attribute__((aligned(8))) D[LDS];
|
||||
|
||||
uint64_t l = 0;
|
||||
// For each update
|
||||
while (l < N_updates) {
|
||||
// C = S^{-1} x U_l
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
C[i] = 0.0f;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
|
||||
}
|
||||
}
|
||||
|
||||
// Denominator
|
||||
const int cui = Updates_index[l] - 1;
|
||||
double den = 1.0f + C[cui];
|
||||
if (fabs(den) < breakdown) {
|
||||
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||
// second halve in later_updates
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t i = 0; i < LDS; i++) {
|
||||
later_updates[*later * LDS + i] = Updates[l * LDS + i] * 0.5f;
|
||||
C[i] *= 0.5f;
|
||||
}
|
||||
later_index[*later] = Updates_index[l];
|
||||
(*later)++;
|
||||
|
||||
den = 1.0f + C[cui];
|
||||
} // From here onwards we continue with applying the first halve of the
|
||||
// update to Slater_inv
|
||||
double iden = 1.0f / den;
|
||||
|
||||
if (determinant)
|
||||
*determinant *= den;
|
||||
|
||||
// D = v^T x S^{-1} : 1 x LDS
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
D[j] = Slater_inv[cui * LDS + j];
|
||||
}
|
||||
|
||||
// S^{-1} = S^{-1} - C x D / den
|
||||
for (uint64_t i = 0; i < Dim; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < LDS; j++) {
|
||||
const double update = C[i] * D[j] * iden;
|
||||
Slater_inv[i * LDS + j] -= update;
|
||||
}
|
||||
}
|
||||
l += 1;
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
#+NAME:slagel_splitting_template_code
|
||||
#+begin_src c
|
||||
static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}(
|
||||
uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant) {
|
||||
|
||||
double __attribute__((aligned(8))) C[D{Dim}_P];
|
||||
double __attribute__((aligned(8))) D[D{Dim}_P];
|
||||
|
||||
uint64_t l = 0;
|
||||
// For each update
|
||||
while (l < N_updates) {
|
||||
// C = S^{-1} x U_l
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
C[i] = 0.0f;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
||||
}
|
||||
}
|
||||
|
||||
// Denominator
|
||||
const int cui = Updates_index[l] - 1;
|
||||
double den = 1.0f + C[cui];
|
||||
if (fabs(den) < breakdown) {
|
||||
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||
// second halve in later_updates
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t i = 0; i < D{Dim}_P; i++) {
|
||||
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
||||
C[i] *= 0.5f;
|
||||
}
|
||||
later_index[*later] = Updates_index[l];
|
||||
(*later)++;
|
||||
|
||||
den = 1.0f + C[cui];
|
||||
} // From here onwards we continue with applying the first halve of the
|
||||
// update to Slater_inv
|
||||
double iden = 1.0f / den;
|
||||
|
||||
if (determinant)
|
||||
*determinant *= den;
|
||||
|
||||
// D = v^T x S^{-1} : 1 x D{Dim}_P
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
||||
}
|
||||
|
||||
// S^{-1} = S^{-1} - C x D / den
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
const double update = C[i] * D[j] * iden;
|
||||
Slater_inv[i * D{Dim}_P + j] -= update;
|
||||
}
|
||||
}
|
||||
l += 1;
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
#+NAME:slagel_splitting_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
<<slagel_splitting_template_code>>
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
#+NAME:slagel_splitting_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_slagel_splitting_{Dim}(
|
||||
N_updates,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
later_updates,
|
||||
later_index,
|
||||
later,
|
||||
determinant);
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<slagel_splitting_kernel_generator()>>
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
qmckl_exit_code qmckl_slagel_splitting(
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant) {
|
||||
|
||||
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||
switch (Dim) {
|
||||
<<slagel_splitting_switch-case_generator()>>
|
||||
}
|
||||
}
|
||||
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||
return qmckl_slagel_splitting_hpc(
|
||||
LDS,
|
||||
Dim,
|
||||
N_updates,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
later_updates,
|
||||
later_index,
|
||||
later,
|
||||
determinant);
|
||||
}
|
||||
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
*** Performance
|
||||
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
||||
with Sherman-Morrison and update splitting. Please look at the performance reccomendations for those two kernels.
|
||||
|
||||
* End of files
|
||||
|
||||
#+begin_src c :comments link :tangle (eval c_test)
|
||||
|
Loading…
Reference in New Issue
Block a user