1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-31 16:46:03 +01:00

Added Slagel Splitting kernel template generator.

This commit is contained in:
Francois Coppens 2023-01-27 19:33:27 +01:00
parent 6c0430a509
commit 31ea30cdc3

View File

@ -1762,33 +1762,34 @@ These functions can only be used internally by the kernels in this module.
#+RESULTS: #+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org #+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_slagel_splitting ( qmckl_exit_code qmckl_slagel_splitting (
const uint64_t LDS, const uint64_t LDS,
const uint64_t Dim, const uint64_t Dim,
const uint64_t N_updates, const uint64_t N_updates,
const double* Updates, const double* Updates,
const uint64_t* Updates_index, const uint64_t* Updates_index,
const double breakdown, const double breakdown,
double* Slater_inv, double* Slater_inv,
double* later_updates, double* later_updates,
uint64_t* later_index, uint64_t* later_index,
uint64_t* later, uint64_t* later,
double* determinant); double* determinant);
#+end_src #+end_src
*** C source *** C source
#+begin_src c :tangle (eval c) :comments org #+begin_src c :tangle (eval c) :comments org
qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS, qmckl_exit_code qmckl_slagel_splitting_hpc(
uint64_t Dim, uint64_t LDS,
uint64_t N_updates, uint64_t Dim,
const double* Updates, uint64_t N_updates,
const uint64_t* Updates_index, const double* __restrict Updates,
const double breakdown, const uint64_t* __restrict Updates_index,
double* Slater_inv, const double breakdown,
double* later_updates, double* __restrict Slater_inv,
uint64_t* later_index, double* __restrict later_updates,
uint64_t* later, uint64_t* __restrict later_index,
double* determinant) { uint64_t* __restrict later,
double* __restrict determinant) {
double __attribute__((aligned(8))) C[LDS]; double __attribute__((aligned(8))) C[LDS];
double __attribute__((aligned(8))) D[LDS]; double __attribute__((aligned(8))) D[LDS];
@ -1850,9 +1851,164 @@ qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS,
return QMCKL_SUCCESS; return QMCKL_SUCCESS;
} }
#+end_src #+end_src
#+NAME:slagel_splitting_template_code
#+begin_src c
static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}(
uint64_t N_updates,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict later_updates,
uint64_t* __restrict later_index,
uint64_t* __restrict later,
double* __restrict determinant) {
double __attribute__((aligned(8))) C[D{Dim}_P];
double __attribute__((aligned(8))) D[D{Dim}_P];
uint64_t l = 0;
// For each update
while (l < N_updates) {
// C = S^{-1} x U_l
for (uint64_t i = 0; i < {Dim}; i++) {
C[i] = 0.0f;
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
}
}
// Denominator
const int cui = Updates_index[l] - 1;
double den = 1.0f + C[cui];
if (fabs(den) < breakdown) {
// U_l = U_l / 2: split the update in 2 equal halves and save the
// second halve in later_updates
IVDEP
ALIGNED
for (uint64_t i = 0; i < D{Dim}_P; i++) {
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
C[i] *= 0.5f;
}
later_index[*later] = Updates_index[l];
(*later)++;
den = 1.0f + C[cui];
} // From here onwards we continue with applying the first halve of the
// update to Slater_inv
double iden = 1.0f / den;
if (determinant)
*determinant *= den;
// D = v^T x S^{-1} : 1 x D{Dim}_P
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
D[j] = Slater_inv[cui * D{Dim}_P + j];
}
// S^{-1} = S^{-1} - C x D / den
for (uint64_t i = 0; i < {Dim}; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
const double update = C[i] * D[j] * iden;
Slater_inv[i * D{Dim}_P + j] -= update;
}
}
l += 1;
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:slagel_splitting_kernel_generator
#+begin_src python :noweb yes :exports none
text="""
<<slagel_splitting_template_code>>
"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return '\n'.join(result)
#+end_src
#+NAME:slagel_splitting_switch-case_generator
#+begin_src python :noweb yes :exports none
text="""
case {Dim}:
return qmckl_slagel_splitting_{Dim}(
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
"""
result = []
for Dim in <<kernel_generator_range>>:
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return '\n'.join(result)
#+end_src
#+begin_src c :tangle (eval c) :comments org :noweb yes
<<slagel_splitting_kernel_generator()>>
qmckl_exit_code qmckl_slagel_splitting(
const uint64_t LDS,
const uint64_t Dim,
const uint64_t N_updates,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* later_updates,
uint64_t* later_index,
uint64_t* later,
double* determinant) {
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
switch (Dim) {
<<slagel_splitting_switch-case_generator()>>
}
}
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
return qmckl_slagel_splitting_hpc(
LDS,
Dim,
N_updates,
Updates,
Updates_index,
breakdown,
Slater_inv,
later_updates,
later_index,
later,
determinant);
}
return QMCKL_FAILURE;
}
#+end_src
*** Performance *** Performance
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2 This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2