mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-03 20:54:09 +01:00
Merge pull request #101 from fmgjcoppens/master
Woodbury 2x2, 3x3 & Slagel Splitting kernel template generator
This commit is contained in:
commit
30c3e48d91
@ -26,11 +26,16 @@ int main() {
|
||||
qmckl_exit_code rc;
|
||||
#+end_src
|
||||
|
||||
#+NAME:kernel_generator_range
|
||||
#+begin_src python :noweb yes :exports none
|
||||
range(2, 22)
|
||||
#+end_src
|
||||
|
||||
* Naïve Sherman-Morrison
|
||||
|
||||
** ~qmckl_sherman_morrison~
|
||||
** ~qmckl_sherman_morrison_naive~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison
|
||||
:Name: qmckl_sherman_morrison_naive
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
@ -63,7 +68,7 @@ int main() {
|
||||
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
|
||||
from applying the updates to the original matrix.
|
||||
|
||||
#+NAME: qmckl_sherman_morrison_args
|
||||
#+NAME: qmckl_sherman_morrison_naive_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
||||
| uint64_t | Dim | in | Dimension of Slater_inv |
|
||||
@ -87,11 +92,11 @@ int main() {
|
||||
|
||||
*** C header
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_sherman_morrison(
|
||||
qmckl_exit_code qmckl_sherman_morrison_naive(
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
@ -130,7 +135,7 @@ int main() {
|
||||
#define ALIGNED
|
||||
#endif
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison_hpc(
|
||||
qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
@ -203,9 +208,9 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
|
||||
|
||||
|
||||
|
||||
#+NAME:naive_template
|
||||
#+NAME:naive_template_code
|
||||
#+begin_src c
|
||||
static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
|
||||
static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}(
|
||||
const qmckl_context context,
|
||||
const uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
@ -217,7 +222,7 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith( context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_sherman_morrison_{Dim}",
|
||||
"qmckl_sherman_morrison_naive_{Dim}",
|
||||
NULL);
|
||||
}
|
||||
|
||||
@ -280,13 +285,13 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
|
||||
|
||||
|
||||
|
||||
#+NAME:naive-python
|
||||
#+NAME:naive_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
<<naive_template>>
|
||||
<<naive_template_code>>
|
||||
"""
|
||||
result = []
|
||||
for Dim in range(2, 22):
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
@ -297,11 +302,11 @@ return '\n'.join(result)
|
||||
|
||||
|
||||
|
||||
#+NAME:naive-python-switch
|
||||
#+NAME:naive_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_sherman_morrison_{Dim}(context,
|
||||
return qmckl_sherman_morrison_naive_{Dim}(context,
|
||||
N_updates,
|
||||
Updates,
|
||||
Updates_index,
|
||||
@ -310,7 +315,7 @@ case {Dim}:
|
||||
determinant);
|
||||
"""
|
||||
result = []
|
||||
for Dim in range(2, 22):
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
@ -322,9 +327,9 @@ return '\n'.join(result)
|
||||
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<naive-python()>>
|
||||
<<naive_kernel_generator()>>
|
||||
|
||||
qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
|
||||
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
@ -337,17 +342,17 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_sherman_morrison",
|
||||
"qmckl_sherman_morrison_naive",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||
switch (Dim) {
|
||||
<<naive-python-switch()>>
|
||||
<<naive_switch-case_generator()>>
|
||||
}
|
||||
}
|
||||
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||
return qmckl_sherman_morrison_hpc(context,
|
||||
return qmckl_sherman_morrison_naive_hpc(context,
|
||||
LDS,
|
||||
Dim,
|
||||
N_updates,
|
||||
@ -359,7 +364,7 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
|
||||
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
@ -375,17 +380,17 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
|
||||
|
||||
** Fortran interface :noexport:
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_sherman_morrison
|
||||
:Name: qmckl_sherman_morrison_naive
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||
interface
|
||||
integer(c_int32_t) function qmckl_sherman_morrison &
|
||||
integer(c_int32_t) function qmckl_sherman_morrison_naive &
|
||||
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||
bind(C)
|
||||
|
||||
@ -403,7 +408,7 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
|
||||
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||
real (c_double ) , intent(inout) :: determinant
|
||||
|
||||
end function qmckl_sherman_morrison
|
||||
end function qmckl_sherman_morrison_naive
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
@ -426,7 +431,7 @@ assert(Slater_inv1 != NULL);
|
||||
|
||||
// original determinant of Slater1 (before applying updates)
|
||||
double det = 3.407025646103221e-10;
|
||||
rc = qmckl_sherman_morrison(context,
|
||||
rc = qmckl_sherman_morrison_naive(context,
|
||||
LDS,
|
||||
Dim,
|
||||
N_updates1,
|
||||
@ -463,9 +468,9 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
* Woodbury 2x2
|
||||
|
||||
** ~qmckl_woodbury_2~
|
||||
** ~qmckl_woodbury_2x2~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_woodbury_2
|
||||
:Name: qmckl_woodbury_2x2
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
@ -488,7 +493,7 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
|
||||
|
||||
#+NAME: qmckl_woodbury_2_args
|
||||
#+NAME: qmckl_woodbury_2x2_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
||||
| uint64_t | Dim | in | Dimension of Slater_inv |
|
||||
@ -510,11 +515,11 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
*** C header
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_woodbury_2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_woodbury_2(
|
||||
qmckl_exit_code qmckl_woodbury_2x2(
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
@ -528,14 +533,14 @@ assert(rc == QMCKL_SUCCESS);
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 2
|
||||
B := 1 + V * C, 2 x 2
|
||||
@ -545,7 +550,7 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_2",
|
||||
"qmckl_woodbury_2x2_hpc",
|
||||
NULL);
|
||||
}
|
||||
|
||||
@ -613,6 +618,179 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#+NAME:woodbury_2x2_kernel_template
|
||||
#+begin_src c
|
||||
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
|
||||
const qmckl_context context,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 2
|
||||
B := 1 + V * C, 2 x 2
|
||||
D := V * S^{-1}, 2 x dim
|
||||
*/
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_2x2_{Dim}",
|
||||
NULL);
|
||||
}
|
||||
|
||||
const uint64_t row1 = (Updates_index[0] - 1);
|
||||
const uint64_t row2 = (Updates_index[1] - 1);
|
||||
|
||||
// Compute C = (S^T)^{-1}U : {Dim} x 2
|
||||
double __attribute__((aligned(8))) C[2 * {Dim}];
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
C[i * 2] = 0;
|
||||
C[i * 2 + 1] = 0;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
||||
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
||||
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute B = 1 + VC : 2 x 2
|
||||
const double B0 = C[row1 * 2] + 1;
|
||||
const double B1 = C[row1 * 2 + 1];
|
||||
const double B2 = C[row2 * 2];
|
||||
const double B3 = C[row2 * 2 + 1] + 1;
|
||||
|
||||
// Check if determinant of inverted matrix is not zero
|
||||
double det = B0 * B3 - B1 * B2;
|
||||
if (fabs(det) < breakdown) {
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
|
||||
// Update det(S) when passed
|
||||
if (determinant)
|
||||
*determinant *= det;
|
||||
|
||||
// Compute B^{-1} with explicit formula for 2 x 2 inversion
|
||||
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
|
||||
Binv[0] = idet * B3;
|
||||
Binv[1] = -1.0 * idet * B1;
|
||||
Binv[2] = -1.0 * idet * B2;
|
||||
Binv[3] = idet * B0;
|
||||
|
||||
// tmp = B^{-1}D : 2 x D{Dim}_P
|
||||
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
|
||||
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
||||
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
|
||||
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
|
||||
}
|
||||
|
||||
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
|
||||
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
#+NAME:woodbury_2x2_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
<<woodbury_2x2_kernel_template>>
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
#+NAME:woodbury_2x2_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_woodbury_2x2_{Dim}(context,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<woodbury_2x2_kernel_generator()>>
|
||||
|
||||
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_2x2",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||
switch (Dim) {
|
||||
<<woodbury_2x2_switch-case_generator()>>
|
||||
}
|
||||
}
|
||||
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||
return qmckl_woodbury_2x2_hpc(context,
|
||||
LDS,
|
||||
Dim,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
|
||||
}
|
||||
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
*** Performance
|
||||
|
||||
This function is most efficient when used in cases where there are only 2 rank-1 updates and
|
||||
@ -620,17 +798,17 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
|
||||
** Fortran interface :noexport:
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_woodbury_2
|
||||
:Name: qmckl_woodbury_2x2
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+CALL: generate_f_interface(table=qmckl_woodbury_2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||
interface
|
||||
integer(c_int32_t) function qmckl_woodbury_2 &
|
||||
integer(c_int32_t) function qmckl_woodbury_2x2 &
|
||||
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||
bind(C)
|
||||
use, intrinsic :: iso_c_binding
|
||||
@ -646,7 +824,7 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
|
||||
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||
real (c_double ) , intent(inout) :: determinant
|
||||
|
||||
end function qmckl_woodbury_2
|
||||
end function qmckl_woodbury_2x2
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
@ -657,7 +835,7 @@ assert(Updates2 != NULL);
|
||||
assert(Updates_index2 != NULL);
|
||||
assert(Slater_inv2 != NULL);
|
||||
det = -1.4432116661319376e-11;
|
||||
rc = qmckl_woodbury_2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
||||
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
|
||||
assert(fabs(det-2.367058141251457e-10) < 1e-15);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
@ -683,9 +861,9 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
* Woodbury 3x3
|
||||
|
||||
** ~qmckl_woodbury_3~
|
||||
** ~qmckl_woodbury_3x3~
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_woodbury_3
|
||||
:Name: qmckl_woodbury_3x3
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
@ -703,7 +881,7 @@ assert(rc == QMCKL_SUCCESS);
|
||||
#pragma ivdep
|
||||
#pragma vector aligned
|
||||
|
||||
#+NAME: qmckl_woodbury_3_args
|
||||
#+NAME: qmckl_woodbury_3x3_args
|
||||
| qmckl_context | context | in | Global state |
|
||||
| uint64_t | LDS | in | Leading dimension of Slater_inv |
|
||||
| uint64_t | Dim | in | Dimension of Slater_inv |
|
||||
@ -725,11 +903,11 @@ assert(rc == QMCKL_SUCCESS);
|
||||
|
||||
*** C header
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_woodbury_3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_woodbury_3(
|
||||
qmckl_exit_code qmckl_woodbury_3x3(
|
||||
const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
@ -743,14 +921,14 @@ assert(rc == QMCKL_SUCCESS);
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
qmckl_exit_code qmckl_woodbury_3x3_hpc(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 3
|
||||
B := 1 + V * C, 3 x 3
|
||||
@ -760,7 +938,7 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_3",
|
||||
"qmckl_woodbury_3x3_hpc",
|
||||
NULL);
|
||||
}
|
||||
|
||||
@ -848,6 +1026,190 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:woodbury_3x3_kernel_template
|
||||
#+begin_src c
|
||||
qmckl_exit_code qmckl_woodbury_3x3_{Dim}(
|
||||
const qmckl_context context,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict determinant) {
|
||||
/*
|
||||
C := S^{-1} * U, dim x 3
|
||||
B := 1 + V * C, 3 x 3
|
||||
D := V * S^{-1}, 3 x dim
|
||||
,*/
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_3x3_{Dim}",
|
||||
NULL);
|
||||
}
|
||||
|
||||
const uint64_t row1 = (Updates_index[0] - 1);
|
||||
const uint64_t row2 = (Updates_index[1] - 1);
|
||||
const uint64_t row3 = (Updates_index[2] - 1);
|
||||
|
||||
// Compute C = (S^T)^{-1}U : {Dim} x 3
|
||||
double __attribute__((aligned(8))) C[3 * {Dim}];
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
C[i * 3] = 0;
|
||||
C[i * 3 + 1] = 0;
|
||||
C[i * 3 + 2] = 0;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t k = 0; k < D{Dim}_P; k++) {
|
||||
C[i * 3] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
|
||||
C[i * 3 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
|
||||
C[i * 3 + 2] += Slater_inv[i * D{Dim}_P + k] * Updates[2 * D{Dim}_P + k];
|
||||
}
|
||||
}
|
||||
|
||||
// Compute B = 1 + VC : 3 x 3
|
||||
const double B0 = C[row1 * 3] + 1;
|
||||
const double B1 = C[row1 * 3 + 1];
|
||||
const double B2 = C[row1 * 3 + 2];
|
||||
const double B3 = C[row2 * 3];
|
||||
const double B4 = C[row2 * 3 + 1] + 1;
|
||||
const double B5 = C[row2 * 3 + 2];
|
||||
const double B6 = C[row3 * 3];
|
||||
const double B7 = C[row3 * 3 + 1];
|
||||
const double B8 = C[row3 * 3 + 2] + 1;
|
||||
|
||||
// Check if determinant of B is not too close to zero
|
||||
double det;
|
||||
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
|
||||
B2 * (B3 * B7 - B4 * B6);
|
||||
if (fabs(det) < breakdown) {
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
|
||||
// Update det(Slater) if passed
|
||||
if (determinant)
|
||||
*determinant *= det;
|
||||
|
||||
// Compute B^{-1} with explicit formula for 3 x 3 inversion
|
||||
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
|
||||
Binv[0] = (B4 * B8 - B7 * B5) * idet;
|
||||
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
|
||||
Binv[2] = (B1 * B5 - B4 * B2) * idet;
|
||||
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
|
||||
Binv[4] = (B0 * B8 - B6 * B2) * idet;
|
||||
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
|
||||
Binv[6] = (B3 * B7 - B6 * B4) * idet;
|
||||
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
|
||||
Binv[8] = (B0 * B4 - B3 * B1) * idet;
|
||||
|
||||
// tmp = B^{-1}D : 3 x D{Dim}_P
|
||||
double __attribute__((aligned(8))) tmp[3 * D{Dim}_P];
|
||||
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
|
||||
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
|
||||
double* r3dim = &(Slater_inv[row3 * D{Dim}_P]);
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
|
||||
tmp[D{Dim}_P + j] =
|
||||
Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
|
||||
tmp[2 * D{Dim}_P + j] =
|
||||
Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
|
||||
}
|
||||
|
||||
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
Slater_inv[i * D{Dim}_P + j] -= C[i * 3] * tmp[j];
|
||||
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 1] * tmp[D{Dim}_P + j];
|
||||
Slater_inv[i * D{Dim}_P + j] -= C[i * 3 + 2] * tmp[2 * D{Dim}_P + j];
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:woodbury_3x3_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
<<woodbury_3x3_kernel_template>>
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:woodbury_3x3_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_woodbury_3x3_{Dim}(context,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<woodbury_3x3_kernel_generator()>>
|
||||
|
||||
qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context,
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(context,
|
||||
QMCKL_NULL_CONTEXT,
|
||||
"qmckl_woodbury_3x3",
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||
switch (Dim) {
|
||||
<<woodbury_3x3_switch-case_generator()>>
|
||||
}
|
||||
}
|
||||
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||
return qmckl_woodbury_3x3_hpc(context,
|
||||
LDS,
|
||||
Dim,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
|
||||
}
|
||||
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
*** Performance...
|
||||
|
||||
This function is most efficient when used in cases where there are only 3 rank-1 updates and
|
||||
@ -855,17 +1217,17 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
|
||||
** Fortran interface :noexport:
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_woodbury_3
|
||||
:Name: qmckl_woodbury_3x3
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+CALL: generate_f_interface(table=qmckl_woodbury_3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
|
||||
interface
|
||||
integer(c_int32_t) function qmckl_woodbury_3 &
|
||||
integer(c_int32_t) function qmckl_woodbury_3x3 &
|
||||
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
|
||||
bind(C)
|
||||
use, intrinsic :: iso_c_binding
|
||||
@ -881,7 +1243,7 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
|
||||
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
|
||||
real (c_double ) , intent(inout) :: determinant
|
||||
|
||||
end function qmckl_woodbury_3
|
||||
end function qmckl_woodbury_3x3
|
||||
end interface
|
||||
#+end_src
|
||||
|
||||
@ -892,7 +1254,7 @@ assert(Updates3 != NULL);
|
||||
assert(Updates_index3 != NULL);
|
||||
assert(Slater_inv3_1 != NULL);
|
||||
det = -1.23743195512859e-09;
|
||||
rc = qmckl_woodbury_3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
||||
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
|
||||
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
|
||||
for (unsigned int i = 0; i < Dim; i++) {
|
||||
for (unsigned int j = 0; j < Dim; j++) {
|
||||
@ -1177,7 +1539,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
// Special case for 4 rank-1 updates: 2+2
|
||||
if (N_updates == 4) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_woodbury_2(context, LDS, Dim, Updates, Updates_index,
|
||||
qmckl_woodbury_2x2(context, LDS, Dim, Updates, Updates_index,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
@ -1187,7 +1549,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
later_index + later, &l, determinant);
|
||||
later += l;
|
||||
}
|
||||
rc = qmckl_woodbury_2(context, LDS, Dim, &Updates[2 * LDS],
|
||||
rc = qmckl_woodbury_2x2(context, LDS, Dim, &Updates[2 * LDS],
|
||||
&Updates_index[2], breakdown, Slater_inv,
|
||||
determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
@ -1217,7 +1579,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
for (uint64_t i = 0; i < n_of_3blocks; i++) {
|
||||
const double* Updates_3block = &Updates[i * length_3block];
|
||||
const uint64_t* Updates_index_3block = &Updates_index[i * 3];
|
||||
qmckl_exit_code rc = qmckl_woodbury_3(
|
||||
qmckl_exit_code rc = qmckl_woodbury_3x3(
|
||||
context, LDS, Dim, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
@ -1235,7 +1597,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
if (remainder == 2) {
|
||||
const double* Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||
const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||
qmckl_exit_code rc = qmckl_woodbury_2(
|
||||
qmckl_exit_code rc = qmckl_woodbury_2x2(
|
||||
context, LDS, Dim, Updates_2block, Updates_index_2block,
|
||||
breakdown, Slater_inv, determinant);
|
||||
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
|
||||
@ -1400,33 +1762,34 @@ These functions can only be used internally by the kernels in this module.
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
qmckl_exit_code qmckl_slagel_splitting (
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant);
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant);
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org
|
||||
qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS,
|
||||
uint64_t Dim,
|
||||
uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant) {
|
||||
qmckl_exit_code qmckl_slagel_splitting_hpc(
|
||||
uint64_t LDS,
|
||||
uint64_t Dim,
|
||||
uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant) {
|
||||
|
||||
double __attribute__((aligned(8))) C[LDS];
|
||||
double __attribute__((aligned(8))) D[LDS];
|
||||
@ -1488,9 +1851,164 @@ qmckl_exit_code qmckl_slagel_splitting(uint64_t LDS,
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:slagel_splitting_template_code
|
||||
#+begin_src c
|
||||
static inline qmckl_exit_code qmckl_slagel_splitting_{Dim}(
|
||||
uint64_t N_updates,
|
||||
const double* __restrict Updates,
|
||||
const uint64_t* __restrict Updates_index,
|
||||
const double breakdown,
|
||||
double* __restrict Slater_inv,
|
||||
double* __restrict later_updates,
|
||||
uint64_t* __restrict later_index,
|
||||
uint64_t* __restrict later,
|
||||
double* __restrict determinant) {
|
||||
|
||||
double __attribute__((aligned(8))) C[D{Dim}_P];
|
||||
double __attribute__((aligned(8))) D[D{Dim}_P];
|
||||
|
||||
uint64_t l = 0;
|
||||
// For each update
|
||||
while (l < N_updates) {
|
||||
// C = S^{-1} x U_l
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
C[i] = 0.0f;
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
|
||||
}
|
||||
}
|
||||
|
||||
// Denominator
|
||||
const int cui = Updates_index[l] - 1;
|
||||
double den = 1.0f + C[cui];
|
||||
if (fabs(den) < breakdown) {
|
||||
// U_l = U_l / 2: split the update in 2 equal halves and save the
|
||||
// second halve in later_updates
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t i = 0; i < D{Dim}_P; i++) {
|
||||
later_updates[*later * D{Dim}_P + i] = Updates[l * D{Dim}_P + i] * 0.5f;
|
||||
C[i] *= 0.5f;
|
||||
}
|
||||
later_index[*later] = Updates_index[l];
|
||||
(*later)++;
|
||||
|
||||
den = 1.0f + C[cui];
|
||||
} // From here onwards we continue with applying the first halve of the
|
||||
// update to Slater_inv
|
||||
double iden = 1.0f / den;
|
||||
|
||||
if (determinant)
|
||||
*determinant *= den;
|
||||
|
||||
// D = v^T x S^{-1} : 1 x D{Dim}_P
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
D[j] = Slater_inv[cui * D{Dim}_P + j];
|
||||
}
|
||||
|
||||
// S^{-1} = S^{-1} - C x D / den
|
||||
for (uint64_t i = 0; i < {Dim}; i++) {
|
||||
IVDEP
|
||||
ALIGNED
|
||||
for (uint64_t j = 0; j < D{Dim}_P; j++) {
|
||||
const double update = C[i] * D[j] * iden;
|
||||
Slater_inv[i * D{Dim}_P + j] -= update;
|
||||
}
|
||||
}
|
||||
l += 1;
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:slagel_splitting_kernel_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
<<slagel_splitting_template_code>>
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+NAME:slagel_splitting_switch-case_generator
|
||||
#+begin_src python :noweb yes :exports none
|
||||
text="""
|
||||
case {Dim}:
|
||||
return qmckl_slagel_splitting_{Dim}(
|
||||
N_updates,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
later_updates,
|
||||
later_index,
|
||||
later,
|
||||
determinant);
|
||||
"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
Dim=str(Dim)
|
||||
result.append(text.replace("{Dim}",Dim) )
|
||||
|
||||
return '\n'.join(result)
|
||||
#+end_src
|
||||
|
||||
|
||||
#+begin_src c :tangle (eval c) :comments org :noweb yes
|
||||
<<slagel_splitting_kernel_generator()>>
|
||||
|
||||
qmckl_exit_code qmckl_slagel_splitting(
|
||||
const uint64_t LDS,
|
||||
const uint64_t Dim,
|
||||
const uint64_t N_updates,
|
||||
const double* Updates,
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant) {
|
||||
|
||||
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
|
||||
switch (Dim) {
|
||||
<<slagel_splitting_switch-case_generator()>>
|
||||
}
|
||||
}
|
||||
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
|
||||
return qmckl_slagel_splitting_hpc(
|
||||
LDS,
|
||||
Dim,
|
||||
N_updates,
|
||||
Updates,
|
||||
Updates_index,
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
later_updates,
|
||||
later_index,
|
||||
later,
|
||||
determinant);
|
||||
}
|
||||
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
||||
*** Performance
|
||||
|
||||
This function cannot be used by itself and is used in Sherman-Morrison with update splitting and Woodbury 3x3 and 2x2
|
||||
|
Loading…
Reference in New Issue
Block a user