1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 10:06:09 +01:00

Added Woodbury 2x2 kernel template generator.

This commit is contained in:
Francois Coppens 2023-01-27 14:29:35 +01:00
parent 2d02b8cd63
commit c58cf3c7f6

View File

@ -28,9 +28,9 @@ int main() {
* Naïve Sherman-Morrison
** ~qmckl_sherman_morrison~
** ~qmckl_sherman_morrison_naive~
:PROPERTIES:
:Name: qmckl_sherman_morrison
:Name: qmckl_sherman_morrison_naive
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
@ -63,7 +63,7 @@ int main() {
If the determinant of the Slater-matrix is passed, it will be updated to the determinant resulting
from applying the updates to the original matrix.
#+NAME: qmckl_sherman_morrison_args
#+NAME: qmckl_sherman_morrison_naive_args
| qmckl_context | context | in | Global state |
| uint64_t | LDS | in | Leading dimension of Slater_inv |
| uint64_t | Dim | in | Dimension of Slater_inv |
@ -87,11 +87,11 @@ int main() {
*** C header
#+CALL: generate_c_header(table=qmckl_sherman_morrison_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+CALL: generate_c_header(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_sherman_morrison(
qmckl_exit_code qmckl_sherman_morrison_naive(
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -130,7 +130,7 @@ int main() {
#define ALIGNED
#endif
qmckl_exit_code qmckl_sherman_morrison_hpc(
qmckl_exit_code qmckl_sherman_morrison_naive_hpc(
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -144,7 +144,7 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith( context,
QMCKL_NULL_CONTEXT,
"qmckl_sherman_morrison_hpc",
"qmckl_sherman_morrison_naive_hpc",
NULL);
}
@ -203,9 +203,9 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
#+NAME:naive_template
#+NAME:naive_template_code
#+begin_src c
static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
static inline qmckl_exit_code qmckl_sherman_morrison_naive_{Dim}(
const qmckl_context context,
const uint64_t N_updates,
const double* __restrict Updates,
@ -217,7 +217,7 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith( context,
QMCKL_NULL_CONTEXT,
"qmckl_sherman_morrison_{Dim}",
"qmckl_sherman_morrison_naive_{Dim}",
NULL);
}
@ -280,10 +280,10 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
#+NAME:naive-python
#+NAME:naive_template_generator
#+begin_src python :noweb yes :exports none
text="""
<<naive_template>>
<<naive_template_code>>
"""
result = []
for Dim in range(2, 22):
@ -297,11 +297,11 @@ return '\n'.join(result)
#+NAME:naive-python-switch
#+NAME:naive_switch_generator
#+begin_src python :noweb yes :exports none
text="""
case {Dim}:
return qmckl_sherman_morrison_{Dim}(context,
return qmckl_sherman_morrison_naive_{Dim}(context,
N_updates,
Updates,
Updates_index,
@ -322,9 +322,9 @@ return '\n'.join(result)
#+begin_src c :tangle (eval c) :comments org :noweb yes
<<naive-python()>>
<<naive_template_generator()>>
qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
qmckl_exit_code qmckl_sherman_morrison_naive(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const uint64_t N_updates,
@ -337,17 +337,17 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_sherman_morrison",
"qmckl_sherman_morrison_naive",
NULL);
}
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
switch (Dim) {
<<naive-python-switch()>>
<<naive_switch_generator()>>
}
}
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
return qmckl_sherman_morrison_hpc(context,
return qmckl_sherman_morrison_naive_hpc(context,
LDS,
Dim,
N_updates,
@ -359,7 +359,7 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
}
return QMCKL_SUCCESS;
return QMCKL_FAILURE;
}
#+end_src
@ -375,17 +375,17 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
** Fortran interface :noexport:
:PROPERTIES:
:Name: qmckl_sherman_morrison
:Name: qmckl_sherman_morrison_naive
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+CALL: generate_f_interface(table=qmckl_sherman_morrison_naive_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_sherman_morrison &
integer(c_int32_t) function qmckl_sherman_morrison_naive &
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
@ -403,7 +403,7 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_sherman_morrison
end function qmckl_sherman_morrison_naive
end interface
#+end_src
@ -426,7 +426,7 @@ assert(Slater_inv1 != NULL);
// original determinant of Slater1 (before applying updates)
double det = 3.407025646103221e-10;
rc = qmckl_sherman_morrison(context,
rc = qmckl_sherman_morrison_naive(context,
LDS,
Dim,
N_updates1,
@ -463,9 +463,9 @@ assert(rc == QMCKL_SUCCESS);
* Woodbury 2x2
** ~qmckl_woodbury_2~
** ~qmckl_woodbury_2x2~
:PROPERTIES:
:Name: qmckl_woodbury_2
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
@ -488,7 +488,7 @@ assert(rc == QMCKL_SUCCESS);
#+NAME: qmckl_woodbury_2_args
#+NAME: qmckl_woodbury_2x2_args
| qmckl_context | context | in | Global state |
| uint64_t | LDS | in | Leading dimension of Slater_inv |
| uint64_t | Dim | in | Dimension of Slater_inv |
@ -510,11 +510,11 @@ assert(rc == QMCKL_SUCCESS);
*** C header
#+CALL: generate_c_header(table=qmckl_woodbury_2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+CALL: generate_c_header(table=qmckl_woodbury_2x2_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_2(
qmckl_exit_code qmckl_woodbury_2x2(
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -528,14 +528,14 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
qmckl_exit_code qmckl_woodbury_2x2_hpc(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant) {
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
@ -545,7 +545,7 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2",
"qmckl_woodbury_2x2_hpc",
NULL);
}
@ -613,6 +613,179 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
}
#+end_src
#+NAME:woodbury_2x2_template_code
#+begin_src c
static inline qmckl_exit_code qmckl_woodbury_2x2_{Dim}(
const qmckl_context context,
const double* __restrict Updates,
const uint64_t* __restrict Updates_index,
const double breakdown,
double* __restrict Slater_inv,
double* __restrict determinant) {
/*
C := S^{-1} * U, dim x 2
B := 1 + V * C, 2 x 2
D := V * S^{-1}, 2 x dim
*/
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2_{Dim}",
NULL);
}
const uint64_t row1 = (Updates_index[0] - 1);
const uint64_t row2 = (Updates_index[1] - 1);
// Compute C = (S^T)^{-1}U : {Dim} x 2
double __attribute__((aligned(8))) C[2 * {Dim}];
for (uint64_t i = 0; i < {Dim}; i++) {
C[i * 2] = 0;
C[i * 2 + 1] = 0;
IVDEP
ALIGNED
for (uint64_t k = 0; k < D{Dim}_P; k++) {
C[i * 2] += Slater_inv[i * D{Dim}_P + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * D{Dim}_P + k] * Updates[D{Dim}_P + k];
}
}
// Compute B = 1 + VC : 2 x 2
const double B0 = C[row1 * 2] + 1;
const double B1 = C[row1 * 2 + 1];
const double B2 = C[row2 * 2];
const double B3 = C[row2 * 2 + 1] + 1;
// Check if determinant of inverted matrix is not zero
double det = B0 * B3 - B1 * B2;
if (fabs(det) < breakdown) {
return QMCKL_FAILURE;
}
// Update det(S) when passed
if (determinant)
*determinant *= det;
// Compute B^{-1} with explicit formula for 2 x 2 inversion
double __attribute__((aligned(8))) Binv[4], idet = 1.0 / det;
Binv[0] = idet * B3;
Binv[1] = -1.0 * idet * B1;
Binv[2] = -1.0 * idet * B2;
Binv[3] = idet * B0;
// tmp = B^{-1}D : 2 x D{Dim}_P
double __attribute__((aligned(8))) tmp[2 * D{Dim}_P];
double* r1dim = &(Slater_inv[row1 * D{Dim}_P]);
double* r2dim = &(Slater_inv[row2 * D{Dim}_P]);
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[D{Dim}_P + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
}
// Compute (S^T)^{-1} - C * tmp : {Dim} x D{Dim}_P
for (uint64_t i = 0; i < {Dim}; i++) {
IVDEP
ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) {
Slater_inv[i * D{Dim}_P + j] -= C[i * 2] * tmp[j];
Slater_inv[i * D{Dim}_P + j] -= C[i * 2 + 1] * tmp[D{Dim}_P + j];
}
}
return QMCKL_SUCCESS;
}
#+end_src
#+NAME:woodbury_2x2_template_generator
#+begin_src python :noweb yes :exports none
text="""
<<woodbury_2x2_template_code>>
"""
result = []
for Dim in range(2, 22):
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return '\n'.join(result)
#+end_src
#+NAME:woodbury_2x2_switch_generator
#+begin_src python :noweb yes :exports none
text="""
case {Dim}:
return qmckl_woodbury_2x2_{Dim}(context,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
"""
result = []
for Dim in range(2, 22):
Dim=str(Dim)
result.append(text.replace("{Dim}",Dim) )
return '\n'.join(result)
#+end_src
#+begin_src c :tangle (eval c) :comments org :noweb yes
<<woodbury_2x2_template_generator()>>
qmckl_exit_code qmckl_woodbury_2x2(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
const uint64_t* Updates_index,
const double breakdown,
double* Slater_inv,
double* determinant) {
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_2x2",
NULL);
}
if (LDS == (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) { // Most cases
switch (Dim) {
<<woodbury_2x2_switch_generator()>>
}
}
else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
return qmckl_woodbury_2x2_hpc(context,
LDS,
Dim,
Updates,
Updates_index,
breakdown,
Slater_inv,
determinant);
}
return QMCKL_FAILURE;
}
#+end_src
*** Performance
This function is most efficient when used in cases where there are only 2 rank-1 updates and
@ -620,17 +793,17 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
** Fortran interface :noexport:
:PROPERTIES:
:Name: qmckl_woodbury_2
:Name: qmckl_woodbury_2x2
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+CALL: generate_f_interface(table=qmckl_woodbury_2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+CALL: generate_f_interface(table=qmckl_woodbury_2x2_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_2 &
integer(c_int32_t) function qmckl_woodbury_2x2 &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
@ -646,7 +819,7 @@ qmckl_exit_code qmckl_woodbury_2(const qmckl_context context,
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_2
end function qmckl_woodbury_2x2
end interface
#+end_src
@ -657,7 +830,7 @@ assert(Updates2 != NULL);
assert(Updates_index2 != NULL);
assert(Slater_inv2 != NULL);
det = -1.4432116661319376e-11;
rc = qmckl_woodbury_2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
rc = qmckl_woodbury_2x2(context, LDS, Dim, Updates2, Updates_index2, breakdown, Slater_inv2, &det);
assert(fabs(det-2.367058141251457e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
@ -683,9 +856,9 @@ assert(rc == QMCKL_SUCCESS);
* Woodbury 3x3
** ~qmckl_woodbury_3~
** ~qmckl_woodbury_3x3~
:PROPERTIES:
:Name: qmckl_woodbury_3
:Name: qmckl_woodbury_3x3
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
@ -703,7 +876,7 @@ assert(rc == QMCKL_SUCCESS);
#pragma ivdep
#pragma vector aligned
#+NAME: qmckl_woodbury_3_args
#+NAME: qmckl_woodbury_3x3_args
| qmckl_context | context | in | Global state |
| uint64_t | LDS | in | Leading dimension of Slater_inv |
| uint64_t | Dim | in | Dimension of Slater_inv |
@ -725,11 +898,11 @@ assert(rc == QMCKL_SUCCESS);
*** C header
#+CALL: generate_c_header(table=qmckl_woodbury_3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+CALL: generate_c_header(table=qmckl_woodbury_3x3_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src c :tangle (eval h_func) :comments org
qmckl_exit_code qmckl_woodbury_3(
qmckl_exit_code qmckl_woodbury_3x3(
const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
@ -743,7 +916,7 @@ assert(rc == QMCKL_SUCCESS);
*** C source
#+begin_src c :tangle (eval c) :comments org
qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
qmckl_exit_code qmckl_woodbury_3x3(const qmckl_context context,
const uint64_t LDS,
const uint64_t Dim,
const double* Updates,
@ -760,7 +933,7 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
return qmckl_failwith(context,
QMCKL_NULL_CONTEXT,
"qmckl_woodbury_3",
"qmckl_woodbury_3x3",
NULL);
}
@ -855,17 +1028,17 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
** Fortran interface :noexport:
:PROPERTIES:
:Name: qmckl_woodbury_3
:Name: qmckl_woodbury_3x3
:CRetType: qmckl_exit_code
:FRetType: qmckl_exit_code
:END:
#+CALL: generate_f_interface(table=qmckl_woodbury_3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+CALL: generate_f_interface(table=qmckl_woodbury_3x3_args,rettyp=get_value("FRetType"),fname=get_value("Name"))
#+RESULTS:
#+begin_src f90 :tangle (eval fh_func) :comments org :exports none
interface
integer(c_int32_t) function qmckl_woodbury_3 &
integer(c_int32_t) function qmckl_woodbury_3x3 &
(context, LDS, Dim, Updates, Updates_index, breakdown, Slater_inv, determinant) &
bind(C)
use, intrinsic :: iso_c_binding
@ -881,7 +1054,7 @@ qmckl_exit_code qmckl_woodbury_3(const qmckl_context context,
real (c_double ) , intent(inout) :: Slater_inv(LDS*Dim)
real (c_double ) , intent(inout) :: determinant
end function qmckl_woodbury_3
end function qmckl_woodbury_3x3
end interface
#+end_src
@ -892,7 +1065,7 @@ assert(Updates3 != NULL);
assert(Updates_index3 != NULL);
assert(Slater_inv3_1 != NULL);
det = -1.23743195512859e-09;
rc = qmckl_woodbury_3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
rc = qmckl_woodbury_3x3(context, LDS, Dim, Updates3, Updates_index3, breakdown, Slater_inv3_1, &det);
assert(fabs(det - 1.602708950725074e-10) < 1e-15);
for (unsigned int i = 0; i < Dim; i++) {
for (unsigned int j = 0; j < Dim; j++) {
@ -1177,7 +1350,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
// Special case for 4 rank-1 updates: 2+2
if (N_updates == 4) {
qmckl_exit_code rc =
qmckl_woodbury_2(context, LDS, Dim, Updates, Updates_index,
qmckl_woodbury_2x2(context, LDS, Dim, Updates, Updates_index,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
uint64_t l = 0;
@ -1187,7 +1360,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
later_index + later, &l, determinant);
later += l;
}
rc = qmckl_woodbury_2(context, LDS, Dim, &Updates[2 * LDS],
rc = qmckl_woodbury_2x2(context, LDS, Dim, &Updates[2 * LDS],
&Updates_index[2], breakdown, Slater_inv,
determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
@ -1217,7 +1390,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
for (uint64_t i = 0; i < n_of_3blocks; i++) {
const double* Updates_3block = &Updates[i * length_3block];
const uint64_t* Updates_index_3block = &Updates_index[i * 3];
qmckl_exit_code rc = qmckl_woodbury_3(
qmckl_exit_code rc = qmckl_woodbury_3x3(
context, LDS, Dim, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting
@ -1235,7 +1408,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
if (remainder == 2) {
const double* Updates_2block = &Updates[n_of_3blocks * length_3block];
const uint64_t* Updates_index_2block = &Updates_index[3 * n_of_3blocks];
qmckl_exit_code rc = qmckl_woodbury_2(
qmckl_exit_code rc = qmckl_woodbury_2x2(
context, LDS, Dim, Updates_2block, Updates_index_2block,
breakdown, Slater_inv, determinant);
if (rc != QMCKL_SUCCESS) { // Send the entire block to slagel_splitting