Added partial Python C-template generation for the qmckl_sherman_morrison kernel.

2024-11-20 04:52:47 +01:00 · 2022-12-15 16:14:57 +01:00 · 2022-12-15 16:14:57 +01:00 · 1af5ddc76c
commit 1af5ddc76c
parent 0c5c2bed41
1 changed files with 178 additions and 0 deletions
--- a/org/qmckl_sherman_morrison_woodbury.org
+++ b/org/qmckl_sherman_morrison_woodbury.org
@ -110,6 +110,116 @@ int main() {
 #include <math.h>
 #include "qmckl.h"

+#if defined(__AVX512__)
+#define SIMD 8
+#elif defined(__AVX__)
+#define SIMD 4
+#else
+#define SIMD 2
+#endif
+    #+end_src
+
+    #+NAME:naive_template
+    #+begin_src c
+static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(const qmckl_context context,
+                                const uint64_t N_updates,
+                                const double* Updates,
+                                const uint64_t* Updates_index,
+                                const double breakdown,
+                                double* Slater_inv,
+                                double* determinant) {
+
+// TODO: Specialize for padding
+//  const uint LDS=(1+({Dim}-1)/SIMD) * SIMD;
+  const uint LDS={Dim};
+  double C[{Dim}];
+  double D[{Dim}];
+
+  uint64_t l = 0;
+  // For each update
+  while (l < N_updates) {
+    // C = A^{-1} x U_l
+    for (uint64_t i = 0; i < {Dim}; i++) {
+      C[i] = 0;
+      for (uint64_t j = 0; j < {Dim}; j++) {
+        C[i] += Slater_inv[i * LDS + j] * Updates[l * {Dim} + j];
+      }
+    }
+
+    // Denominator
+    double den = 1 + C[Updates_index[l] - 1];
+
+    if (fabs(den) < breakdown) {
+      return QMCKL_FAILURE;
+    }
+    double iden = 1 / den;
+
+    // Update det(A)
+    if (determinant != NULL)
+      *determinant *= den;
+
+    // D = v^T x A^{-1}
+    for (uint64_t j = 0; j < {Dim}; j++) {
+      D[j] = Slater_inv[(Updates_index[l] - 1) * LDS + j];
+    }
+
+    // A^{-1} = A^{-1} - C x D / den
+    for (uint64_t i = 0; i < {Dim}; i++) {
+      for (uint64_t j = 0; j < {Dim}; j++) {
+        double update = C[i] * D[j] * iden;
+        Slater_inv[i * LDS + j] -= update;
+      }
+    }
+
+    l += 1;
+  }
+
+  return QMCKL_SUCCESS;
+}
+    #+end_src
+
+    #+NAME:naive-python
+    #+begin_src python :noweb yes :exports none
+text="""
+<<naive_template>>
+"""
+result = []
+for Dim in range(1,26):
+  Dim=str(Dim)
+  result.append(text.replace("{Dim}",Dim) )
+
+return '\n'.join(result)
+    #+end_src
+
+    #+RESULTS: naive-python
+
+    #+NAME:naive-python-switch
+    #+begin_src python :noweb yes :exports none
+text="""
+case {Dim}:  
+    return qmckl_sherman_morrison_{Dim}(context,
+                                N_updates,
+                                Updates,
+                                Updates_index,
+                                breakdown,
+                                Slater_inv,
+                                determinant);
+"""
+result = []
+for Dim in range(1,26):
+  Dim=str(Dim)
+  result.append(text.replace("{Dim}",Dim) )
+
+return '\n'.join(result)
+    #+end_src
+
+    #+RESULTS: naive-python-switch
+
+    ------
+
+    #+begin_src c :tangle (eval c) :comments org :noweb yes
+<<naive-python()>>
+
 qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
                                const uint64_t LDS,
                                const uint64_t Dim,
@ -124,6 +234,13 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
    return QMCKL_NULL_CONTEXT;
  }

+  if (Dim == LDS) {
+    switch (Dim) {
+    <<naive-python-switch()>>
+    }
+
+  } else {
+
  double C[Dim];
  double D[Dim];

@ -167,9 +284,70 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
  }

  return QMCKL_SUCCESS;
+}
 }
    #+end_src

+    #+RESULTS: naive_python
+
+    qmckl_exit_code qmckl_sherman_morrison_{Dim}_{LDS}(const qmckl_context context,
+				    const uint64_t N_updates,
+				    const double* Updates,
+				    const uint64_t* Updates_index,
+				    const double breakdown,
+				    double* Slater_inv,
+				    double* determinant) {
+
+      if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
+	return QMCKL_NULL_CONTEXT;
+      }
+
+      double C[{Dim}];
+      double D[{Dim}];
+
+      uint64_t l = 0;
+      // For each update
+      while (l < N_updates) {
+	// C = A^{-1} x U_l
+	for (uint64_t i = 0; i < {Dim}; i++) {
+	  C[i] = 0;
+	  for (uint64_t j = 0; j < {Dim}; j++) {
+	    C[i] += Slater_inv[i * {LDS} + j] * Updates[l * {Dim} + j];
+	  }
+	}
+
+	// Denominator
+	double den = 1 + C[Updates_index[l] - 1];
+
+	if (fabs(den) < breakdown) {
+	  return QMCKL_FAILURE;
+	}
+	double iden = 1 / den;
+
+	// Update det(A)
+	if (determinant != NULL)
+	  *determinant *= den;
+
+	// D = v^T x A^{-1}
+	for (uint64_t j = 0; j < {Dim}; j++) {
+	  D[j] = Slater_inv[(Updates_index[l] - 1) * {LDS} + j];
+	}
+
+	// A^{-1} = A^{-1} - C x D / den
+	for (uint64_t i = 0; i < {Dim}; i++) {
+	  for (uint64_t j = 0; j < {Dim}; j++) {
+	    double update = C[i] * D[j] * iden;
+	    Slater_inv[i * {LDS} + j] -= update;
+	  }
+	}
+
+	l += 1;
+      }
+
+      return QMCKL_SUCCESS;
+    }
+    None
+
 *** Performance

    This function performs best when there is only 1 rank-1 update in the update cycle. It is not useful to