From 27b1134a4c2807525a128faa8ee45a0dfef508af Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 30 Nov 2023 01:17:18 +0100
Subject: [PATCH] Precision adjusted for MOs

---
 org/qmckl_ao.org      | 11 +-----
 org/qmckl_mo.org      | 54 ++++++++++++++++++-----------
 org/qmckl_numprec.org | 79 ++++++++++++++++++++++---------------------
 3 files changed, 75 insertions(+), 69 deletions(-)

diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org
index 6eff600..3fd9320 100644
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@@ -5539,7 +5539,7 @@ end function qmckl_compute_ao_value_doc
   /* Don't compute polynomials when the radial part is zero. */
   const int precision = ctx->numprec.precision;
   const double cutoff = log( (double) ( ((uint64_t) 1) << (precision-2) ) );
-  const bool double_precision = precision > 23;
+  const bool double_precision = precision > 22;
 
      #+end_src
 
@@ -5736,15 +5736,6 @@ IVDEP
           const int32_t n = lstart[l+1]-lstart[l];
 
           if (s1 == 0.0) {
-/*
-IVDEP
-#ifdef HAVE_OPENMP
-#pragma omp simd
-#endif
-            for (int64_t il=0 ; il<n ; ++il) {
-              ao_value_1[il] = 0.;
-            }
-*/
             continue;
           }
 
diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org
index bf58dac..31a2292 100644
--- a/org/qmckl_mo.org
+++ b/org/qmckl_mo.org
@@ -1324,7 +1324,6 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
       double* restrict const vgl1  = &(mo_value[ipoint*mo_num]);
       const double* restrict avgl1 = &(ao_value[ipoint*ao_num]);
 
-    //  memset(vgl_sp, 0, mo_num*sizeof(float));
       float*   __attribute__((aligned(64))) vgl_sp = calloc((size_t) mo_num, sizeof(float));
       assert (vgl_sp != NULL);
 
@@ -1409,7 +1408,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
 
   /* Don't compute polynomials when the radial part is zero. */
   const int precision = ctx->numprec.precision;
-  const bool single_precision = precision <= 23;
+  const bool single_precision = precision <= 24;
 
   if (single_precision) {
     return qmckl_compute_mo_basis_mo_value_hpc_sp (context,
@@ -1442,6 +1441,7 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
       int64_t nidx=0;
       int64_t idx[ao_num];
       double  av1[ao_num];
+
       for (int64_t k=0 ; k<ao_num ; ++k) {
         if (avgl1[k] != 0.) {
           idx[nidx] = k;
@@ -1924,7 +1924,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
 
   /* Don't compute polynomials when the radial part is zero. */
   const int precision = ctx->numprec.precision;
-  const bool single_precision = precision <= 23;
+  const bool single_precision = precision <= 26;
 
   if (single_precision) {
     return qmckl_compute_mo_basis_mo_vgl_hpc_sp (context,
@@ -3384,7 +3384,7 @@ print ( "[4][1][15][14] : %25.15e"% lf(a,x,y))
   printf("\n");
 
   /* Check single precision */
-  rc = qmckl_set_numprec_precision(context,23);
+  rc = qmckl_set_numprec_precision(context,53);
   rc = qmckl_context_touch(context);
   assert (rc == QMCKL_SUCCESS);
 
@@ -3394,7 +3394,7 @@ print ( "[4][1][15][14] : %25.15e"% lf(a,x,y))
   rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), point_num*5*chbrclf_mo_num);
   assert (rc == QMCKL_SUCCESS);
 
-  rc = qmckl_set_numprec_precision(context,24);
+  rc = qmckl_set_numprec_precision(context,23);
   rc = qmckl_context_touch(context);
   assert (rc == QMCKL_SUCCESS);
 
@@ -3407,31 +3407,45 @@ print ( "[4][1][15][14] : %25.15e"% lf(a,x,y))
   assert (rc == QMCKL_SUCCESS);
 
 
+  uint64_t average_prec = 0;
+  int32_t nbits;
   for (int i=0 ; i< point_num; ++i) {
     for (int k=0 ; k< chbrclf_mo_num ; ++k) {
-      printf("%d %d %e %e\n", i, k, mo_value_sp[i][k], mo_value[i][k]);
-      assert(fabs((mo_value_sp[i][k] - mo_value[i][k])) < 1.e-4) ;
+      nbits = qmckl_test_precision_64(mo_value_sp[i][k], mo_value[i][k]);
+//      printf("%d %d %25.15e %25.15e %d\n", i, k, mo_value_sp[i][k], mo_value[i][k], nbits);
+      average_prec += nbits;
     }
-    printf("\n");
   }
+  printf("Average precision for %d: %d\n", qmckl_get_numprec_precision(context),
+            (int) (average_prec/(point_num*chbrclf_mo_num)));
+  fflush(stdout);
 
+  average_prec = 0;
   for (int i=0 ; i< point_num; ++i) {
     for (int k=0 ; k< chbrclf_mo_num ; ++k) {
-      printf("%d %d\n", i, k);
-      printf("%e %e\n", mo_vgl_sp[i][0][k], mo_vgl[i][0][k]);
-      printf("%e %e\n", mo_vgl_sp[i][1][k], mo_vgl[i][1][k]);
-      printf("%e %e\n", mo_vgl_sp[i][2][k], mo_vgl[i][2][k]);
-      printf("%e %e\n", mo_vgl_sp[i][3][k], mo_vgl[i][3][k]);
-      printf("%e %e\n", mo_vgl_sp[i][4][k], mo_vgl[i][4][k]);
-      assert(fabs((mo_vgl_sp[i][0][k] - mo_vgl[i][0][k])) < 1.e-4) ;
-      assert(fabs((mo_vgl_sp[i][1][k] - mo_vgl[i][1][k])) < 1.e-3) ;
-      assert(fabs((mo_vgl_sp[i][2][k] - mo_vgl[i][2][k])) < 1.e-3) ;
-      assert(fabs((mo_vgl_sp[i][3][k] - mo_vgl[i][3][k])) < 1.e-3) ;
-      assert(fabs((mo_vgl_sp[i][4][k] - mo_vgl[i][4][k])) < 1.e-2) ;
-      printf("\n");
+//      printf("%d %d\n", i, k);
+      nbits = qmckl_test_precision_64(mo_vgl_sp[i][0][k], mo_vgl[i][0][k]);
+      average_prec += nbits;
+//      printf("%25.15e %25.15e %d\n", mo_vgl_sp[i][0][k], mo_vgl[i][0][k], nbits);
+      nbits = qmckl_test_precision_64(mo_vgl_sp[i][1][k], mo_vgl[i][1][k]);
+      average_prec += nbits;
+//      printf("%25.15e %25.15e %d\n", mo_vgl_sp[i][1][k], mo_vgl[i][1][k], nbits);
+      nbits = qmckl_test_precision_64(mo_vgl_sp[i][2][k], mo_vgl[i][2][k]);
+      average_prec += nbits;
+//      printf("%25.15e %25.15e %d\n", mo_vgl_sp[i][2][k], mo_vgl[i][2][k], nbits);
+      nbits = qmckl_test_precision_64(mo_vgl_sp[i][3][k], mo_vgl[i][3][k]);
+      average_prec += nbits;
+//      printf("%25.15e %25.15e %d\n", mo_vgl_sp[i][3][k], mo_vgl[i][3][k], nbits);
+      nbits = qmckl_test_precision_64(mo_vgl_sp[i][4][k], mo_vgl[i][4][k]);
+      average_prec += nbits;
+//      printf("%25.15e %25.15e %d\n", mo_vgl_sp[i][4][k], mo_vgl[i][4][k], nbits);
     }
   }
+  nbits = (int) (average_prec/(point_num*chbrclf_mo_num*5));
+  printf("Average precision for %d: %d\n", qmckl_get_numprec_precision(context), nbits);
+  fflush(stdout);
   rc = qmckl_set_numprec_precision(context,53);
+  return -1;
 
 
   /* Check selection of MOs */
diff --git a/org/qmckl_numprec.org b/org/qmckl_numprec.org
index 945b1ac..1127789 100644
--- a/org/qmckl_numprec.org
+++ b/org/qmckl_numprec.org
@@ -1,4 +1,4 @@
-#+TITLE: Numerical precision
+3+TITLE: Numerical precision
 #+SETUPFILE: ../tools/theme.setup
 #+INCLUDE: ../tools/lib.org
 
@@ -330,7 +330,7 @@ int qmckl_get_numprec_range(const qmckl_context context) {
 * Helper functions
 
 ** Epsilon
-   
+
    ~qmckl_get_numprec_epsilon~ returns $\epsilon = 2^{1-n}$ where ~n~ is the precision.
    We need to remove the sign bit from the precision.
 
@@ -366,13 +366,11 @@ double qmckl_get_numprec_epsilon(const qmckl_context context) {
    unchanged, we need a function that returns the number of unchanged
    bits in the range, and in the precision.
 
-   #+begin_src c :comments org :tangle (eval h_func) :exports none
-int32_t qmckl_test_precision_64(double a, double b);
-int32_t qmckl_test_precision_32(float a, float b);
-   #+end_src
+   For this, we first count by how many units in the last place (ulps) two
+   numbers differ.
 
    #+begin_src c :tangle (eval c)
-int32_t qmckl_test_precision_64(double a, double b) {
+int64_t countUlpDifference_64(double a, double b) {
 
   union int_or_float {
     int64_t i;
@@ -382,15 +380,38 @@ int32_t qmckl_test_precision_64(double a, double b) {
   x.f = a;
   y.f = b;
 
-  uint64_t diff = x.i > y.i ? x.i - y.i : y.i - x.i;
+  // Handle sign bit discontinuity: if the signs are different and either value is not zero
+  if ((x.i < 0) != (y.i < 0) && (x.f != 0.0) && (y.f != 0.0)) {
+    // Use the absolute values and add the distance to zero for both numbers
+    int64_t distanceToZeroForX = x.i < 0 ? INT64_MAX + x.i : INT64_MAX - x.i;
+    int64_t distanceToZeroForY = y.i < 0 ? INT64_MAX + y.i : INT64_MAX - y.i;
+    return distanceToZeroForX + distanceToZeroForY;
+  }
 
-  if (diff == 0) return 64;
+  // Calculate the difference in their binary representations
+  int64_t result = x.i - y.i;
+  result = result > 0 ? result : -result;
+  return result;
+}
+   #+end_src
 
-  int32_t result = 0;
+   #+begin_src c :comments org :tangle (eval h_func) :exports none
+int32_t qmckl_test_precision_64(double a, double b);
+int32_t qmckl_test_precision_32(float a, float b);
+   #+end_src
 
-  while (!(diff & 1)) {
+   #+begin_src c :tangle (eval c)
+int32_t qmckl_test_precision_64(double a, double b) {
+
+  int64_t diff = countUlpDifference_64(a,b);
+
+  if (diff == 0) return 53;
+
+  int32_t result = 53;
+
+  for (int i=0 ; i<53 && diff != 0 ; ++i) {
     diff >>= 1;
-    result++;
+    result--;
   }
 
   return result;
@@ -400,27 +421,7 @@ int32_t qmckl_test_precision_64(double a, double b) {
 
    #+begin_src c :tangle (eval c)
 int32_t qmckl_test_precision_32(float a, float b) {
-
-  union int_or_float {
-    int32_t i;
-    float   f;
-  } x, y;
-
-  x.f = a;
-  y.f = b;
-
-  uint32_t diff = x.i > y.i ? x.i - y.i : y.i - x.i;
-
-  if (diff == 0) return 32;
-
-  int32_t result = 0;
-
-  while (!(diff & 1)) {
-    diff >>= 1;
-    result++;
-  }
-
-  return result;
+  return qmckl_test_precision_64( (double) a, (double) b );
 }
    #+end_src
 
@@ -441,9 +442,9 @@ int32_t qmckl_test_precision_32(float a, float b) {
      end function qmckl_test_precision_64
   end interface
    #+end_src
-   
+
 * Approximate functions
-  
+
 ** Exponential
 
    Fast exponential function, adapted from Johan Rade's implementation
@@ -453,19 +454,19 @@ int32_t qmckl_test_precision_32(float a, float b) {
         N. Schraudolph, "A Fast, Compact Approximation of the Exponential Function",
         /Neural Computation/ *11*, 853–862 (1999).
         (available at https://nic.schraudolph.org/pubs/Schraudolph99.pdf)
-   
+
    #+begin_src c :tangle (eval c)
 float fastExpf(float x)
 {
   const float a = 12102203.0;
   const float b = 1064986816.0;
   x = a * x + b;
-  
+
   const float c = 8388608.0;
   const float d = 2139095040.0;
   if (x < c || x > d)
     x = (x < c) ? 0.0f : d;
-  
+
   uint32_t n = (uint32_t) x;
   memcpy(&x, &n, 4);
   return x;
@@ -482,7 +483,7 @@ double fastExp(double x)
   const double d = 9218868437227405312.0;
   if (x < c || x > d)
     x = (x < c) ? 0.0 : d;
-  
+
   uint64_t n = (uint64_t) x;
   memcpy(&x, &n, 8);
   return x;