From 8669b46de1a561fc1760554944c503ff228d8119 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 17 Oct 2024 00:25:02 +0200
Subject: [PATCH] improv cpx erf calculation

---
 src/hartree_fock/deb_ao_2e_int.irp.f | 102 +++++++++++++++++++-
 src/utils/cpx_boys.irp.f             | 138 +++++++++++++++++----------
 src/utils/cpx_erf.irp.f              | 107 +++++++++++++++------
 3 files changed, 266 insertions(+), 81 deletions(-)

diff --git a/src/hartree_fock/deb_ao_2e_int.irp.f b/src/hartree_fock/deb_ao_2e_int.irp.f
index 29608603..94dc0bda 100644
--- a/src/hartree_fock/deb_ao_2e_int.irp.f
+++ b/src/hartree_fock/deb_ao_2e_int.irp.f
@@ -3,10 +3,11 @@ program deb_ao_2e_int
 
   implicit none
 
-  call check_ao_two_e_integral_cgtos()
+  !call check_ao_two_e_integral_cgtos()
   !call check_crint1()
   !call check_crint2()
   !call check_crint3()
+  call check_crint4()
 
 end
 
@@ -344,3 +345,102 @@ end
 
 ! ---
 
+subroutine check_crint4()
+
+  implicit none
+
+  integer                    :: i_test, n_test
+  integer                    :: i, seed_size, clock_time
+  double precision           :: xr(1), x, shift
+  double precision           :: yr(1), y
+  double precision           :: dif_re, dif_im, acc_re, nrm_re, acc_im, nrm_im
+  double precision           :: delta_ref
+  double precision           :: t1, t2, t_int1, t_int2
+  complex*16                 :: rho
+  complex*16                 :: int1, int2, int3
+  integer, allocatable       :: seed(:)
+
+
+
+  call random_seed(size=seed_size)
+  allocate(seed(seed_size))
+  call system_clock(count=clock_time)                  
+  seed = clock_time + 37 * (/ (i, i=0, seed_size-1) /) 
+  !seed = 123456789
+  call random_seed(put=seed)
+
+
+  t_int1 = 0.d0
+  t_int2 = 0.d0
+
+  n_test = 100
+  shift = 15.d0
+
+  acc_re = 0.d0
+  nrm_re = 0.d0
+  acc_im = 0.d0
+  nrm_im = 0.d0
+  do i_test = 1, n_test
+
+    call random_number(xr)
+    call random_number(yr)
+
+    x = 1.d0 * (2.d0 * shift * xr(1) - shift)
+    y = 1.d0 * (2.d0 * shift * yr(1) - shift)
+
+    rho = x + (0.d0, 1.d0) * y
+
+    call wall_time(t1)
+    call zboysfun00_1(rho, int1)
+    call wall_time(t2)
+    t_int1 = t_int1 + t2 - t1
+
+    call wall_time(t1)
+    call zboysfun00_2(rho, int2)
+    call wall_time(t2)
+    t_int2 = t_int2 + t2 - t1
+
+    dif_re = dabs(real(int1) - real(int2))
+    dif_im = dabs(aimag(int1) - aimag(int2))
+    if((dif_re .gt. 1d-10) .or. (dif_im .gt. 1d-10)) then
+      print*, ' important error found: '
+      print*, " rho = ", x, y
+      print*, real(int1), real(int2), dif_re
+      print*, aimag(int1), aimag(int2), dif_im
+      call crint_quad_12(0, rho, 10000000, int3)
+      if(zabs(int1 - int3) .lt. zabs(int2 - int3)) then
+        print*, ' implementation 2 seems to be wrong'
+      else
+        print*, ' implementation 1 seems to be wrong'
+        print*, ' quad 10000000:', real(int3), aimag(int3)
+        call crint_quad_12(0, rho, 100000000, int3)
+        print*, ' quad 100000000:', real(int3), aimag(int3)
+      endif
+      !print*, ' quad:', real(int3), aimag(int3)
+      !stop
+    endif
+
+    if((real(int1) /= real(int1)) .or. (aimag(int1) /= aimag(int1)) .or. &
+       (real(int2) /= real(int2)) .or. (aimag(int2) /= aimag(int2)) ) then
+      cycle
+    else
+      acc_re += dif_re
+      acc_im += dif_im
+      nrm_re += dabs(real(int1))
+      nrm_im += dabs(aimag(int1))
+    endif
+  enddo
+
+  print*, "accuracy on real part (%):", 100.d0 * acc_re / (nrm_re + 1d-15)
+  print*, "accuracy on imag part (%):", 100.d0 * acc_im / (nrm_im + 1d-15)
+
+  print*, "zerf_1 wall time (sec) = ", t_int1
+  print*, "zerf_2 wall time (sec) = ", t_int2
+
+
+  deallocate(seed)
+
+end
+
+! ---
+
diff --git a/src/utils/cpx_boys.irp.f b/src/utils/cpx_boys.irp.f
index 687433e4..acac1f6f 100644
--- a/src/utils/cpx_boys.irp.f
+++ b/src/utils/cpx_boys.irp.f
@@ -10,44 +10,100 @@ complex*16 function crint_1(n, rho)
   complex*16, intent(in) :: rho
 
   integer                :: i, mmax
-  double precision       :: rho_mod, rho_re, rho_im 
-  double precision       :: sq_rho_re, sq_rho_im
-  double precision       :: n_tmp
+  double precision       :: rho_mod
+  double precision       :: tmp
   complex*16             :: sq_rho, rho_inv, rho_exp
 
-  complex*16             :: crint_smallz, cpx_erf_1
+  complex*16             :: crint_smallz
 
-  rho_re  = real (rho)
-  rho_im  = aimag(rho)
-  rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im)
+  rho_mod = zabs(rho)
+
+  if(rho_mod < 3.5d0) then
+
+    if(rho_mod .lt. 0.35d0) then
+
+      select case(n)
+      case(0)
+        crint_1 = (((((((((1.3122532963802805073d-08 * rho &
+                - 1.450385222315046877d-07) * rho &
+                + 1.458916900093370682d-06) * rho &
+                - 0.132275132275132275d-04) * rho &
+                + 0.106837606837606838d-03) * rho &
+                - 0.757575757575757576d-03) * rho &
+                + 0.462962962962962963d-02) * rho &
+                - 0.238095238095238095d-01) * rho &
+                + 0.10000000000000000000d0) * rho &
+                - 0.33333333333333333333d0) * rho &
+                + 1.0d0
+      case(1)
+        crint_1 = (((((((((1.198144314086343d-08 * rho &
+                - 1.312253296380281d-07) * rho &
+                + 1.305346700083542d-06) * rho &
+                - 1.167133520074696d-05) * rho &
+                + 9.259259259259259d-05) * rho &
+                - 6.410256410256410d-04) * rho &
+                + 3.787878787878788d-03) * rho &
+                - 1.851851851851852d-02) * rho &
+                + 7.142857142857142d-02) * rho &
+                - 2.000000000000000d-01) * rho &
+                + 3.333333333333333d-01
+      case(2)
+        crint_1 = (((((((((1.102292768959436d-08 * rho &
+                - 1.198144314086343d-07) * rho &
+                + 1.181027966742252d-06) * rho &
+                - 1.044277360066834d-05) * rho &
+                + 8.169934640522875d-05) * rho &
+                - 5.555555555555556d-04) * rho &
+                + 3.205128205128205d-03) * rho &
+                - 1.515151515151515d-02) * rho &
+                + 5.555555555555555d-02) * rho &
+                - 1.428571428571428d-01) * rho &
+                + 2.000000000000000d-01 
+      case(3)
+        crint_1 = (((((((((1.020641452740218d-08 * rho &
+                - 1.102292768959436d-07) * rho &
+                + 1.078329882677709d-06) * rho &
+                - 9.448223733938020d-06) * rho &
+                + 7.309941520467836d-05) * rho &
+                - 4.901960784313725d-04) * rho &
+                + 2.777777777777778d-03) * rho &
+                - 1.282051282051282d-02) * rho &
+                + 4.545454545454546d-02) * rho &
+                - 1.111111111111111d-01) * rho &
+                + 1.428571428571428d-01 
+      case default
+        tmp = dble(n + n + 1)
+        crint_1 = (((((((((2.755731922398589d-07 * rho / (tmp + 20.d0) &
+                - 2.755731922398589d-06 / (tmp + 18.d0)) * rho &
+                + 2.480158730158730d-05 / (tmp + 16.d0)) * rho &
+                - 1.984126984126984d-04 / (tmp + 14.d0)) * rho &
+                + 1.388888888888889d-03 / (tmp + 12.d0)) * rho &
+                - 8.333333333333333d-03 / (tmp + 10.d0)) * rho &
+                + 4.166666666666666d-02 / (tmp +  8.d0)) * rho &
+                - 1.666666666666667d-01 / (tmp +  6.d0)) * rho &
+                + 5.000000000000000d-01 / (tmp +  4.d0)) * rho &
+                - 1.000000000000000d+00 / (tmp +  2.d0)) * rho &
+                + 1.0d0 / tmp
+      end select 
 
-  if(rho_mod < 10.d0) then
-    ! small z
-    if(rho_mod .lt. 1.d-15) then
-      crint_1 = 1.d0 / dble(n + n + 1)
     else
+
       crint_1 = crint_smallz(n, rho)
+
     endif
 
   else
-    ! large z
 
     if(rho_mod .gt. 40.d0) then
 
-      n_tmp = dble(n) + 0.5d0
-      crint_1 = 0.5d0 * gamma(n_tmp) / (rho**n_tmp)
+      crint_1 = 0.5d0 * gamma(dble(n) + 0.5d0) / (rho**n * zsqrt(rho))
 
     else
 
-      ! get \sqrt(rho)
-      sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod)
-      sq_rho_im = 0.5d0 * rho_im / sq_rho_re 
-      sq_rho    = sq_rho_re + (0.d0, 1.d0) * sq_rho_im
-
       rho_exp = 0.5d0 * zexp(-rho)
       rho_inv = (1.d0, 0.d0) / rho
 
-      crint_1 = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho
+      call zboysfun00_1(rho, crint_1)
       mmax = n
       if(mmax .gt. 0) then
         do i = 0, mmax-1
@@ -56,9 +112,9 @@ complex*16 function crint_1(n, rho)
       endif
 
     endif
-
   endif
 
+  return
 end
 
 ! ---
@@ -72,27 +128,15 @@ complex*16 function crint_sum_1(n_pt_out, rho, d1)
   complex*16, intent(in)  :: rho, d1(0:n_pt_out)
 
   integer                 :: n, i, mmax
-  double precision        :: rho_mod, rho_re, rho_im 
-  double precision        :: sq_rho_re, sq_rho_im
+  double precision        :: rho_mod
   complex*16              :: sq_rho, F0
   complex*16              :: rho_tmp, rho_inv, rho_exp
   complex*16, allocatable :: Fm(:)
 
-  complex*16              :: crint_smallz, cpx_erf_1
+  complex*16              :: crint_smallz
 
 
-  rho_re  = real (rho)
-  rho_im  = aimag(rho)
-  rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im)
-
-!  ! debug
-!  double precision        :: d1_real(0:n_pt_out)
-!  double precision        :: rint_sum
-!  do i = 0, n_pt_out
-!    d1_real(i) = real(d1(i))
-!  enddo
-!  crint_sum_1 = rint_sum(n_pt_out, rho_re, d1_real)
-!  return
+  rho_mod = zabs(rho)
 
   if(rho_mod < 10.d0) then
     ! small z
@@ -132,12 +176,7 @@ complex*16 function crint_sum_1(n_pt_out, rho, d1)
 
     else
 
-      ! get \sqrt(rho)
-      sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod)
-      sq_rho_im = 0.5d0 * rho_im / sq_rho_re 
-      sq_rho    = sq_rho_re + (0.d0, 1.d0) * sq_rho_im
-
-      F0        = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho
+      call zboysfun00_1(rho, F0) 
       crint_sum_1 = F0 * d1(0)
 
       rho_exp = 0.5d0 * zexp(-rho)
@@ -219,15 +258,16 @@ complex*16 function crint_2(n, rho)
   integer,    intent(in) :: n
   complex*16, intent(in) :: rho
 
-  double precision       :: tmp
-  complex*16             :: rho2
+  double precision       :: tmp, abs_rho
   complex*16             :: vals(0:n)
 
   complex*16, external   :: crint_smallz
 
-  if(abs(rho) < 3.5d0) then
+  abs_rho = zabs(rho)
 
-    if(abs(rho) .lt. 0.35d0) then
+  if(abs_rho < 3.5d0) then
+
+    if(abs_rho .lt. 0.35d0) then
 
       select case(n)
       case(0)
@@ -343,7 +383,7 @@ subroutine zboysfun(n_max, x, vals)
   integer                 :: n
   complex*16              :: yy, x_inv
 
-  call zboysfun00(x, vals(0))
+  call zboysfun00_2(x, vals(0))
 
   yy = 0.5d0 * zexp(-x)
   x_inv = (1.d0, 0.d0) / x
@@ -434,7 +474,7 @@ subroutine crint_2_vec(n_max, rho, vals)
 
   integer                 :: n
   double precision        :: tmp, abs_rho
-  complex*16              :: rho2, rho3, erho
+  complex*16              :: erho
 
 
   abs_rho = abs(rho)
@@ -855,8 +895,6 @@ subroutine crint_quad_12_vec(n_max, rho, vals)
   complex*16, intent(out) :: vals(0:n_max)
 
   integer                 :: n
-  double precision        :: tmp, abs_rho
-  complex*16              :: rho2, rho3, erho
 
   do n = 0, n_max
     call crint_quad_12(n, rho, 10000000, vals(n))
diff --git a/src/utils/cpx_erf.irp.f b/src/utils/cpx_erf.irp.f
index 914a5157..6afac49e 100644
--- a/src/utils/cpx_erf.irp.f
+++ b/src/utils/cpx_erf.irp.f
@@ -1,6 +1,37 @@
 
 ! ---
 
+subroutine zboysfun00_1(rho, F0)
+
+  implicit none
+
+  include 'constants.include.F'
+  
+  complex*16, intent(in)  :: rho
+  complex*16, intent(out) :: F0
+
+  double precision        :: rho_re, rho_im, rho_mod
+  double precision        :: sq_rho_re, sq_rho_im
+  complex*16              :: sq_rho
+
+  complex*16, external    :: cpx_erf_1
+
+
+  rho_re  = real(rho)
+  rho_im  = aimag(rho)
+  rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im)
+
+  sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod)
+  sq_rho_im = 0.5d0 * rho_im / sq_rho_re
+  sq_rho    = sq_rho_re + (0.d0, 1.d0) * sq_rho_im
+
+  F0 = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho
+
+  return
+end 
+
+! ---
+
 complex*16 function cpx_erf_1(x, y)
 
   BEGIN_DOC
@@ -43,6 +74,7 @@ complex*16 function cpx_erf_1(x, y)
     cpx_erf_1 = conjg(erf_tot)
   endif
 
+  return
 end
 
 ! ---
@@ -54,22 +86,42 @@ complex*16 function erf_E(x, yabs)
 
   double precision, intent(in) :: x, yabs
 
-  if((dabs(x).gt.6.d0) .or. (x==0.d0)) then
+  double precision             :: xy
+
+
+  if(dabs(x) .gt. 6.d0) then
     erf_E = (0.d0, 0.d0)
     return
   endif
 
-  if(dabs(x) .lt. 1.d-7) then
+  xy = x * yabs
 
-    erf_E = -inv_pi * (0.d0, 1.d0) * yabs
+  if(dabs(xy) .lt. 0.1d0) then
+
+    erf_E = ((((((((((((-(0.d0, 9.3968347936601903d-8) * xy + (6.5777843555621328d-7, 0.d0) &
+          ) * xy + (0.d0, 4.2755598311153869d-6)    &
+          ) * xy - (2.5653358986692322d-5, 0.d0)    &
+          ) * xy - (0.d0, 0.00014109347442680775d0) &
+          ) * xy + (0.00070546737213403885d0, 0.d0) &
+          ) * xy + (0.d0, 0.0031746031746031746d0)  &
+          ) * xy - (0.012698412698412698d0, 0.d0)   &
+          ) * xy - (0.d0, 0.044444444444444446d0)   &
+          ) * xy + (0.13333333333333333d0, 0.d0)    &
+          ) * xy + (0.d0, 0.33333333333333331d0)    &
+          ) * xy - (0.66666666666666663d0, 0.d0)    &
+          ) * xy - (0.d0, 1.d0)                     &
+          ) * xy + (1.d0, 0.d0)
+
+    erf_E = erf_E * (0.d0, 1.d0) * yabs * inv_pi * dexp(-x*x)
 
   else
 
     erf_E = 0.5d0 * inv_pi * dexp(-x*x) &
-          * ((1.d0, 0.d0) - zexp(-(2.d0, 0.d0) * x * yabs)) / x
+          * ((1.d0, 0.d0) - zexp(-(0.d0, 2.d0) * xy)) / x
 
   endif
 
+  return
 end
 
 ! ---
@@ -84,31 +136,30 @@ double precision function erf_F(x, yabs)
   integer, parameter           :: Nmax = 13
 
   integer                      :: i
-  double precision             :: tmp1, tmp2, x2, ct 
+  double precision             :: tmp1, tmp2, x2
 
 
   if(dabs(x) .gt. 5.8d0) then
-
     erf_F = 0.d0
+    return
+  endif
 
-  else
 
-    x2 = x * x
-    ct = x * inv_pi
+  x2 = x * x
 
-    erf_F = 0.d0
-    do i = 1, Nmax
+  erf_F = 0.d0
+  do i = 1, Nmax
 
-      tmp1  = 0.25d0 * dble(i) * dble(i) + x2
-      tmp2  = dexp(-tmp1) / tmp1
-      erf_F = erf_F + tmp2
+    tmp1  = 0.25d0 * dble(i*i) + x2
+    tmp2  = dexp(-tmp1) / tmp1
+    erf_F = erf_F + tmp2
 
-      if(dabs(tmp2) .lt. 1d-15) exit
-    enddo
-    erf_F = ct * erf_F
+    if(tmp2 .lt. 1d-15) exit
+  enddo
 
-  endif 
+  erf_F = x * inv_pi * erf_F
 
+  return
 end
 
 ! ---
@@ -149,6 +200,7 @@ complex*16 function erf_G(x, yabs)
 
   enddo
 
+  return
 end
 
 ! ---
@@ -163,19 +215,14 @@ complex*16 function erf_H(x, yabs)
   integer, parameter           :: Nmax = 13
 
   integer                      :: i
-  double precision             :: tmp0, tmp1, tmp_mod, x2, ct, idble
+  double precision             :: tmp0, tmp1, x2, idble
   complex*16                   :: tmp2
 
-  if(x .eq. 0.d0) then
-    erf_H = (0.d0, 0.d0)
-    return
-  endif
 
-
-  if((dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0)) then
+  if((dabs(x) .lt. 107d0) .and. (yabs .lt. 6.1d0)) then
+  !if((dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0)) then
 
     x2 = x * x
-    ct = 0.5d0 * inv_pi
 
     erf_H = 0.d0
     do i = 1, Nmax
@@ -186,10 +233,10 @@ complex*16 function erf_H(x, yabs)
       tmp2  = dexp(-tmp1-idble*yabs) * (x + (0.d0, 1.d0)*tmp0) / tmp1
       erf_H = erf_H + tmp2
 
-      tmp_mod = dsqrt(real(tmp2)*real(tmp2) + aimag(tmp2)*aimag(tmp2))
-      if(tmp_mod .lt. 1d-15) exit
+      if(zabs(tmp2) .lt. 1d-15) exit
     enddo
-    erf_H = ct * erf_H
+
+    erf_H = 0.5d0 * inv_pi * erf_H
 
   else
 
@@ -201,7 +248,7 @@ end
 
 ! ---
 
-subroutine zboysfun00(z, val)
+subroutine zboysfun00_2(z, val)
 
   BEGIN_DOC
   !