From 8669b46de1a561fc1760554944c503ff228d8119 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 17 Oct 2024 00:25:02 +0200 Subject: [PATCH] improv cpx erf calculation --- src/hartree_fock/deb_ao_2e_int.irp.f | 102 +++++++++++++++++++- src/utils/cpx_boys.irp.f | 138 +++++++++++++++++---------- src/utils/cpx_erf.irp.f | 107 +++++++++++++++------ 3 files changed, 266 insertions(+), 81 deletions(-) diff --git a/src/hartree_fock/deb_ao_2e_int.irp.f b/src/hartree_fock/deb_ao_2e_int.irp.f index 29608603..94dc0bda 100644 --- a/src/hartree_fock/deb_ao_2e_int.irp.f +++ b/src/hartree_fock/deb_ao_2e_int.irp.f @@ -3,10 +3,11 @@ program deb_ao_2e_int implicit none - call check_ao_two_e_integral_cgtos() + !call check_ao_two_e_integral_cgtos() !call check_crint1() !call check_crint2() !call check_crint3() + call check_crint4() end @@ -344,3 +345,102 @@ end ! --- +subroutine check_crint4() + + implicit none + + integer :: i_test, n_test + integer :: i, seed_size, clock_time + double precision :: xr(1), x, shift + double precision :: yr(1), y + double precision :: dif_re, dif_im, acc_re, nrm_re, acc_im, nrm_im + double precision :: delta_ref + double precision :: t1, t2, t_int1, t_int2 + complex*16 :: rho + complex*16 :: int1, int2, int3 + integer, allocatable :: seed(:) + + + + call random_seed(size=seed_size) + allocate(seed(seed_size)) + call system_clock(count=clock_time) + seed = clock_time + 37 * (/ (i, i=0, seed_size-1) /) + !seed = 123456789 + call random_seed(put=seed) + + + t_int1 = 0.d0 + t_int2 = 0.d0 + + n_test = 100 + shift = 15.d0 + + acc_re = 0.d0 + nrm_re = 0.d0 + acc_im = 0.d0 + nrm_im = 0.d0 + do i_test = 1, n_test + + call random_number(xr) + call random_number(yr) + + x = 1.d0 * (2.d0 * shift * xr(1) - shift) + y = 1.d0 * (2.d0 * shift * yr(1) - shift) + + rho = x + (0.d0, 1.d0) * y + + call wall_time(t1) + call zboysfun00_1(rho, int1) + call wall_time(t2) + t_int1 = t_int1 + t2 - t1 + + call wall_time(t1) + call zboysfun00_2(rho, int2) + call wall_time(t2) + t_int2 = t_int2 + t2 - t1 + + dif_re = dabs(real(int1) - real(int2)) + dif_im = dabs(aimag(int1) - aimag(int2)) + if((dif_re .gt. 1d-10) .or. (dif_im .gt. 1d-10)) then + print*, ' important error found: ' + print*, " rho = ", x, y + print*, real(int1), real(int2), dif_re + print*, aimag(int1), aimag(int2), dif_im + call crint_quad_12(0, rho, 10000000, int3) + if(zabs(int1 - int3) .lt. zabs(int2 - int3)) then + print*, ' implementation 2 seems to be wrong' + else + print*, ' implementation 1 seems to be wrong' + print*, ' quad 10000000:', real(int3), aimag(int3) + call crint_quad_12(0, rho, 100000000, int3) + print*, ' quad 100000000:', real(int3), aimag(int3) + endif + !print*, ' quad:', real(int3), aimag(int3) + !stop + endif + + if((real(int1) /= real(int1)) .or. (aimag(int1) /= aimag(int1)) .or. & + (real(int2) /= real(int2)) .or. (aimag(int2) /= aimag(int2)) ) then + cycle + else + acc_re += dif_re + acc_im += dif_im + nrm_re += dabs(real(int1)) + nrm_im += dabs(aimag(int1)) + endif + enddo + + print*, "accuracy on real part (%):", 100.d0 * acc_re / (nrm_re + 1d-15) + print*, "accuracy on imag part (%):", 100.d0 * acc_im / (nrm_im + 1d-15) + + print*, "zerf_1 wall time (sec) = ", t_int1 + print*, "zerf_2 wall time (sec) = ", t_int2 + + + deallocate(seed) + +end + +! --- + diff --git a/src/utils/cpx_boys.irp.f b/src/utils/cpx_boys.irp.f index 687433e4..acac1f6f 100644 --- a/src/utils/cpx_boys.irp.f +++ b/src/utils/cpx_boys.irp.f @@ -10,44 +10,100 @@ complex*16 function crint_1(n, rho) complex*16, intent(in) :: rho integer :: i, mmax - double precision :: rho_mod, rho_re, rho_im - double precision :: sq_rho_re, sq_rho_im - double precision :: n_tmp + double precision :: rho_mod + double precision :: tmp complex*16 :: sq_rho, rho_inv, rho_exp - complex*16 :: crint_smallz, cpx_erf_1 + complex*16 :: crint_smallz - rho_re = real (rho) - rho_im = aimag(rho) - rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + rho_mod = zabs(rho) + + if(rho_mod < 3.5d0) then + + if(rho_mod .lt. 0.35d0) then + + select case(n) + case(0) + crint_1 = (((((((((1.3122532963802805073d-08 * rho & + - 1.450385222315046877d-07) * rho & + + 1.458916900093370682d-06) * rho & + - 0.132275132275132275d-04) * rho & + + 0.106837606837606838d-03) * rho & + - 0.757575757575757576d-03) * rho & + + 0.462962962962962963d-02) * rho & + - 0.238095238095238095d-01) * rho & + + 0.10000000000000000000d0) * rho & + - 0.33333333333333333333d0) * rho & + + 1.0d0 + case(1) + crint_1 = (((((((((1.198144314086343d-08 * rho & + - 1.312253296380281d-07) * rho & + + 1.305346700083542d-06) * rho & + - 1.167133520074696d-05) * rho & + + 9.259259259259259d-05) * rho & + - 6.410256410256410d-04) * rho & + + 3.787878787878788d-03) * rho & + - 1.851851851851852d-02) * rho & + + 7.142857142857142d-02) * rho & + - 2.000000000000000d-01) * rho & + + 3.333333333333333d-01 + case(2) + crint_1 = (((((((((1.102292768959436d-08 * rho & + - 1.198144314086343d-07) * rho & + + 1.181027966742252d-06) * rho & + - 1.044277360066834d-05) * rho & + + 8.169934640522875d-05) * rho & + - 5.555555555555556d-04) * rho & + + 3.205128205128205d-03) * rho & + - 1.515151515151515d-02) * rho & + + 5.555555555555555d-02) * rho & + - 1.428571428571428d-01) * rho & + + 2.000000000000000d-01 + case(3) + crint_1 = (((((((((1.020641452740218d-08 * rho & + - 1.102292768959436d-07) * rho & + + 1.078329882677709d-06) * rho & + - 9.448223733938020d-06) * rho & + + 7.309941520467836d-05) * rho & + - 4.901960784313725d-04) * rho & + + 2.777777777777778d-03) * rho & + - 1.282051282051282d-02) * rho & + + 4.545454545454546d-02) * rho & + - 1.111111111111111d-01) * rho & + + 1.428571428571428d-01 + case default + tmp = dble(n + n + 1) + crint_1 = (((((((((2.755731922398589d-07 * rho / (tmp + 20.d0) & + - 2.755731922398589d-06 / (tmp + 18.d0)) * rho & + + 2.480158730158730d-05 / (tmp + 16.d0)) * rho & + - 1.984126984126984d-04 / (tmp + 14.d0)) * rho & + + 1.388888888888889d-03 / (tmp + 12.d0)) * rho & + - 8.333333333333333d-03 / (tmp + 10.d0)) * rho & + + 4.166666666666666d-02 / (tmp + 8.d0)) * rho & + - 1.666666666666667d-01 / (tmp + 6.d0)) * rho & + + 5.000000000000000d-01 / (tmp + 4.d0)) * rho & + - 1.000000000000000d+00 / (tmp + 2.d0)) * rho & + + 1.0d0 / tmp + end select - if(rho_mod < 10.d0) then - ! small z - if(rho_mod .lt. 1.d-15) then - crint_1 = 1.d0 / dble(n + n + 1) else + crint_1 = crint_smallz(n, rho) + endif else - ! large z if(rho_mod .gt. 40.d0) then - n_tmp = dble(n) + 0.5d0 - crint_1 = 0.5d0 * gamma(n_tmp) / (rho**n_tmp) + crint_1 = 0.5d0 * gamma(dble(n) + 0.5d0) / (rho**n * zsqrt(rho)) else - ! get \sqrt(rho) - sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) - sq_rho_im = 0.5d0 * rho_im / sq_rho_re - sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im - rho_exp = 0.5d0 * zexp(-rho) rho_inv = (1.d0, 0.d0) / rho - crint_1 = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho + call zboysfun00_1(rho, crint_1) mmax = n if(mmax .gt. 0) then do i = 0, mmax-1 @@ -56,9 +112,9 @@ complex*16 function crint_1(n, rho) endif endif - endif + return end ! --- @@ -72,27 +128,15 @@ complex*16 function crint_sum_1(n_pt_out, rho, d1) complex*16, intent(in) :: rho, d1(0:n_pt_out) integer :: n, i, mmax - double precision :: rho_mod, rho_re, rho_im - double precision :: sq_rho_re, sq_rho_im + double precision :: rho_mod complex*16 :: sq_rho, F0 complex*16 :: rho_tmp, rho_inv, rho_exp complex*16, allocatable :: Fm(:) - complex*16 :: crint_smallz, cpx_erf_1 + complex*16 :: crint_smallz - rho_re = real (rho) - rho_im = aimag(rho) - rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) - -! ! debug -! double precision :: d1_real(0:n_pt_out) -! double precision :: rint_sum -! do i = 0, n_pt_out -! d1_real(i) = real(d1(i)) -! enddo -! crint_sum_1 = rint_sum(n_pt_out, rho_re, d1_real) -! return + rho_mod = zabs(rho) if(rho_mod < 10.d0) then ! small z @@ -132,12 +176,7 @@ complex*16 function crint_sum_1(n_pt_out, rho, d1) else - ! get \sqrt(rho) - sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) - sq_rho_im = 0.5d0 * rho_im / sq_rho_re - sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im - - F0 = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho + call zboysfun00_1(rho, F0) crint_sum_1 = F0 * d1(0) rho_exp = 0.5d0 * zexp(-rho) @@ -219,15 +258,16 @@ complex*16 function crint_2(n, rho) integer, intent(in) :: n complex*16, intent(in) :: rho - double precision :: tmp - complex*16 :: rho2 + double precision :: tmp, abs_rho complex*16 :: vals(0:n) complex*16, external :: crint_smallz - if(abs(rho) < 3.5d0) then + abs_rho = zabs(rho) - if(abs(rho) .lt. 0.35d0) then + if(abs_rho < 3.5d0) then + + if(abs_rho .lt. 0.35d0) then select case(n) case(0) @@ -343,7 +383,7 @@ subroutine zboysfun(n_max, x, vals) integer :: n complex*16 :: yy, x_inv - call zboysfun00(x, vals(0)) + call zboysfun00_2(x, vals(0)) yy = 0.5d0 * zexp(-x) x_inv = (1.d0, 0.d0) / x @@ -434,7 +474,7 @@ subroutine crint_2_vec(n_max, rho, vals) integer :: n double precision :: tmp, abs_rho - complex*16 :: rho2, rho3, erho + complex*16 :: erho abs_rho = abs(rho) @@ -855,8 +895,6 @@ subroutine crint_quad_12_vec(n_max, rho, vals) complex*16, intent(out) :: vals(0:n_max) integer :: n - double precision :: tmp, abs_rho - complex*16 :: rho2, rho3, erho do n = 0, n_max call crint_quad_12(n, rho, 10000000, vals(n)) diff --git a/src/utils/cpx_erf.irp.f b/src/utils/cpx_erf.irp.f index 914a5157..6afac49e 100644 --- a/src/utils/cpx_erf.irp.f +++ b/src/utils/cpx_erf.irp.f @@ -1,6 +1,37 @@ ! --- +subroutine zboysfun00_1(rho, F0) + + implicit none + + include 'constants.include.F' + + complex*16, intent(in) :: rho + complex*16, intent(out) :: F0 + + double precision :: rho_re, rho_im, rho_mod + double precision :: sq_rho_re, sq_rho_im + complex*16 :: sq_rho + + complex*16, external :: cpx_erf_1 + + + rho_re = real(rho) + rho_im = aimag(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + + F0 = 0.5d0 * sqpi * cpx_erf_1(sq_rho_re, sq_rho_im) / sq_rho + + return +end + +! --- + complex*16 function cpx_erf_1(x, y) BEGIN_DOC @@ -43,6 +74,7 @@ complex*16 function cpx_erf_1(x, y) cpx_erf_1 = conjg(erf_tot) endif + return end ! --- @@ -54,22 +86,42 @@ complex*16 function erf_E(x, yabs) double precision, intent(in) :: x, yabs - if((dabs(x).gt.6.d0) .or. (x==0.d0)) then + double precision :: xy + + + if(dabs(x) .gt. 6.d0) then erf_E = (0.d0, 0.d0) return endif - if(dabs(x) .lt. 1.d-7) then + xy = x * yabs - erf_E = -inv_pi * (0.d0, 1.d0) * yabs + if(dabs(xy) .lt. 0.1d0) then + + erf_E = ((((((((((((-(0.d0, 9.3968347936601903d-8) * xy + (6.5777843555621328d-7, 0.d0) & + ) * xy + (0.d0, 4.2755598311153869d-6) & + ) * xy - (2.5653358986692322d-5, 0.d0) & + ) * xy - (0.d0, 0.00014109347442680775d0) & + ) * xy + (0.00070546737213403885d0, 0.d0) & + ) * xy + (0.d0, 0.0031746031746031746d0) & + ) * xy - (0.012698412698412698d0, 0.d0) & + ) * xy - (0.d0, 0.044444444444444446d0) & + ) * xy + (0.13333333333333333d0, 0.d0) & + ) * xy + (0.d0, 0.33333333333333331d0) & + ) * xy - (0.66666666666666663d0, 0.d0) & + ) * xy - (0.d0, 1.d0) & + ) * xy + (1.d0, 0.d0) + + erf_E = erf_E * (0.d0, 1.d0) * yabs * inv_pi * dexp(-x*x) else erf_E = 0.5d0 * inv_pi * dexp(-x*x) & - * ((1.d0, 0.d0) - zexp(-(2.d0, 0.d0) * x * yabs)) / x + * ((1.d0, 0.d0) - zexp(-(0.d0, 2.d0) * xy)) / x endif + return end ! --- @@ -84,31 +136,30 @@ double precision function erf_F(x, yabs) integer, parameter :: Nmax = 13 integer :: i - double precision :: tmp1, tmp2, x2, ct + double precision :: tmp1, tmp2, x2 if(dabs(x) .gt. 5.8d0) then - erf_F = 0.d0 + return + endif - else - x2 = x * x - ct = x * inv_pi + x2 = x * x - erf_F = 0.d0 - do i = 1, Nmax + erf_F = 0.d0 + do i = 1, Nmax - tmp1 = 0.25d0 * dble(i) * dble(i) + x2 - tmp2 = dexp(-tmp1) / tmp1 - erf_F = erf_F + tmp2 + tmp1 = 0.25d0 * dble(i*i) + x2 + tmp2 = dexp(-tmp1) / tmp1 + erf_F = erf_F + tmp2 - if(dabs(tmp2) .lt. 1d-15) exit - enddo - erf_F = ct * erf_F + if(tmp2 .lt. 1d-15) exit + enddo - endif + erf_F = x * inv_pi * erf_F + return end ! --- @@ -149,6 +200,7 @@ complex*16 function erf_G(x, yabs) enddo + return end ! --- @@ -163,19 +215,14 @@ complex*16 function erf_H(x, yabs) integer, parameter :: Nmax = 13 integer :: i - double precision :: tmp0, tmp1, tmp_mod, x2, ct, idble + double precision :: tmp0, tmp1, x2, idble complex*16 :: tmp2 - if(x .eq. 0.d0) then - erf_H = (0.d0, 0.d0) - return - endif - - if((dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0)) then + if((dabs(x) .lt. 107d0) .and. (yabs .lt. 6.1d0)) then + !if((dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0)) then x2 = x * x - ct = 0.5d0 * inv_pi erf_H = 0.d0 do i = 1, Nmax @@ -186,10 +233,10 @@ complex*16 function erf_H(x, yabs) tmp2 = dexp(-tmp1-idble*yabs) * (x + (0.d0, 1.d0)*tmp0) / tmp1 erf_H = erf_H + tmp2 - tmp_mod = dsqrt(real(tmp2)*real(tmp2) + aimag(tmp2)*aimag(tmp2)) - if(tmp_mod .lt. 1d-15) exit + if(zabs(tmp2) .lt. 1d-15) exit enddo - erf_H = ct * erf_H + + erf_H = 0.5d0 * inv_pi * erf_H else @@ -201,7 +248,7 @@ end ! --- -subroutine zboysfun00(z, val) +subroutine zboysfun00_2(z, val) BEGIN_DOC !