From fac03ea53b387a38be093474e85fe572c69bb097 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 9 Feb 2022 18:06:46 +0100 Subject: [PATCH] Accelerate AOs --- org/qmckl_ao.org | 128 +++++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8a6fbf6..28a831b 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3443,7 +3443,7 @@ integer function qmckl_compute_ao_basis_shell_gaussian_vgl_f( & ! Don't compute exponentials when the result will be almost zero. ! TODO : Use numerical precision here - cutoff = -dlog(1.d-15) + cutoff = -dlog(1.d-12) do inucl=1,nucl_num @@ -4094,7 +4094,7 @@ integer function qmckl_ao_polynomial_vgl_f (context, & lmax_array(1:3) = lmax if (lmax == 0) then VGL(1,1) = 1.d0 - vgL(2:5,1) = 0.d0 + VGL(2:5,1) = 0.d0 l(1:3,1) = 0 n=1 else if (lmax > 0) then @@ -4109,19 +4109,19 @@ integer function qmckl_ao_polynomial_vgl_f (context, & l (1:3,1:4) = 0 VGL(1 ,1 ) = 1.d0 - vgl(1:5,2:4) = 0.d0 + VGL(1:5,2:4) = 0.d0 l (1,2) = 1 - vgl(1,2) = pows(1,1) - vgL(2,2) = 1.d0 + VGL(1,2) = pows(1,1) + VGL(2,2) = 1.d0 l (2,3) = 1 - vgl(1,3) = pows(1,2) - vgL(3,3) = 1.d0 + VGL(1,3) = pows(1,2) + VGL(3,3) = 1.d0 l (3,4) = 1 - vgl(1,4) = pows(1,3) - vgL(4,4) = 1.d0 + VGL(1,4) = pows(1,3) + VGL(4,4) = 1.d0 n=4 endif @@ -4145,17 +4145,17 @@ integer function qmckl_ao_polynomial_vgl_f (context, & yz = pows(b,2) * pows(c,3) xz = pows(a,1) * pows(c,3) - vgl(1,n) = xy * pows(c,3) + VGL(1,n) = xy * pows(c,3) xy = dc * xy xz = db * xz yz = da * yz - vgl(2,n) = pows(a-1,1) * yz - vgl(3,n) = pows(b-1,2) * xz - vgl(4,n) = pows(c-1,3) * xy + VGL(2,n) = pows(a-1,1) * yz + VGL(3,n) = pows(b-1,2) * xz + VGL(4,n) = pows(c-1,3) * xy - vgl(5,n) = & + VGL(5,n) = & (da-1.d0) * pows(a-2,1) * yz + & (db-1.d0) * pows(b-2,2) * xz + & (dc-1.d0) * pows(c-2,3) * xy @@ -4378,7 +4378,7 @@ integer function qmckl_compute_ao_vgl_f(context, & double precision :: e_coord(3), n_coord(3) integer*8 :: n_poly - integer :: l, il, k + integer :: l, il, k, m, n integer*8 :: ipoint, inucl, ishell integer*8 :: ishell_start, ishell_end integer :: lstart(0:20) @@ -4388,25 +4388,45 @@ integer function qmckl_compute_ao_vgl_f(context, & double precision, allocatable :: poly_vgl(:,:) integer , allocatable :: powers(:,:) + integer , allocatable :: kil(:), knucl(:), kshell(:) - allocate(poly_vgl(5,ao_num), powers(3,ao_num)) + allocate(poly_vgl(8,ao_num), powers(8,ao_num)) + allocate(kil(ao_num), kshell(ao_num), knucl(nucl_num+1)) ! Pre-computed data - do l=0,20 - lstart(l) = l*(l+1)*(l+2)/6 +1 + + k=1 + do inucl=1,nucl_num + knucl(inucl) = k + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + l = shell_ang_mom(ishell) + m = l*(l+1)*(l+2)/6 +1 + n = (l+1)*(l+2)*(l+3)/6 + do il = m, n + kil(k) = il + kshell(k) = ishell + k = k+1 + end do + end do end do + knucl(nucl_num+1) = ao_num+1 + info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. ! TODO : Use numerical precision here - cutoff = -dlog(1.d-15) + cutoff = -dlog(1.d-12) do ipoint = 1, point_num e_coord(1) = coord(ipoint,1) e_coord(2) = coord(ipoint,2) e_coord(3) = coord(ipoint,3) - k=1 + + ! Express the radial part in the AO basis + do inucl=1,nucl_num n_coord(1) = nucl_coord(inucl,1) n_coord(2) = nucl_coord(inucl,2) @@ -4417,62 +4437,50 @@ integer function qmckl_compute_ao_vgl_f(context, & y = e_coord(2) - n_coord(2) z = e_coord(3) - n_coord(3) - r2 = x*x + z*z + z*z + r2 = x*x + y*y + z*z if (r2 > cutoff*nucleus_range(inucl)) then + do k = knucl(inucl), knucl(inucl+1)-1 + ao_vgl(k,ipoint,1) = 0.d0 + ao_vgl(k,ipoint,2) = 0.d0 + ao_vgl(k,ipoint,3) = 0.d0 + ao_vgl(k,ipoint,4) = 0.d0 + ao_vgl(k,ipoint,5) = 0.d0 + end do cycle end if ! Compute polynomials info = qmckl_ao_polynomial_vgl_f(context, e_coord, n_coord, & - nucleus_max_ang_mom(inucl), n_poly, powers, 3_8, & - poly_vgl, 5_8) + nucleus_max_ang_mom(inucl), n_poly, powers, 8_8, poly_vgl, 8_8) - ! Loop over shells - ishell_start = nucleus_index(inucl) + 1 - ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) - do ishell = ishell_start, ishell_end - l = shell_ang_mom(ishell) - do il = lstart(l), lstart(l+1)-1 - ! Value - ao_vgl(k,ipoint,1) = & - poly_vgl(1,il) * shell_vgl(ishell,ipoint,1) * ao_factor(k) + do k = knucl(inucl), knucl(inucl+1)-1 + y = shell_vgl(kshell(k),ipoint,1) * ao_factor(k) - ! Grad_x - ao_vgl(k,ipoint,2) = ( & - poly_vgl(2,il) * shell_vgl(ishell,ipoint,1) + & - poly_vgl(1,il) * shell_vgl(ishell,ipoint,2) & - ) * ao_factor(k) + ao_vgl(k,ipoint,1) = y * poly_vgl(1,kil(k)) + ao_vgl(k,ipoint,2) = y * poly_vgl(2,kil(k)) + ao_vgl(k,ipoint,3) = y * poly_vgl(3,kil(k)) + ao_vgl(k,ipoint,4) = y * poly_vgl(4,kil(k)) + ao_vgl(k,ipoint,5) = y * poly_vgl(5,kil(k)) - ! Grad_y - ao_vgl(k,ipoint,3) = ( & - poly_vgl(3,il) * shell_vgl(ishell,ipoint,1) + & - poly_vgl(1,il) * shell_vgl(ishell,ipoint,3) & - ) * ao_factor(k) + x = poly_vgl(1,kil(k)) * ao_factor(k) - ! Grad_z - ao_vgl(k,ipoint,4) = ( & - poly_vgl(4,il) * shell_vgl(ishell,ipoint,1) + & - poly_vgl(1,il) * shell_vgl(ishell,ipoint,4) & - ) * ao_factor(k) + ao_vgl(k,ipoint,2) = ao_vgl(k,ipoint,2) + x * shell_vgl(kshell(k),ipoint,2) + ao_vgl(k,ipoint,3) = ao_vgl(k,ipoint,3) + x * shell_vgl(kshell(k),ipoint,3) + ao_vgl(k,ipoint,4) = ao_vgl(k,ipoint,4) + x * shell_vgl(kshell(k),ipoint,4) + ao_vgl(k,ipoint,5) = ao_vgl(k,ipoint,5) + x * shell_vgl(kshell(k),ipoint,5) - ! Lapl_z - ao_vgl(k,ipoint,5) = ( & - poly_vgl(5,il) * shell_vgl(ishell,ipoint,1) + & - poly_vgl(1,il) * shell_vgl(ishell,ipoint,5) + & - 2.d0 * ( & - poly_vgl(2,il) * shell_vgl(ishell,ipoint,2) + & - poly_vgl(3,il) * shell_vgl(ishell,ipoint,3) + & - poly_vgl(4,il) * shell_vgl(ishell,ipoint,4) ) & - ) * ao_factor(k) - - k = k+1 - end do + ao_vgl(k,ipoint,5) = ao_vgl(k,ipoint,5) + & + (ao_factor(k) + ao_factor(k)) * (& + poly_vgl(2,kil(k)) * shell_vgl(kshell(k),ipoint,2) + & + poly_vgl(3,kil(k)) * shell_vgl(kshell(k),ipoint,3) + & + poly_vgl(4,kil(k)) * shell_vgl(kshell(k),ipoint,4) ) end do + end do end do - deallocate(poly_vgl, powers) + deallocate(poly_vgl, powers, kshell, kil, knucl) end function qmckl_compute_ao_vgl_f #+end_src