diff --git a/Makefile b/Makefile index dd3222f..a2dc791 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ IRPF90 = irpf90 --codelet=factor_een:100000 #-a -d -FC = ifort -xHost -g +FC = ifort -xHost -g -mkl=sequential FCFLAGS= -O2 -ffree-line-length-none -I . NINJA = ninja AR = ar diff --git a/README.org b/README.org index 223b723..0ed98e9 100644 --- a/README.org +++ b/README.org @@ -1,3 +1,36 @@ * IRPJAST -CHAMP's Jastrow factor computation using the IRPF90 method + CHAMP's Jastrow factor computation using the IRPF90 method + + Original equation: + + $$ + \sum_{i=2}^{Ne} \sum_{j=1}^i \sum_{pkl} \sum_a^{Nn} c_{apkl}\, r_{ij}^k\, ( R_{ia}^l + R_{ja}^l) ( R_{ia} R_{ja})^m + $$ + + Expanding, one obtains: + + $$ + \sum_{i=2}^{Ne} \sum_{j=1}^i \sum_{pkl} \sum_a^{Nn} c_{apkl} R_{ia}^{p-k-l}\, r_{ij}^k\, R_{ja}^{p-k+l} + c_{apkl} R_{ia}^{p-k+l}\, r_{ij}^k\, R_{ja}^{p-k-l} + $$ + + The equation is symmetric if we exchange $i$ and $j$, so we can rewrite + + $$ + \sum_{i=1}^{Ne} \sum_{j=1}^{Ne} \sum_{pkl} \sum_a^{Nn} c_{apkl} R_{ia}^{p-k-l}\, r_{ij}^k\, R_{ja}^{p-k+l} + $$ + + If we reshape $R_{ja}^p$ as a matrix $R_{j,al}$ of size + $N_e \times N_n(N_c+1)$, + for every $k$, we can pre-compute the matrix product + + $$ + C_{i,al}^{k} = \sum_j r_{ij}^k\, R_{i,al} + $$ + + and express the total Jastrow as: + + $$ + \sum_{i=1}^{Ne} \sum_{pkl} \sum_a^{Nn} + c_{apkl} R_{ia}^{p-k-l}\, C_{i,a(p-k+l)}^k + $$ diff --git a/deriv_num b/deriv_num index 97d3421..0d892b8 100755 Binary files a/deriv_num and b/deriv_num differ diff --git a/el_nuc_el.irp.f b/el_nuc_el.irp.f index 36e4313..bff5d4d 100644 --- a/el_nuc_el.irp.f +++ b/el_nuc_el.irp.f @@ -2,10 +2,17 @@ BEGIN_PROVIDER [ double precision, factor_een ] implicit none BEGIN_DOC ! ElectronE-electron-nuclei contribution to Jastrow factor + ! + ! 5436.20340250000 END_DOC integer :: i, j, a, p, k, l, lmax, m, n double precision :: cn, accu2, accu double precision :: f(nnuc,0:ncord-2,0:ncord-2) + double precision :: tmp_c(nelec,nnuc,0:ncord,0:ncord-1) + + factor_een = factor_een_blas + return + factor_een = 0.0d0 @@ -18,21 +25,25 @@ BEGIN_PROVIDER [ double precision, factor_een ] endif do l = 0, lmax if ( iand(p - k - l, 1) == 1) cycle + m = (p - k - l) / 2 + do a = 1, nnuc - cn = cord_vect_lkp(l, k, p, typenuc_arr(a)) accu2 = 0.d0 + cn = cord_vect_lkp(l, k, p, typenuc_arr(a)) do j = 2, nelec accu = 0.d0 - do i = 1, j - 1 + do i = 1, j-1 accu = accu + rescale_een_e(i,j,k) * & - (rescale_een_n(i,a,l) + rescale_een_n(j,a,l)) * & - rescale_een_n(i,a,m) + rescale_een_n(i,a,m) * rescale_een_n(j,a,m+l) + & + rescale_een_e(i,j,k) * & + rescale_een_n(i,a,m+l) * rescale_een_n(j,a,m) enddo - accu2 = accu2 + accu * rescale_een_n(j, a, m) + accu2 = accu2 + accu enddo factor_een = factor_een + accu2 * cn enddo + enddo enddo enddo @@ -77,7 +88,7 @@ BEGIN_PROVIDER [ double precision, factor_een_deriv_e, (4, nelec) ] enddo do i = 1, nelec - rial = rescale_een_n(i, a, l) + rial = rescale_een_n(i, a, l) + rjal riam = rescale_een_n(i, a, m) rijk = rescale_een_e(i, j, k) @@ -85,24 +96,24 @@ BEGIN_PROVIDER [ double precision, factor_een_deriv_e, (4, nelec) ] drijk(ii) = rescale_een_e_deriv_e(ii, j, i, k) enddo - v1 = rijk * (rial + rjal) ! v(x) + v1 = rijk * rial ! v(x) v2 = rjam_cn * riam ! u(x) lap1 = 0.0d0 lap2 = 0.0d0 do ii = 1, 3 - d1 = drijk(ii) * (rial + rjal) + rijk * drjal(ii) + d1 = drijk(ii) * rial + rijk * drjal(ii) d2 = drjam_cn(ii) * riam lap1 = lap1 + d1 * d2 lap2 = lap2 + drijk(ii) * drjal(ii) - factor_een_deriv_e(ii, j) += v1 * d2 + d1 * v2 + factor_een_deriv_e(ii, j) = factor_een_deriv_e(ii, j) + v1 * d2 + d1 * v2 enddo ! v(x) u''(x) + 2 * u'(x) v'(x) + u(x) v''(x) ii = 4 - d1 = drijk(ii) * (rial + rjal) + rijk * drjal(ii) + 2.0d0 * lap2 + d1 = drijk(ii) * rial + rijk * drjal(ii) + lap2 + lap2 d2 = drjam_cn(ii) * riam - factor_een_deriv_e(ii, j) += v1 * d2 + d1 * v2 + 2.0d0 * lap1 + factor_een_deriv_e(ii, j) = factor_een_deriv_e(ii, j) + v1 * d2 + d1 * v2 + lap1 + lap1 enddo enddo diff --git a/el_nuc_el_blas.irp.f b/el_nuc_el_blas.irp.f new file mode 100644 index 0000000..16fde54 --- /dev/null +++ b/el_nuc_el_blas.irp.f @@ -0,0 +1,126 @@ +BEGIN_PROVIDER [ double precision, factor_een_blas ] + implicit none + BEGIN_DOC + ! ElectronE-electron-nuclei contribution to Jastrow factor + ! + ! 4124.84239750000 + END_DOC + integer :: i, j, a, p, k, l, lmax, m, n + double precision :: cn(nnuc), accu + double precision :: f(nnuc,0:ncord-2,0:ncord-2) + double precision :: tmp_c(nelec,nnuc,0:ncord,0:ncord-1) + + factor_een_blas = 0.0d0 + + ! r_{ij}^k . R_{ja}^l -> tmp_c_{ia}^{kl} + do k=0,ncord-1 + call dgemm('N','N', nelec, nnuc*(ncord+1), nelec, 1.d0, & + rescale_een_e(1,1,k), size(rescale_een_e,1), & + rescale_een_n(1,1,0), size(rescale_een_n,1), 0.d0, & + tmp_c(1,1,0,k), size(tmp_c,1)) + enddo + + do p = 2, ncord + do k = 0, p - 1 + m = p-k + if (k > 0) then + lmax = m + else + lmax = m - 2 + endif + + n = shiftr(m,1) + do l = iand(m, 1), lmax, 2 + + do a = 1, nnuc + cn(a) = cord_vect_lkp(l, k, p, typenuc_arr(a)) + enddo + + do a = 1, nnuc + accu = 0.d0 + do i=1,nelec + accu = accu + rescale_een_n(i,a,n) * tmp_c(i,a,n+l,k) + enddo + factor_een_blas = factor_een_blas + accu * cn(a) + enddo + n = n-1 + + enddo + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, factor_een_deriv_e_blas, (4, nelec) ] + implicit none + BEGIN_DOC + ! Dimensions 1-3 : dx, dy, dz + ! Dimension 4 : d2x + d2y + d2z + END_DOC + integer :: i, ii, j, a, p, k, l, lmax, m + double precision :: riam, rjam_cn, rial, rjal, rijk + double precision, dimension(4) :: driam, drjam_cn, drial, drjal, drijk + double precision :: cn, v1, v2, d1, d2, lap1, lap2 + + factor_een_deriv_e_blas = 0.0d0 + + do p = 2, ncord + do k = 0 , p - 1 + if (k /= 0) then + lmax = p - k + else + lmax = p - k - 2 + endif + + do l = 0, lmax + if ( iand(p - k - l, 1) == 1) cycle + m = (p - k - l) / 2 + + do a = 1, nnuc + cn = cord_vect_lkp(l, k, p, typenuc_arr(a)) + + do j = 1, nelec + rjal = rescale_een_n(j, a, l) + rjam_cn = rescale_een_n(j, a, m) * cn + + do ii = 1, 4 + drjal(ii) = rescale_een_n_deriv_e(ii, j, a, l) + drjam_cn(ii) = rescale_een_n_deriv_e(ii, j, a, m) * cn + enddo + + do i = 1, nelec + rial = rescale_een_n(i, a, l) + rjal + riam = rescale_een_n(i, a, m) + rijk = rescale_een_e(i, j, k) + + do ii = 1, 4 + drijk(ii) = rescale_een_e_deriv_e(ii, j, i, k) + enddo + + v1 = rijk * rial ! v(x) + v2 = rjam_cn * riam ! u(x) + + lap1 = 0.0d0 + lap2 = 0.0d0 + do ii = 1, 3 + d1 = drijk(ii) * rial + rijk * drjal(ii) + d2 = drjam_cn(ii) * riam + lap1 = lap1 + d1 * d2 + lap2 = lap2 + drijk(ii) * drjal(ii) + factor_een_deriv_e_blas(ii, j) = factor_een_deriv_e_blas(ii, j) + v1 * d2 + d1 * v2 + enddo + + ! v(x) u''(x) + 2 * u'(x) v'(x) + u(x) v''(x) + ii = 4 + d1 = drijk(ii) * rial + rijk * drjal(ii) + lap2 + lap2 + d2 = drjam_cn(ii) * riam + factor_een_deriv_e_blas(ii, j) = factor_een_deriv_e_blas(ii, j) + v1 * d2 + d1 * v2 + lap1 + lap1 + + enddo + enddo + enddo + enddo + enddo + enddo + +END_PROVIDER diff --git a/jastrow b/jastrow index 68e5eea..4fd9e54 100755 Binary files a/jastrow and b/jastrow differ