From 3c7a10934f51aea2b97ea3196fae6442b1c0030a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 19:54:00 +0200 Subject: [PATCH] Accelerated Cholesky --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- src/ao_two_e_ints/cholesky.irp.f | 42 ++++++++++++++++++++------------ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index e0d0e02e..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6a78e9ff..dc5040be 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -73,7 +73,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:) integer :: i,j,k,m,p,q, qj, dj @@ -138,7 +138,16 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - allocate(Delta(np,nq)) + allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) do m=1,nq do k=1,np @@ -149,17 +158,13 @@ subroutine direct_cholesky(L, ndim, rank, tau) addr(2,Dset(m)), & ao_integrals_map) enddo - - do p=1,N - f = L(Dset(m),p) - do k=1,np - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f - enddo - enddo enddo !$OMP END PARALLEL DO - ! f. + call dgemm('N','T',np,nq,N,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) @@ -184,19 +189,26 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! iii. f = 1.d0/dsqrt(Qmax) do p=1,np - L(Lset(p), rank) = Delta(p,dj) * f + Ltmp_p(p,1) = Delta(p,dj) * f + L(Lset(p), rank) = Ltmp_p(p,1) + enddo + + do q=1,nq + Ltmp_q(q,1) = L(Dset(q), rank) enddo ! iv. +! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) + !$OMP PARALLEL DO PRIVATE(f,m,k) do m=1, nq - f = L(Dset(m),rank) do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f + Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo + !$OMP END PARALLEL DO do k=1, np - D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) enddo Qmax = D(Dset(1)) @@ -206,7 +218,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo - deallocate(Delta) + deallocate(Delta, Ltmp_p, Ltmp_q) ! i. N = N+j