From 9d355e575260f1fdb4ed0d6b1e21e58fb28a3d9e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 18 Mar 2021 10:08:47 +0100 Subject: [PATCH] Split memory-intensive loop --- Makefile | 1 + el_nuc_el_blas.irp.f | 35 +++++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 68aa4c3..dd997d9 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high +#FC = ifort -xCORE-AVX2 -g -mkl=sequential FCFLAGS= -O3 -I . NINJA = ninja ARCHIVE = ar crs diff --git a/el_nuc_el_blas.irp.f b/el_nuc_el_blas.irp.f index 88699d2..c5d5660 100644 --- a/el_nuc_el_blas.irp.f +++ b/el_nuc_el_blas.irp.f @@ -40,7 +40,7 @@ END_PROVIDER END_DOC integer :: i, j, a, p, k, l, lmax, m, n, ii - double precision :: accu, cn + double precision :: accu, cn, cn2 ! double precision,dimension(:),allocatable :: cn factor_een_blas = 0.0d0 @@ -63,27 +63,38 @@ END_PROVIDER enddo factor_een_blas = factor_een_blas + accu * cn + cn2 = cn+cn do ii=1,4 do j=1,nelec factor_een_deriv_e_blas(j,ii) = factor_een_deriv_e_blas(j,ii) + (& - tmp_c(j,a,m,k) * rescale_een_n_deriv_e(j,ii,a,m+l) + & - dtmp_c(j,ii,a,m,k) * rescale_een_n(j,a,m+l) + & - dtmp_c(j,ii,a,m+l,k) * rescale_een_n(j,a,m) + & - tmp_c(j,a,m+l,k)*rescale_een_n_deriv_e(j,ii,a,m) & - ) * cn + tmp_c (j,a,m+l,k) *rescale_een_n_deriv_e(j,ii,a,m) + & + dtmp_c(j,ii,a,m+l,k) * rescale_een_n(j,a,m) ) * cn + enddo + enddo + + do j=1,nelec + factor_een_deriv_e_blas(j,4) = factor_een_deriv_e_blas(j,4) + (& + dtmp_c(j,1,a,m+l,k) * rescale_een_n_deriv_e(j,1,a,m ) + & + dtmp_c(j,2,a,m+l,k) * rescale_een_n_deriv_e(j,2,a,m ) + & + dtmp_c(j,3,a,m+l,k) * rescale_een_n_deriv_e(j,3,a,m ) & + )*cn2 + enddo + + + do ii=1,4 + do j=1,nelec + factor_een_deriv_e_blas(j,ii) = factor_een_deriv_e_blas(j,ii) + (& + tmp_c(j,a,m,k) * rescale_een_n_deriv_e(j,ii,a,m+l) + & + dtmp_c(j,ii,a,m,k) * rescale_een_n(j,a,m+l) ) * cn enddo enddo - cn = cn+cn do j=1,nelec factor_een_deriv_e_blas(j,4) = factor_een_deriv_e_blas(j,4) + (& dtmp_c(j,1,a,m ,k) * rescale_een_n_deriv_e(j,1,a,m+l) + & dtmp_c(j,2,a,m ,k) * rescale_een_n_deriv_e(j,2,a,m+l) + & - dtmp_c(j,3,a,m ,k) * rescale_een_n_deriv_e(j,3,a,m+l) + & - dtmp_c(j,1,a,m+l,k) * rescale_een_n_deriv_e(j,1,a,m ) + & - dtmp_c(j,2,a,m+l,k) * rescale_een_n_deriv_e(j,2,a,m ) + & - dtmp_c(j,3,a,m+l,k) * rescale_een_n_deriv_e(j,3,a,m ) & - )*cn + dtmp_c(j,3,a,m ,k) * rescale_een_n_deriv_e(j,3,a,m+l) & + )*cn2 enddo enddo