1
0
mirror of https://github.com/TREX-CoE/irpjast.git synced 2024-12-22 20:36:08 +01:00

Split memory-intensive loop

This commit is contained in:
Anthony Scemama 2021-03-18 10:08:47 +01:00
parent ddba2253ac
commit 9d355e5752
2 changed files with 24 additions and 12 deletions

View File

@ -1,5 +1,6 @@
IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d
FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high
#FC = ifort -xCORE-AVX2 -g -mkl=sequential
FCFLAGS= -O3 -I . FCFLAGS= -O3 -I .
NINJA = ninja NINJA = ninja
ARCHIVE = ar crs ARCHIVE = ar crs

View File

@ -40,7 +40,7 @@ END_PROVIDER
END_DOC END_DOC
integer :: i, j, a, p, k, l, lmax, m, n, ii integer :: i, j, a, p, k, l, lmax, m, n, ii
double precision :: accu, cn double precision :: accu, cn, cn2
! double precision,dimension(:),allocatable :: cn ! double precision,dimension(:),allocatable :: cn
factor_een_blas = 0.0d0 factor_een_blas = 0.0d0
@ -63,27 +63,38 @@ END_PROVIDER
enddo enddo
factor_een_blas = factor_een_blas + accu * cn factor_een_blas = factor_een_blas + accu * cn
cn2 = cn+cn
do ii=1,4
do j=1,nelec
factor_een_deriv_e_blas(j,ii) = factor_een_deriv_e_blas(j,ii) + (&
tmp_c (j,a,m+l,k) *rescale_een_n_deriv_e(j,ii,a,m) + &
dtmp_c(j,ii,a,m+l,k) * rescale_een_n(j,a,m) ) * cn
enddo
enddo
do j=1,nelec
factor_een_deriv_e_blas(j,4) = factor_een_deriv_e_blas(j,4) + (&
dtmp_c(j,1,a,m+l,k) * rescale_een_n_deriv_e(j,1,a,m ) + &
dtmp_c(j,2,a,m+l,k) * rescale_een_n_deriv_e(j,2,a,m ) + &
dtmp_c(j,3,a,m+l,k) * rescale_een_n_deriv_e(j,3,a,m ) &
)*cn2
enddo
do ii=1,4 do ii=1,4
do j=1,nelec do j=1,nelec
factor_een_deriv_e_blas(j,ii) = factor_een_deriv_e_blas(j,ii) + (& factor_een_deriv_e_blas(j,ii) = factor_een_deriv_e_blas(j,ii) + (&
tmp_c(j,a,m,k) * rescale_een_n_deriv_e(j,ii,a,m+l) + & tmp_c(j,a,m,k) * rescale_een_n_deriv_e(j,ii,a,m+l) + &
dtmp_c(j,ii,a,m,k) * rescale_een_n(j,a,m+l) + & dtmp_c(j,ii,a,m,k) * rescale_een_n(j,a,m+l) ) * cn
dtmp_c(j,ii,a,m+l,k) * rescale_een_n(j,a,m) + &
tmp_c(j,a,m+l,k)*rescale_een_n_deriv_e(j,ii,a,m) &
) * cn
enddo enddo
enddo enddo
cn = cn+cn
do j=1,nelec do j=1,nelec
factor_een_deriv_e_blas(j,4) = factor_een_deriv_e_blas(j,4) + (& factor_een_deriv_e_blas(j,4) = factor_een_deriv_e_blas(j,4) + (&
dtmp_c(j,1,a,m ,k) * rescale_een_n_deriv_e(j,1,a,m+l) + & dtmp_c(j,1,a,m ,k) * rescale_een_n_deriv_e(j,1,a,m+l) + &
dtmp_c(j,2,a,m ,k) * rescale_een_n_deriv_e(j,2,a,m+l) + & dtmp_c(j,2,a,m ,k) * rescale_een_n_deriv_e(j,2,a,m+l) + &
dtmp_c(j,3,a,m ,k) * rescale_een_n_deriv_e(j,3,a,m+l) + & dtmp_c(j,3,a,m ,k) * rescale_een_n_deriv_e(j,3,a,m+l) &
dtmp_c(j,1,a,m+l,k) * rescale_een_n_deriv_e(j,1,a,m ) + & )*cn2
dtmp_c(j,2,a,m+l,k) * rescale_een_n_deriv_e(j,2,a,m ) + &
dtmp_c(j,3,a,m+l,k) * rescale_een_n_deriv_e(j,3,a,m ) &
)*cn
enddo enddo
enddo enddo