diff --git a/Makefile b/Makefile
index acfa64c..68aa4c3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 #-s nelec:10 -s nnuc:2 -s ncord:5 #-a -d
-FC = ifort -xCORE-AVX512 -g -mkl=sequential
-FCFLAGS= -O2 -I .
+IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d
+FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high
+FCFLAGS= -O3 -I .
 NINJA = ninja
 ARCHIVE = ar crs
 RANLIB = ranlib
diff --git a/codelet_factor_een_blas.irp.f b/codelet_factor_een_blas.irp.f
index e3e62da..26f4004 100644
--- a/codelet_factor_een_blas.irp.f
+++ b/codelet_factor_een_blas.irp.f
@@ -3,7 +3,7 @@ program codelet_factor_een_blas
   implicit none
   integer :: i
   double precision :: ticks_0, ticks_1, cpu_0, cpu_1
-  integer, parameter :: irp_imax = 200
+  integer, parameter :: irp_imax = 100
 
   PROVIDE factor_een_blas tmp_c
 
diff --git a/el_nuc_el_blas.irp.f b/el_nuc_el_blas.irp.f
index b6d7580..88699d2 100644
--- a/el_nuc_el_blas.irp.f
+++ b/el_nuc_el_blas.irp.f
@@ -1,5 +1,5 @@
- BEGIN_PROVIDER [ double precision, tmp_c, (nelec,nnuc,0:ncord,0:ncord-1) ]
-&BEGIN_PROVIDER [ double precision, dtmp_c, (nelec,4,nnuc,0:ncord,0:ncord-1) ]
+ BEGIN_PROVIDER [ double precision, tmp_c, (nelec_8,nnuc,0:ncord,0:ncord-1) ]
+&BEGIN_PROVIDER [ double precision, dtmp_c, (nelec_8,4,nnuc,0:ncord,0:ncord-1) ]
   implicit none
   BEGIN_DOC
   ! Calculate the intermediate buffers
@@ -32,7 +32,7 @@ END_PROVIDER
 
 
  BEGIN_PROVIDER [ double precision, factor_een_blas ]
-&BEGIN_PROVIDER [ double precision, factor_een_deriv_e_blas, (nelec,4) ]
+&BEGIN_PROVIDER [ double precision, factor_een_deriv_e_blas, (nelec_8,4) ]
   implicit none
   BEGIN_DOC
   ! Dimensions 1-3 : dx, dy, dz
@@ -44,7 +44,7 @@ END_PROVIDER
 ! double precision,dimension(:),allocatable :: cn
 
   factor_een_blas = 0.0d0
-  factor_een_deriv_e_blas(1:nelec,1:4) = 0.0d0
+  factor_een_deriv_e_blas(:,:) = 0.0d0
 
   do n = 1, dim_cord_vect
 
diff --git a/electrons.irp.f b/electrons.irp.f
index b2096d6..5161d34 100644
--- a/electrons.irp.f
+++ b/electrons.irp.f
@@ -1,3 +1,30 @@
+integer function size_8(n)
+  ! Returns n rounded up to a multiple of 8 (for n >= 1); while that value is
+  ! an exact power of two, 8 more is added, so the result is not a power of two.
+  implicit none
+  integer, intent(in) :: n
+  integer :: n8
+
+  n8 = ((n-1)/8+1) * 8
+  do while (popcnt(n8) == 1)
+    ! Power of two, shift by 8 (repeated: 8 + 8 = 16 is again a power of two)
+    n8 = n8 + 8
+  enddo
+  size_8 = n8
+end
+
+BEGIN_PROVIDER [ integer, nelec_8 ]
+  implicit none
+  BEGIN_DOC
+  ! nelec padded by size_8; first dimension of the electron-indexed arrays
+  END_DOC
+  integer, external :: size_8
+  nelec_8 = size_8(nelec)
+END_PROVIDER
+
+
+
+
 BEGIN_PROVIDER [ integer, nelec ]
   implicit none
   BEGIN_DOC
diff --git a/nuclei.irp.f b/nuclei.irp.f
index 25e16f4..4901eb0 100644
--- a/nuclei.irp.f
+++ b/nuclei.irp.f
@@ -8,6 +8,15 @@ BEGIN_PROVIDER [ integer, nnuc ]
   nnuc = nelec/5
 END_PROVIDER
 
+BEGIN_PROVIDER [ integer, nnuc_8 ]
+  implicit none
+  BEGIN_DOC
+  ! nnuc padded by size_8
+  END_DOC
+  integer, external :: size_8
+  nnuc_8 = size_8(nnuc)
+END_PROVIDER
+
  BEGIN_PROVIDER [ integer, typenuc ]
 &BEGIN_PROVIDER [integer, typenuc_arr, (nnuc)]
 
diff --git a/rescale.irp.f b/rescale.irp.f
index 2bd89ac..d9c8f6c 100644
--- a/rescale.irp.f
+++ b/rescale.irp.f
@@ -14,7 +14,7 @@ BEGIN_PROVIDER [ double precision, kappa_inv ]
   kappa_inv = 1.0d0 / kappa
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, rescale_ee, (nelec, nelec) ]
+BEGIN_PROVIDER [ double precision, rescale_ee, (nelec_8, nelec) ]
   implicit none
   BEGIN_DOC
   ! R = (1 - exp(-kappa r))/kappa for electron-electron for $J_{ee}$
@@ -60,7 +60,7 @@ BEGIN_PROVIDER [ double precision, rescale_ee_deriv_e, (4, nelec, nelec) ]
   enddo
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, rescale_en, (nelec, nnuc) ]
+BEGIN_PROVIDER [ double precision, rescale_en, (nelec_8, nnuc) ]
   implicit none
   BEGIN_DOC
   ! R = (1 - exp(-kappa r))/kappa for electron-nucleus for $J_{en}$
@@ -102,7 +102,7 @@ BEGIN_PROVIDER [ double precision, rescale_en_deriv_e, (4, nelec, nnuc) ]
   enddo
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, rescale_een_e, (nelec, nelec, 0:ncord)]
+BEGIN_PROVIDER [double precision, rescale_een_e, (nelec_8, nelec, 0:ncord)]
   implicit none
   BEGIN_DOC
   ! R = exp(-kappa r) for electron-electron for $J_{een}$
@@ -159,7 +159,7 @@ BEGIN_PROVIDER [double precision, rescale_een_e_ij, (nelec*(nelec-1)/2, 0:ncord)
 
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, rescale_een_n, (nelec, nnuc, 0:ncord)]
+BEGIN_PROVIDER [double precision, rescale_een_n, (nelec_8, nnuc, 0:ncord)]
   implicit none
   BEGIN_DOC
   ! R = exp(-kappa r) for electron-electron for $J_{een}$
@@ -186,7 +186,7 @@ BEGIN_PROVIDER [double precision, rescale_een_n, (nelec, nnuc, 0:ncord)]
 
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, rescale_een_n_deriv_e, (nelec, 4, nnuc, 0:ncord)]
+BEGIN_PROVIDER [double precision, rescale_een_n_deriv_e, (nelec_8, 4, nnuc, 0:ncord)]
   implicit none
   BEGIN_DOC
   ! Derivative of the scaled distance J_{een} wrt R_{ia}
@@ -243,7 +243,7 @@ BEGIN_PROVIDER [double precision, elnuc_dist_deriv_e, (4, nelec, nnuc)]
   end do
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, rescale_een_e_deriv_e, (nelec, 4, nelec, 0:ncord)]
+BEGIN_PROVIDER [double precision, rescale_een_e_deriv_e, (nelec_8, 4, nelec, 0:ncord)]
   BEGIN_DOC
   ! Derivative of the scaled distance J_{een} wrt R_{ia}
   END_DOC
@@ -279,7 +279,7 @@ BEGIN_PROVIDER [double precision, rescale_een_e_deriv_e, (nelec, 4, nelec, 0:nco
   enddo
 END_PROVIDER
 
-BEGIN_PROVIDER [double precision, rescale_een_e_deriv_e_t, (nelec, 4, nelec, 0:ncord)]
+BEGIN_PROVIDER [double precision, rescale_een_e_deriv_e_t, (nelec_8, 4, nelec, 0:ncord)]
   implicit none
   BEGIN_DOC
   ! Transposed rescale_een_e_deriv_e