Using Intel IPP for sorting

This commit is contained in:
Anthony Scemama 2021-05-31 01:48:34 +02:00
parent 9a3bd2b278
commit 32e2afca90
13 changed files with 297 additions and 87 deletions

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32
IRPF90_FLAGS : --ninja --align=32 -DINTEL
# Global options
################

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32
IRPF90_FLAGS : --ninja --align=32 -DINTEL
# Global options
################

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : mpiifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32 -DMPI
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL
# Global options
################

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32 --assert
IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
# Global options
################

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : mpiifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32 -DMPI
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL
# Global options
################

View File

@ -7,9 +7,9 @@
#
[COMMON]
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32
IRPF90_FLAGS : --ninja --align=32 -DINTEL
# Global options
################

View File

@ -6,10 +6,10 @@
# --align=32 : Align all provided arrays on a 32-byte boundary
#
[COMMON]
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel
FC : ifort -fpic
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=64
IRPF90_FLAGS : --ninja --align=64 -DINTEL
# Global options
################

View File

@ -96,8 +96,12 @@ end
! x=cos(theta)
double precision function ylm_real(l,m,x,phi)
implicit double precision (a-h,o-z)
DIMENSION PM(0:100,0:100)
implicit none
integer :: MM, iabs_m, m, l
double precision :: pi, fourpi, factor, x, phi, coef
double precision :: xchap, ychap, zchap
double precision, external :: fact
double precision :: PM(0:100,0:100), plm
MM=100
pi=dacos(-1.d0)
fourpi=4.d0*pi
@ -1150,8 +1154,10 @@ end
! Output: PM(m,n) --- Pmn(x)
! =====================================================
!
IMPLICIT DOUBLE PRECISION (P,X)
DIMENSION PM(0:MM,0:(N+1))
implicit none
! IMPLICIT DOUBLE PRECISION (P,X)
integer :: MM, N, I, J, M
double precision :: PM(0:MM,0:(N+1)), X, XQ, XS
DOUBLE PRECISION, SAVE :: INVERSE(100) = 0.D0
DOUBLE PRECISION :: LS, II, JJ
IF (INVERSE(1) == 0.d0) THEN
@ -1202,8 +1208,9 @@ end
! P_l^|m|(cos(theta)) exp(i m phi)
subroutine erreur(x,n,rmoy,error)
implicit double precision(a-h,o-z)
dimension x(n)
implicit none
integer :: i, n
double precision :: x(n), rn, rn1, error, rmoy
! calcul de la moyenne
rmoy=0.d0
do i=1,n

View File

@ -253,12 +253,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
deallocate(exc_degree)
nmax=k-1
allocate(iorder(nmax))
do i=1,nmax
iorder(i) = i
enddo
call isort(indices,iorder,nmax)
deallocate(iorder)
call isort_noidx(indices,nmax)
! Start with 32 elements. Size will double along with the filtering.
allocate(preinteresting(0:32), prefullinteresting(0:32), &

View File

@ -1,54 +1,62 @@
BEGIN_PROVIDER [ integer, NSOMOMax]
&BEGIN_PROVIDER [ integer, NCSFMax]
&BEGIN_PROVIDER [ integer*8, NMO]
&BEGIN_PROVIDER [ integer, NBFMax]
&BEGIN_PROVIDER [ integer, n_CSF]
&BEGIN_PROVIDER [ integer, maxDetDimPerBF]
implicit none
BEGIN_DOC
! Documentation for NSOMOMax
! The maximum number of SOMOs for the current calculation.
! required for the calculation of prototype arrays.
END_DOC
NSOMOMax = min(elec_num, cfg_nsomo_max + 2)
! Note that here we need NSOMOMax + 2 sizes
NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0
NBFMax = NCSFMax
maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2))))
NMO = n_act_orb
integer i,j,k,l
integer startdet,enddet
integer ncfg,ncfgprev
integer NSOMO
integer dimcsfpercfg
integer detDimperBF
real*8 :: coeff
integer MS
integer ncfgpersomo
detDimperBF = 0
MS = elec_alpha_num-elec_beta_num
! number of cfgs = number of dets for 0 somos
n_CSF = cfg_seniority_index(0)-1
ncfgprev = cfg_seniority_index(0)
do i = 0-iand(MS,1)+2, NSOMOMax,2
if(cfg_seniority_index(i) .EQ. -1)then
ncfgpersomo = N_configuration + 1
BEGIN_PROVIDER [ integer, NSOMOMax]
&BEGIN_PROVIDER [ integer, NCSFMax]
&BEGIN_PROVIDER [ integer*8, NMO]
&BEGIN_PROVIDER [ integer, NBFMax]
&BEGIN_PROVIDER [ integer, n_CSF]
&BEGIN_PROVIDER [ integer, maxDetDimPerBF]
implicit none
BEGIN_DOC
! The maximum number of SOMOs for the current calculation.
! required for the calculation of prototype arrays.
END_DOC
integer :: i,j,k,l
integer :: startdet,enddet
integer :: ncfg,ncfgprev
integer :: NSOMO
integer :: dimcsfpercfg
integer :: detDimperBF
real*8 :: coeff
integer :: MS
integer :: ncfgpersomo
double precision :: bin_1, bin_2
NSOMOMax = min(elec_num, cfg_nsomo_max + 2)
bin_1 = binom(NSOMOMax, (NSOMOMax+1)/2)
bin_2 = binom(NSOMOMax,((NSOMOMax+1)/2)+1)
! Note that here we need NSOMOMax + 2 sizes
NCSFMax = max(1,int(bin_1-bin_2))
NBFMax = NCSFMax
maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2))))
NMO = n_act_orb
detDimperBF = 0
MS = elec_alpha_num-elec_beta_num
! number of cfgs = number of dets for 0 somos
n_CSF = cfg_seniority_index(0)-1
ncfgprev = cfg_seniority_index(0)
do i = 0-iand(MS,1)+2, cfg_nsomo_max,2
if(cfg_seniority_index(i) == -1)then
ncfgpersomo = N_configuration + 1
else
ncfgpersomo = cfg_seniority_index(i)
ncfgpersomo = cfg_seniority_index(i)
endif
ncfg = ncfgpersomo - ncfgprev
!detDimperBF = max(1,nint((binom(i,(i+1)/2))))
if (i > 2) then
dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1))))
else
dimcsfpercfg = 1
endif
n_CSF += ncfg * dimcsfpercfg
!if(cfg_seniority_index(i+2) == -1) EXIT
!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF
ncfgprev = cfg_seniority_index(i)
enddo
END_PROVIDER
ncfg = ncfgpersomo - ncfgprev
if (i > 2) then
bin_1 = binom(i-2, (i-2+1)/2)
bin_2 = binom(i-2,((i-2+1)/2)+1)
dimcsfpercfg = max(1,int(bin_1-bin_2))
else
dimcsfpercfg = 1
endif
n_CSF += ncfg * dimcsfpercfg
ncfgprev = cfg_seniority_index(i)
enddo
END_PROVIDER
subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout)

View File

@ -197,6 +197,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
call write_int(6,N_st,'Number of states')
call write_int(6,N_st_diag,'Number of states in diagonalization')
call write_int(6,sze,'Number of determinants')
call write_int(6,sze_csf,'Number of CSFs')
call write_int(6,nproc_target,'Number of threads for diagonalization')
call write_double(6, r1, 'Memory(Gb)')
if (disk_based) then

70
src/utils/intel.f90 Normal file
View File

@ -0,0 +1,70 @@
module intel
use, intrinsic :: iso_c_binding
interface
subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize')
use iso_c_binding
integer, intent(in), value :: len
integer, intent(in), value :: dataType
integer, intent(out) :: pBufSize
end
end interface
interface
subroutine ippsSortAscend_32s_I(pSrc, len) bind(C, name='ippsSortAscend_32s_I')
use iso_c_binding
integer, intent(in), value :: len
integer, intent(inout) :: pSrc(len)
end
end interface
interface
subroutine ippsSortRadixAscend_32s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32s_I')
use iso_c_binding
integer, intent(in), value :: len
integer, intent(inout) :: pSrc(len)
integer, intent(inout) :: pTmp(len)
end
end interface
interface
subroutine ippsSortRadixIndexAscend_32s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_32s')
use iso_c_binding
integer, intent(in), value :: len
integer, intent(inout) :: pSrc(len)
integer, intent(in), value :: srcStrideBytes
integer, intent(inout) :: pDstIndx(len)
integer, intent(inout) :: pTmpIndx(len)
end
end interface
interface
subroutine ippsSortRadixIndexAscend_32f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_32f')
use iso_c_binding
integer, intent(in), value :: len
real , intent(inout) :: pSrc(len)
integer, intent(in), value :: srcStrideBytes
integer, intent(inout) :: pDstIndx(len)
integer, intent(inout) :: pTmpIndx(len)
end
end interface
interface
subroutine ippsSortIndexAscend_32f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32f_I')
use iso_c_binding
real(4), intent(in) :: pSrcDst(*)
integer(4), intent(inout) :: pDstIndx(*)
integer(4), intent(in), value :: len
end
end interface
interface
subroutine ippsSortIndexAscend_32s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32s_I')
use iso_c_binding
integer(4), intent(in) :: pSrcDst(*)
integer(4), intent(inout) :: pDstIndx(*)
integer(4), intent(in), value :: len
end
end interface
interface
subroutine ippsSortIndexAscend_64f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64f_I')
use iso_c_binding
real(8), intent(in) :: pSrcDst(*)
integer(4), intent(inout) :: pDstIndx(*)
integer(4), intent(in), value :: len
end
end interface
end module

View File

@ -57,7 +57,7 @@ BEGIN_TEMPLATE
$type :: c, tmp
integer :: itmp
integer :: i, j
if(isize<2)return
c = x( shiftr(first+last,1) )
@ -262,6 +262,104 @@ SUBST [ X, type ]
i2 ; integer*2 ;;
END_TEMPLATE
IRP_IF INTEL
subroutine sort(x,iorder,isize)
use intel
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
real ,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
integer :: n
call ippsSortIndexAscend_32f_I(x, iorder, isize)
iorder(:) = iorder(:)+1
end subroutine sort
subroutine dsort(x,iorder,isize)
use intel
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
real(8) ,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
integer :: n
call ippsSortIndexAscend_64f_I(x, iorder, isize)
iorder(:) = iorder(:)+1
end subroutine dsort
subroutine isort(x,iorder,isize)
use intel
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
integer ,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
integer :: n
integer, allocatable :: iorder1(:)
allocate(iorder1(isize*2))
n=4
call ippsSortRadixIndexAscend_32s(x, n, iorder, isize, iorder1)
iorder(1:isize) = iorder(1:isize)+1
deallocate(iorder1)
call iset_order(x,iorder,isize)
end subroutine isort
subroutine isort_noidx(x,isize)
use intel
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
integer ,intent(inout) :: x(isize)
integer, allocatable :: iorder1(:)
integer :: n
call ippsSortRadixIndexGetBufferSize(isize, 11, n)
n = n/4
allocate(iorder1(n))
call ippsSortRadixAscend_32s_I(x, isize, iorder1)
deallocate(iorder1)
end subroutine isort_noidx
BEGIN_TEMPLATE
subroutine $Xsort(x,iorder,isize)
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
$type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
integer :: n
! call $Xradix_sort(x,iorder,isize,-1)
call quick_$Xsort(x,iorder,isize)
end subroutine $Xsort
SUBST [ X, type ]
i8 ; integer*8 ;;
i2 ; integer*2 ;;
END_TEMPLATE
IRP_ELSE
BEGIN_TEMPLATE
subroutine $Xsort(x,iorder,isize)
implicit none
@ -289,9 +387,9 @@ BEGIN_TEMPLATE
endif
end subroutine $Xsort
SUBST [ X, type, Y ]
; real ; i ;;
d ; double precision ; i8 ;;
SUBST [ X, type ]
; real ;;
d ; double precision ;;
END_TEMPLATE
BEGIN_TEMPLATE
@ -316,6 +414,22 @@ SUBST [ X, type ]
i2 ; integer*2 ;;
END_TEMPLATE
subroutine isort_noidx(x,isize)
implicit none
BEGIN_DOC
! Sort array x(isize).
END_DOC
integer,intent(in) :: isize
$type,intent(inout) :: x(isize)
integer, allocatable :: iorder
allocate(iorder)
iorder=0
call $Xradix_sort(x,iorder,isize,-1)
deallocate(iorder)
end subroutine $Xsort
IRP_ENDIF
BEGIN_TEMPLATE
subroutine $Xset_order(x,iorder,isize)
implicit none
@ -413,10 +527,15 @@ SUBST [ X, type ]
i2; integer*2 ;;
END_TEMPLATE
BEGIN_TEMPLATE
recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
IRP_IF INTEL
use intel
IRP_ENDIF
implicit none
BEGIN_DOC
! Sort integer array x(isize) using the radix sort algorithm.
! iorder in input should be (1,2,3,...,isize), and in output
@ -448,6 +567,15 @@ BEGIN_TEMPLATE
stop
endif
IRP_IF INTEL
if ( ($type == 4).and.($integer_size == 32).and.($is_big == .False.) ) then
$intel
iorder(:) = iorder(:)+1
return
endif
IRP_ENDIF
i1=1_$int_type
i2=1_$int_type
do i=1_$int_type,isize
@ -637,12 +765,13 @@ BEGIN_TEMPLATE
end
SUBST [ X, type, integer_size, is_big, big, int_type ]
i ; 4 ; 32 ; .False. ; ; 4 ;;
i8 ; 8 ; 64 ; .False. ; ; 4 ;;
i2 ; 2 ; 16 ; .False. ; ; 4 ;;
i ; 4 ; 32 ; .True. ; _big ; 8 ;;
i8 ; 8 ; 64 ; .True. ; _big ; 8 ;;
SUBST [ X, type, integer_size, is_big, big, int_type, intel ]
i ; 4 ; 32 ; .False. ; ; 4 ; call ippsSortRadixIndexAscend_32s(x, 4, iorder, isize, iorder1) ;;
i8 ; 8 ; 64 ; .False. ; ; 4 ; ;;
i2 ; 2 ; 16 ; .False. ; ; 4 ; ;;
i ; 4 ; 32 ; .True. ; _big ; 8 ; ;;
i8 ; 8 ; 64 ; .True. ; _big ; 8 ; ;;
END_TEMPLATE