diff --git a/config/ifort.cfg b/config/ifort.cfg index 866aae3d..714c4b10 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index d689050e..a2cb4c8a 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_avx_mpi.cfg b/config/ifort_avx_mpi.cfg index 9a839e66..f2bb8889 100644 --- a/config/ifort_avx_mpi.cfg +++ b/config/ifort_avx_mpi.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : mpiifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL # Global options ################ diff --git a/config/ifort_debug.cfg b/config/ifort_debug.cfg index f2fbf8ce..9b718380 100644 --- a/config/ifort_debug.cfg +++ b/config/ifort_debug.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL # Global options ################ diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index 57087847..e0d489a0 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : mpiifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL # Global options ################ diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index a4bce680..5ed01227 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ @@ -31,8 +31,8 @@ OPENMP : 1 ; Append OpenMP flags # -ftz : Flushes denormal results to zero # [OPT] -FC : -traceback -FCFLAGS : -O2 -ip -g -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer +FC : -traceback -shared-intel +FCFLAGS : -O2 -ip -g -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer # Profiling flags ################# diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg index 5d952e54..ddb4aa2d 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_xHost.cfg @@ -6,10 +6,10 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=64 +IRPF90_FLAGS : --ninja --align=64 -DINTEL # Global options ################ diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 6c8e5c83..48e3803e 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -96,8 +96,12 @@ end ! x=cos(theta) double precision function ylm_real(l,m,x,phi) - implicit double precision (a-h,o-z) - DIMENSION PM(0:100,0:100) + implicit none + integer :: MM, iabs_m, m, l + double precision :: pi, fourpi, factor, x, phi, coef + double precision :: xchap, ychap, zchap + double precision, external :: fact + double precision :: PM(0:100,0:100), plm MM=100 pi=dacos(-1.d0) fourpi=4.d0*pi @@ -1150,8 +1154,10 @@ end ! Output: PM(m,n) --- Pmn(x) ! ===================================================== ! - IMPLICIT DOUBLE PRECISION (P,X) - DIMENSION PM(0:MM,0:(N+1)) + implicit none +! IMPLICIT DOUBLE PRECISION (P,X) + integer :: MM, N, I, J, M + double precision :: PM(0:MM,0:(N+1)), X, XQ, XS DOUBLE PRECISION, SAVE :: INVERSE(100) = 0.D0 DOUBLE PRECISION :: LS, II, JJ IF (INVERSE(1) == 0.d0) THEN @@ -1202,8 +1208,9 @@ end ! P_l^|m|(cos(theta)) exp(i m phi) subroutine erreur(x,n,rmoy,error) - implicit double precision(a-h,o-z) - dimension x(n) + implicit none + integer :: i, n + double precision :: x(n), rn, rn1, error, rmoy ! calcul de la moyenne rmoy=0.d0 do i=1,n diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 11492652..eda9642c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -253,12 +253,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(exc_degree) nmax=k-1 - allocate(iorder(nmax)) - do i=1,nmax - iorder(i) = i - enddo - call isort(indices,iorder,nmax) - deallocate(iorder) + call isort_noidx(indices,nmax) ! Start with 32 elements. Size will double along with the filtering. allocate(preinteresting(0:32), prefullinteresting(0:32), & diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 265d2384..c1b0b228 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1,3 +1,9 @@ + real*8 function lgamma(x) + implicit none + real*8, intent(in) :: x + lgamma = log(abs(gamma(x))) + end function lgamma + BEGIN_PROVIDER [ integer, NSOMOMax] &BEGIN_PROVIDER [ integer, NCSFMax] &BEGIN_PROVIDER [ integer*8, NMO] @@ -22,33 +28,65 @@ integer NSOMO integer dimcsfpercfg integer detDimperBF - real*8 :: coeff + real*8 :: coeff, binom1, binom2 integer MS integer ncfgpersomo + real*8, external :: lgamma detDimperBF = 0 MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos - n_CSF = cfg_seniority_index(0)-1 + n_CSF = 0 ncfgprev = cfg_seniority_index(0) - do i = 0-iand(MS,1)+2, NSOMOMax,2 - if(cfg_seniority_index(i) .EQ. -1)then - ncfgpersomo = N_configuration + 1 - else - ncfgpersomo = cfg_seniority_index(i) - endif - ncfg = ncfgpersomo - ncfgprev - !detDimperBF = max(1,nint((binom(i,(i+1)/2)))) - if (i > 2) then - dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) - else - dimcsfpercfg = 1 - endif - n_CSF += ncfg * dimcsfpercfg - !if(cfg_seniority_index(i+2) == -1) EXIT - !if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF - ncfgprev = cfg_seniority_index(i) + ncfgpersomo = ncfgprev + do i = iand(MS,1), NSOMOMax-2,2 + if(cfg_seniority_index(i) .EQ. -1) then + cycle + endif + if(cfg_seniority_index(i+2) .EQ. -1) then + ncfgpersomo = N_configuration + 1 + else + if(cfg_seniority_index(i+2) > ncfgpersomo) then + ncfgpersomo = cfg_seniority_index(i+2) + else + k = 0 + do while(cfg_seniority_index(i+2+k) < ncfgpersomo) + k = k + 2 + ncfgpersomo = cfg_seniority_index(i+2+k) + enddo + endif + endif + ncfg = ncfgpersomo - ncfgprev + if(iand(MS,1) .EQ. 0) then + !dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + binom1 = dexp(lgamma(1.0d0*(i+1)) & + - lgamma(1.0d0*((i/2)+1)) & + - lgamma(1.0d0*(i-((i/2))+1))); + binom2 = dexp(lgamma(1.0d0*(i+1)) & + - lgamma(1.0d0*(((i/2)+1)+1)) & + - lgamma(1.0d0*(i-((i/2)+1)+1))); + dimcsfpercfg = max(1,nint(binom1 - binom2)) + else + !dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + binom1 = dexp(lgamma(1.0d0*(i+1)) & + - lgamma(1.0d0*(((i+1)/2)+1)) & + - lgamma(1.0d0*(i-(((i+1)/2))+1))); + binom2 = dexp(lgamma(1.0d0*(i+1)) & + - lgamma(1.0d0*((((i+3)/2)+1)+1)) & + - lgamma(1.0d0*(i-(((i+3)/2)+1)+1))); + dimcsfpercfg = max(1,nint(binom1 - binom2)) + endif + n_CSF += ncfg * dimcsfpercfg + if(cfg_seniority_index(i+2) > ncfgprev) then + ncfgprev = cfg_seniority_index(i+2) + else + k = 0 + do while(cfg_seniority_index(i+2+k) < ncfgprev) + k = k + 2 + ncfgprev = cfg_seniority_index(i+2+k) + enddo + endif enddo - END_PROVIDER +END_PROVIDER subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 2a83cc28..da23b919 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -197,6 +197,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call write_int(6,N_st,'Number of states') call write_int(6,N_st_diag,'Number of states in diagonalization') call write_int(6,sze,'Number of determinants') + call write_int(6,sze_csf,'Number of CSFs') call write_int(6,nproc_target,'Number of threads for diagonalization') call write_double(6, r1, 'Memory(Gb)') if (disk_based) then diff --git a/src/utils/intel.f90 b/src/utils/intel.f90 new file mode 100644 index 00000000..2b61cb38 --- /dev/null +++ b/src/utils/intel.f90 @@ -0,0 +1,173 @@ +module intel + use, intrinsic :: iso_c_binding + interface + subroutine ippsSortAscend_32s_I(pSrc, len) bind(C, name='ippsSortAscend_32s_I') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortAscend_32f_I(pSrc, len) bind(C, name='ippsSortAscend_32f_I') + use iso_c_binding + integer, intent(in), value :: len + real, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortAscend_64s_I(pSrc, len) bind(C, name='ippsSortAscend_64s_I') + use iso_c_binding + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortAscend_64f_I(pSrc, len) bind(C, name='ippsSortAscend_64f_I') + use iso_c_binding + integer, intent(in), value :: len + double precision, intent(inout) :: pSrc(len) + end + end interface + + interface + subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(in), value :: dataType + integer, intent(out) :: pBufSize + end + end interface + + interface + subroutine ippsSortRadixAscend_16s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_16s_I') + use iso_c_binding + integer, intent(in), value :: len + integer*2, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_32s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32s_I') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_32f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32f_I') + use iso_c_binding + integer, intent(in), value :: len + real, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_64s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64s_I') + use iso_c_binding + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_64f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64f_I') + use iso_c_binding + integer, intent(in), value :: len + double precision, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + + interface + subroutine ippsSortRadixIndexAscend_16s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_16s') + use iso_c_binding + integer, intent(in), value :: len + integer*2, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_32s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_32s') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_32f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_32f') + use iso_c_binding + integer, intent(in), value :: len + real , intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_64s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_64s') + use iso_c_binding + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_64f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_64f') + use iso_c_binding + integer, intent(in), value :: len + real*8 , intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + + interface + subroutine ippsSortIndexAscend_32f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32f_I') + use iso_c_binding + real(4), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_32s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32s_I') + use iso_c_binding + integer(4), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_64f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64f_I') + use iso_c_binding + real(8), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_64s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64s_I') + use iso_c_binding + integer(8), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_16s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_16s_I') + use iso_c_binding + integer(2), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface +end module diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index 2a655eed..a63eb4a3 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -38,15 +38,7 @@ BEGIN_TEMPLATE $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) integer, external :: omp_get_num_threads - if (omp_get_num_threads() == 1) then - !$OMP PARALLEL DEFAULT(SHARED) - !$OMP SINGLE - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - !$OMP END SINGLE - !$OMP END PARALLEL - else - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - endif + call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) end recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) @@ -57,7 +49,7 @@ BEGIN_TEMPLATE $type :: c, tmp integer :: itmp integer :: i, j - + if(isize<2)return c = x( shiftr(first+last,1) ) @@ -89,16 +81,11 @@ BEGIN_TEMPLATE endif else if (first < i-1) then - !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,first,i,level) call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - !$OMP END TASK endif if (j+1 < last) then - !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,last,j,level) call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - !$OMP END TASK endif - !$OMP TASKWAIT endif end @@ -262,7 +249,60 @@ SUBST [ X, type ] i2 ; integer*2 ;; END_TEMPLATE + +!---------------------- INTEL +IRP_IF INTEL + BEGIN_TEMPLATE + subroutine $Xsort(x,iorder,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + character, allocatable :: tmp(:) + if (isize < 2) return + call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n) + allocate(tmp(n)) + call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp) + deallocate(tmp) + iorder(1:isize) = iorder(1:isize)+1 + call $Xset_order(x,iorder,isize) + end + + subroutine $Xsort_noidx(x,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer :: n + character, allocatable :: tmp(:) + if (isize < 2) return + call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n) + allocate(tmp(n)) + call ippsSortRadixAscend_$ityp_I(x, isize, tmp) + deallocate(tmp) + end + +SUBST [ X, type, ityp, n, ippsz ] + ; real ; 32f ; 4 ; 13 ;; + i ; integer ; 32s ; 4 ; 11 ;; + i2 ; integer*2 ; 16s ; 2 ; 7 ;; +END_TEMPLATE + +BEGIN_TEMPLATE + subroutine $Xsort(x,iorder,isize) implicit none BEGIN_DOC @@ -289,12 +329,12 @@ BEGIN_TEMPLATE endif end subroutine $Xsort -SUBST [ X, type, Y ] - ; real ; i ;; - d ; double precision ; i8 ;; +SUBST [ X, type ] + d ; double precision ;; END_TEMPLATE BEGIN_TEMPLATE + subroutine $Xsort(x,iorder,isize) implicit none BEGIN_DOC @@ -306,8 +346,112 @@ BEGIN_TEMPLATE $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) integer :: n -! call $Xradix_sort(x,iorder,isize,-1) - call quick_$Xsort(x,iorder,isize) + if (isize < 2) then + return + endif + call sorted_$Xnumber(x,isize,n) + if (isize == n) then + return + endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else + call $Xradix_sort(x,iorder,isize,-1) + endif + end subroutine $Xsort + +SUBST [ X, type ] + i8 ; integer*8 ;; +END_TEMPLATE + +!---------------------- END INTEL +IRP_ELSE +!---------------------- NON-INTEL +BEGIN_TEMPLATE + + subroutine $Xsort_noidx(x,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer, allocatable :: iorder(:) + integer :: i + allocate(iorder(isize)) + do i=1,isize + iorder(i)=i + enddo + call $Xsort(x,iorder,isize) + deallocate(iorder) + end subroutine $Xsort_noidx + +SUBST [ X, type ] + ; real ;; + d ; double precision ;; + i ; integer ;; + i8 ; integer*8 ;; + i2 ; integer*2 ;; +END_TEMPLATE + +BEGIN_TEMPLATE + + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + if (isize < 2) then + return + endif +! call sorted_$Xnumber(x,isize,n) +! if (isize == n) then +! return +! endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else +! call heap_$Xsort(x,iorder,isize) + call quick_$Xsort(x,iorder,isize) + endif + end subroutine $Xsort + +SUBST [ X, type ] + ; real ;; + d ; double precision ;; +END_TEMPLATE + +BEGIN_TEMPLATE + + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + if (isize < 2) then + return + endif + call sorted_$Xnumber(x,isize,n) + if (isize == n) then + return + endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else + call $Xradix_sort(x,iorder,isize,-1) + endif end subroutine $Xsort SUBST [ X, type ] @@ -316,6 +460,11 @@ SUBST [ X, type ] i2 ; integer*2 ;; END_TEMPLATE +IRP_ENDIF +!---------------------- END NON-INTEL + + + BEGIN_TEMPLATE subroutine $Xset_order(x,iorder,isize) implicit none @@ -413,10 +562,12 @@ SUBST [ X, type ] i2; integer*2 ;; END_TEMPLATE + BEGIN_TEMPLATE - recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) +recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) implicit none + BEGIN_DOC ! Sort integer array x(isize) using the radix sort algorithm. ! iorder in input should be (1,2,3,...,isize), and in output @@ -552,24 +703,14 @@ BEGIN_TEMPLATE endif -! !$OMP PARALLEL DEFAULT(SHARED) if (isize > 1000000) -! !$OMP SINGLE if (i3>1_$int_type) then -! !$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(i3 > 1000000) call $Xradix_sort$big(x,iorder,i3,iradix_new-1) -! !$OMP END TASK endif if (isize-i3>1_$int_type) then -! !$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(isize-i3 > 1000000) call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) -! !$OMP END TASK endif -! !$OMP TASKWAIT -! !$OMP END SINGLE -! !$OMP END PARALLEL - return endif @@ -624,16 +765,11 @@ BEGIN_TEMPLATE if (i1>1_$int_type) then - !$OMP TASK FIRSTPRIVATE(i0,iradix,i1) SHARED(x,iorder) if(i1 >1000000) call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - !$OMP END TASK endif if (i0>1) then - !$OMP TASK FIRSTPRIVATE(i0,iradix) SHARED(x,iorder) if(i0 >1000000) call $Xradix_sort$big(x,iorder,i0,iradix-1) - !$OMP END TASK endif - !$OMP TASKWAIT end @@ -646,3 +782,4 @@ SUBST [ X, type, integer_size, is_big, big, int_type ] END_TEMPLATE +