Merge branch 'dev' into fix_ncsf

2024-12-22 20:34:58 +01:00 · 2021-05-31 13:22:28 +02:00 · 2021-05-31 13:22:28 +02:00 · 3d03161a78
commit 3d03161a78
parent 94a069c2d5 d45f6091da
24 changed files with 496 additions and 85 deletions
--- a/RELEASE_NOTES.org
+++ b/RELEASE_NOTES.org
@ -29,7 +29,7 @@
  - Disk-based Davidson when too much memory is required
  - Fixed bug in DIIS
  - Fixed bug in molden (Au -> Angs)
-    
+
 *** User interface

    - Added ~qp_basis~ script to install a basis set from the ~bse~
@ -38,7 +38,7 @@
 ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with
      large wave functions
    - Removed ~etc/ninja.rc~
-    - Added flag to specify if the AOs are normalized 
+    - Added flag to specify if the AOs are normalized
    - Added flag to specify if the primitive Gaussians are normalized
    - Added ~lin_dep_cutoff~, the cutoff for linear dependencies
    - Davidson convergence threshold can be adapted from PT2
@ -51,7 +51,9 @@
    - Added ~print_energy~
    - Added ~print_hamiltonian~
    - Added input for two body RDM
-    - Added keyword ~save_wf_after_selection~ 
+    - Added keyword ~save_wf_after_selection~
+    - Added a ~restore_symm~ flag to enforce the restoration of
+      symmetry in matrices

 *** Code

@ -75,11 +77,11 @@
    - Added ~V_ne_psi_energy~
    - Added ~h_core_guess~ routine
    - Fixed Laplacians in real space (indices)
-    - 
+    - Added LIB file to add extra libs in plugin

    ao_one_e_integral_zero
    banned_excitations
-      


-    
+
+
--- a/bin/qp_export_as_tgz
+++ b/bin/qp_export_as_tgz
@ -99,7 +99,9 @@ function find_libs () {
 }

 function find_exec () {
-  find ${QP_ROOT}/$1 -perm /u+x -type f
+  for i in $@ ; do
+     find ${QP_ROOT}/$i -perm /u+x -type f
+  done
 }


@ -119,7 +121,7 @@ fi
 echo "Copying binary files"
 #     --------------------

-FORTRAN_EXEC=$(find_exec src)
+FORTRAN_EXEC=$(find_exec src/*/)
 if [[ -z $FORTRAN_EXEC ]] ; then
  error 'No Fortran binaries found.'
  exit 1
--- a/config/ifort.cfg
+++ b/config/ifort.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32 
+IRPF90_FLAGS : --ninja --align=32  -DINTEL

 # Global options
 ################
--- a/config/ifort_avx.cfg
+++ b/config/ifort_avx.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32
+IRPF90_FLAGS : --ninja --align=32 -DINTEL

 # Global options
 ################
--- a/config/ifort_avx_mpi.cfg
+++ b/config/ifort_avx_mpi.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32 -DMPI
+IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL

 # Global options
 ################
--- a/config/ifort_debug.cfg
+++ b/config/ifort_debug.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32  --assert
+IRPF90_FLAGS : --ninja --align=32  --assert -DINTEL

 # Global options
 ################
--- a/config/ifort_mpi.cfg
+++ b/config/ifort_mpi.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32 -DMPI
+IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL

 # Global options
 ################
--- a/config/ifort_rome.cfg
+++ b/config/ifort_rome.cfg
@ -7,9 +7,9 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=32 
+IRPF90_FLAGS : --ninja --align=32  -DINTEL

 # Global options
 ################
@ -31,8 +31,8 @@ OPENMP  : 1          ; Append OpenMP flags
 # -ftz                       : Flushes denormal results to zero
 #
 [OPT]
-FC       : -traceback
-FCFLAGS  : -O2 -ip -g  -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer
+FC       : -traceback -shared-intel
+FCFLAGS  : -O2 -ip -g  -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer 

 # Profiling flags
 #################
--- a/config/ifort_xHost.cfg
+++ b/config/ifort_xHost.cfg
@ -6,10 +6,10 @@
 # --align=32                 : Align all provided arrays on a 32-byte boundary
 #
 [COMMON]
-FC           : ifort -fpic 
-LAPACK_LIB   : -mkl=parallel
+FC           : ifort -fpic
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90       : irpf90
-IRPF90_FLAGS : --ninja --align=64
+IRPF90_FLAGS : --ninja --align=64 -DINTEL

 # Global options
 ################
--- a/etc/local.rc
+++ b/etc/local.rc
@ -19,4 +19,3 @@
 # export QP_NIC=lo
 # export QP_NIC=ib0

-
--- a/external/irpf90
+++ b/external/irpf90
@ -1 +1 @@
-Subproject commit 132a4a1661c9878d21dcbf0ac14f7fe9a3b110d0
+Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271
--- a/scripts/compilation/qp_create_ninja
+++ b/scripts/compilation/qp_create_ninja
@ -108,6 +108,17 @@ def ninja_create_env_variable(pwd_config_file):
    lib_usr = get_compilation_option(pwd_config_file, "LIB")

    str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr])
+
+    # Read all LIB files in modules
+    libfile = "LIB"
+    try:
+        content = ""
+        with open(libfile,'r') as f:
+            content = f.read()
+            str_lib += " "+content
+    except IOError:
+        pass
+
    l_string.append("LIB = {0} ".format(str_lib))

    l_string.append("")
--- a/scripts/ezfio_interface/ezfio_generate_provider.py
+++ b/scripts/ezfio_interface/ezfio_generate_provider.py
@ -82,6 +82,8 @@ END_PROVIDER
    mpi_correspondance   = {"integer": "MPI_INTEGER",
                            "integer*8": "MPI_INTEGER8",
                            "character*(32)": "MPI_CHARACTER",
+                            "character*(64)": "MPI_CHARACTER",
+                            "character*(256)": "MPI_CHARACTER",
                            "logical": "MPI_LOGICAL",
                            "double precision": "MPI_DOUBLE_PRECISION"}

--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@ -96,8 +96,12 @@ end
 ! x=cos(theta)

        double precision function ylm_real(l,m,x,phi)
-        implicit double precision (a-h,o-z)
-        DIMENSION PM(0:100,0:100)
+        implicit none
+        integer :: MM, iabs_m, m, l
+        double precision :: pi, fourpi, factor, x, phi, coef
+        double precision :: xchap, ychap, zchap
+        double precision, external :: fact
+        double precision :: PM(0:100,0:100), plm
        MM=100
        pi=dacos(-1.d0)
        fourpi=4.d0*pi
@ -1150,8 +1154,10 @@ end
 !       Output:  PM(m,n) --- Pmn(x)
 !       =====================================================
 !
-        IMPLICIT DOUBLE PRECISION (P,X)
-        DIMENSION PM(0:MM,0:(N+1))
+        implicit none
+!        IMPLICIT DOUBLE PRECISION (P,X)
+        integer :: MM, N, I, J, M
+        double precision :: PM(0:MM,0:(N+1)), X, XQ, XS
        DOUBLE PRECISION, SAVE ::  INVERSE(100) = 0.D0
        DOUBLE PRECISION       :: LS, II, JJ
        IF (INVERSE(1) == 0.d0) THEN
@ -1202,8 +1208,9 @@ end
 !  P_l^|m|(cos(theta))  exp(i m phi)

      subroutine erreur(x,n,rmoy,error)
-      implicit double precision(a-h,o-z)
-      dimension x(n)
+      implicit none
+      integer :: i, n
+      double precision :: x(n), rn, rn1, error, rmoy
 ! calcul de la moyenne
      rmoy=0.d0
      do i=1,n
--- a/src/cipsi/EZFIO.cfg
+++ b/src/cipsi/EZFIO.cfg
@ -16,6 +16,12 @@ doc: Maximum number of allowed open shells. Using -1 selects all determinants
 interface: ezfio,ocaml,provider
 default: -1

+[excitation_ref]
+type: integer
+doc: 1: Hartree-Fock determinant, 2:All determinants of the dominant configuration
+interface: ezfio,ocaml,provider
+default: 1
+
 [excitation_max]
 type: integer
 doc: Maximum number of excitation with respect to the Hartree-Fock determinant. Using -1 selects all determinants
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@ -253,12 +253,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
  deallocate(exc_degree)
  nmax=k-1

-  allocate(iorder(nmax))
-  do i=1,nmax
-    iorder(i) = i
-  enddo
-  call isort(indices,iorder,nmax)
-  deallocate(iorder)
+  call isort_noidx(indices,nmax)

  ! Start with 32 elements. Size will double along with the filtering.
  allocate(preinteresting(0:32), prefullinteresting(0:32),     &
@ -676,34 +671,48 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
      logical :: do_cycle
      if (excitation_max >= 0) then
        do_cycle = .True.
-        do k=1,N_dominant_dets_of_cfgs
-          call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
+        if (excitation_ref == 1) then
+          call get_excitation_degree(HF_bitmask,det(1,1),degree,N_int)
          do_cycle = do_cycle .and. (degree > excitation_max)
-        enddo
+        else if (excitation_ref == 2) then
+          do k=1,N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
+            do_cycle = do_cycle .and. (degree > excitation_max)
+          enddo
+        endif
        if (do_cycle) cycle
      endif


      if (excitation_alpha_max >= 0) then
        do_cycle = .True.
-        do k=1,N_dominant_dets_of_cfgs
-          call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
-          do_cycle = do_cycle .and. (degree > excitation_alpha_max)
-        enddo
+        if (excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask,det(1,1),degree,N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        else if (excitation_ref == 2) then
+          do k=1,N_dominant_dets_of_cfgs
+            call get_excitation_degree_spin(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
+            do_cycle = do_cycle .and. (degree > excitation_alpha_max)
+          enddo
+        endif
        if (do_cycle) cycle
      endif


      if (excitation_beta_max >= 0) then
        do_cycle = .True.
-        do k=1,N_dominant_dets_of_cfgs
-          call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
-          do_cycle = do_cycle .and. (degree > excitation_beta_max)
-        enddo
+        if (excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask,det(1,2),degree,N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        else if (excitation_ref == 2) then
+          do k=1,N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,2,k),det(1,2),degree,N_int)
+            do_cycle = do_cycle .and. (degree > excitation_beta_max)
+          enddo
+        endif
        if (do_cycle) cycle
      endif

-
      Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)

      w = 0d0
@ -735,7 +744,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d

      double precision :: eigvalues(N_states+1)
      double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2)
-      integer :: iwork(3+5*(N_states+1)), info, k 
+      integer :: info, k , iwork(N_states+1)

      if (do_diag) then
        double precision :: pt2_matrix(N_states+1,N_states+1)
@ -747,8 +756,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
          pt2_matrix(N_states+1,istate) = mat(istate,p1,p2)
        enddo

-        call DSYEVD( 'V', 'U', N_states+1, pt2_matrix, N_states+1, eigvalues, &
-                     work, size(work), iwork, size(iwork), info )
+        call DSYEV( 'V', 'U', N_states+1, pt2_matrix, N_states+1, eigvalues, &
+                     work, size(work), info )
        if (info /= 0) then
          print *, 'error in '//irp_here
          stop -1
@ -756,7 +765,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
        pt2_matrix = dabs(pt2_matrix)
        iwork(1:N_states+1) = maxloc(pt2_matrix,DIM=1)
        do k=1,N_states
-          e_pert(iwork(k)) = eigvalues(k) - E0(iwork(k))
+          e_pert(k) = eigvalues(iwork(k)) - E0(k)
        enddo
      endif

--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@ -1,9 +1,9 @@
-  BEGIN_PROVIDER [ integer, NSOMOMax]
- &BEGIN_PROVIDER [ integer, NCSFMax]
- &BEGIN_PROVIDER [ integer*8, NMO]
- &BEGIN_PROVIDER [ integer, NBFMax]
- &BEGIN_PROVIDER [ integer, n_CSF]
- &BEGIN_PROVIDER [ integer, maxDetDimPerBF]
+ BEGIN_PROVIDER [ integer, NSOMOMax]
+&BEGIN_PROVIDER [ integer, NCSFMax]
+&BEGIN_PROVIDER [ integer*8, NMO]
+&BEGIN_PROVIDER [ integer, NBFMax]
+&BEGIN_PROVIDER [ integer, n_CSF]
+&BEGIN_PROVIDER [ integer, maxDetDimPerBF]
  implicit none
  BEGIN_DOC
  ! Documentation for NSOMOMax
@ -45,7 +45,7 @@
    n_CSF += ncfg * dimcsfpercfg
    ncfgprev = cfg_seniority_index(i+2)
  enddo
-  END_PROVIDER
+END_PROVIDER


 subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout)
--- a/src/davidson/diagonalization_hcsf_dressed.irp.f
+++ b/src/davidson/diagonalization_hcsf_dressed.irp.f
@ -197,6 +197,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
  call write_int(6,N_st,'Number of states')
  call write_int(6,N_st_diag,'Number of states in diagonalization')
  call write_int(6,sze,'Number of determinants')
+  call write_int(6,sze_csf,'Number of CSFs')
  call write_int(6,nproc_target,'Number of threads for diagonalization')
  call write_double(6, r1, 'Memory(Gb)')
  if (disk_based) then
--- a/src/mo_one_e_ints/EZFIO.cfg
+++ b/src/mo_one_e_ints/EZFIO.cfg
@ -47,10 +47,3 @@ type: Disk_access
 doc: Read/Write |MO| one-electron integrals from/to disk [ Write | Read | None ]
 interface: ezfio,provider,ocaml
 default: None
-
-
-[restore_symm]
-type: logical
-doc: If true, try to find symmetry in the MO coefficient matrices
-interface: ezfio,provider,ocaml
-default: True
--- a/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
+++ b/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f
@ -26,3 +26,43 @@ BEGIN_PROVIDER [double precision, mo_pseudo_integrals, (mo_num,mo_num)]
 END_PROVIDER


+BEGIN_PROVIDER [double precision, mo_pseudo_integrals_local, (mo_num,mo_num)]
+  implicit none
+  BEGIN_DOC
+  ! Pseudopotential integrals in |MO| basis
+  END_DOC
+
+  if (do_pseudo) then
+      call ao_to_mo(                                                 &
+        ao_pseudo_integrals_local,                                   &
+        size(ao_pseudo_integrals_local,1),                           &
+        mo_pseudo_integrals_local,                                   &
+        size(mo_pseudo_integrals_local,1)                            &
+        )
+  else
+      mo_pseudo_integrals_local = 0.d0
+  endif
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [double precision, mo_pseudo_integrals_non_local, (mo_num,mo_num)]
+  implicit none
+  BEGIN_DOC
+  ! Pseudopotential integrals in |MO| basis
+  END_DOC
+
+  if (do_pseudo) then
+      call ao_to_mo(                                                 &
+        ao_pseudo_integrals_non_local,                               &
+        size(ao_pseudo_integrals_non_local,1),                       &
+        mo_pseudo_integrals_non_local,                               &
+        size(mo_pseudo_integrals_non_local,1)                        &
+        )
+  else
+      mo_pseudo_integrals_non_local = 0.d0
+  endif
+
+END_PROVIDER
+
+
--- a/src/two_body_rdm/two_e_dm_mo.irp.f
+++ b/src/two_body_rdm/two_e_dm_mo.irp.f
@ -5,18 +5,14 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)]
   !
   ! <Psi| a^{\dagger}_{i \beta} a^{\dagger}_{j \beta} a_{l \beta} a_{k \beta} |Psi>
   !
-   ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active
+   ! where the indices (i,j,k,l) belong to all MOs.
   !
-   ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2
+   ! The normalization (i.e. sum of diagonal elements) is set to $N_{elec} * (N_{elec} - 1)/2$
   !
-   !  !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act"
+   !  !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO are set to zero
+   ! The state-averaged two-electron energy :
   !
-   !  !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero
-   ! The two-electron energy of each state can be computed as:
-   !
-   !   \sum_{i,j,k,l = 1, n_core_inact_act_orb} two_e_dm_mo(i,j,k,l,istate) * < ii jj | kk ll >
-   !
-   !   with ii = list_core_inact_act(i), jj = list_core_inact_act(j), kk = list_core_inact_act(k), ll = list_core_inact_act(l)
+   !   \sum_{i,j,k,l = 1, mo_num} two_e_dm_mo(i,j,k,l) * < ii jj | kk ll >
   END_DOC
   two_e_dm_mo = 0.d0
   integer                        :: i,j,k,l,iorb,jorb,korb,lorb,istate
--- a/src/utils/EZFIO.cfg
+++ b/src/utils/EZFIO.cfg
@ -0,0 +1,5 @@
+[restore_symm]
+type: logical
+doc: If true, try to find symmetry in the MO coefficient matrices
+interface: ezfio,provider,ocaml
+default: False
--- a/src/utils/intel.f90
+++ b/src/utils/intel.f90
@ -0,0 +1,173 @@
+module intel
+  use, intrinsic :: iso_c_binding
+  interface
+    subroutine ippsSortAscend_32s_I(pSrc, len) bind(C, name='ippsSortAscend_32s_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer, intent(inout) :: pSrc(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortAscend_32f_I(pSrc, len) bind(C, name='ippsSortAscend_32f_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     real, intent(inout) :: pSrc(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortAscend_64s_I(pSrc, len) bind(C, name='ippsSortAscend_64s_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer*8, intent(inout) :: pSrc(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortAscend_64f_I(pSrc, len) bind(C, name='ippsSortAscend_64f_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     double precision, intent(inout) :: pSrc(len)
+    end
+  end interface
+
+  interface
+    subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer, intent(in), value :: dataType
+     integer, intent(out) :: pBufSize
+    end
+  end interface
+
+  interface
+    subroutine ippsSortRadixAscend_16s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_16s_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer*2, intent(inout) :: pSrc(len)
+     character, intent(inout) :: pTmp(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixAscend_32s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32s_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer, intent(inout) :: pSrc(len)
+     character, intent(inout) :: pTmp(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixAscend_32f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32f_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     real, intent(inout) :: pSrc(len)
+     character, intent(inout) :: pTmp(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixAscend_64s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64s_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer*8, intent(inout) :: pSrc(len)
+     character, intent(inout) :: pTmp(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixAscend_64f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64f_I')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     double precision, intent(inout) :: pSrc(len)
+     character, intent(inout) :: pTmp(len)
+    end
+  end interface
+
+  interface
+    subroutine ippsSortRadixIndexAscend_16s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_16s')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer*2, intent(inout) :: pSrc(len)
+     integer, intent(in), value :: srcStrideBytes
+     integer, intent(inout) :: pDstIndx(len)
+     character, intent(inout) :: pTmpIndx(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixIndexAscend_32s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_32s')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer, intent(inout) :: pSrc(len)
+     integer, intent(in), value :: srcStrideBytes
+     integer, intent(inout) :: pDstIndx(len)
+     character, intent(inout) :: pTmpIndx(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixIndexAscend_32f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_32f')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     real   , intent(inout) :: pSrc(len)
+     integer, intent(in), value :: srcStrideBytes
+     integer, intent(inout) :: pDstIndx(len)
+     character, intent(inout) :: pTmpIndx(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixIndexAscend_64s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_64s')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     integer*8, intent(inout) :: pSrc(len)
+     integer, intent(in), value :: srcStrideBytes
+     integer, intent(inout) :: pDstIndx(len)
+     character, intent(inout) :: pTmpIndx(len)
+    end
+  end interface
+  interface
+    subroutine ippsSortRadixIndexAscend_64f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_64f')
+     use  iso_c_binding
+     integer, intent(in), value :: len
+     real*8 , intent(inout) :: pSrc(len)
+     integer, intent(in), value :: srcStrideBytes
+     integer, intent(inout) :: pDstIndx(len)
+     character, intent(inout) :: pTmpIndx(len)
+    end
+  end interface
+
+  interface
+    subroutine ippsSortIndexAscend_32f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32f_I')
+     use  iso_c_binding
+     real(4), intent(in) :: pSrcDst(*)
+     integer(4), intent(inout) :: pDstIndx(*)
+     integer(4), intent(in), value :: len
+    end
+  end interface
+  interface
+    subroutine ippsSortIndexAscend_32s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32s_I')
+     use  iso_c_binding
+     integer(4), intent(in) :: pSrcDst(*)
+     integer(4), intent(inout) :: pDstIndx(*)
+     integer(4), intent(in), value :: len
+    end
+  end interface
+  interface
+    subroutine ippsSortIndexAscend_64f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64f_I')
+     use  iso_c_binding
+     real(8), intent(in) :: pSrcDst(*)
+     integer(4), intent(inout) :: pDstIndx(*)
+     integer(4), intent(in), value :: len
+    end
+  end interface
+  interface
+    subroutine ippsSortIndexAscend_64s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64s_I')
+     use  iso_c_binding
+     integer(8), intent(in) :: pSrcDst(*)
+     integer(4), intent(inout) :: pDstIndx(*)
+     integer(4), intent(in), value :: len
+    end
+  end interface
+  interface
+    subroutine ippsSortIndexAscend_16s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_16s_I')
+     use  iso_c_binding
+     integer(2), intent(in) :: pSrcDst(*)
+     integer(4), intent(inout) :: pDstIndx(*)
+     integer(4), intent(in), value :: len
+    end
+  end interface
+end module
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@ -57,7 +57,7 @@ BEGIN_TEMPLATE
  $type                          :: c, tmp
  integer                        :: itmp
  integer                        :: i, j
- 
+
  if(isize<2)return

  c = x( shiftr(first+last,1) )
@ -262,7 +262,60 @@ SUBST [ X, type ]
 i2 ; integer*2 ;;
 END_TEMPLATE

+
+!---------------------- INTEL
+IRP_IF INTEL
+
 BEGIN_TEMPLATE
+ subroutine $Xsort(x,iorder,isize)
+  use intel
+  implicit none
+  BEGIN_DOC
+  ! Sort array x(isize).
+  ! iorder in input should be (1,2,3,...,isize), and in output
+  ! contains the new order of the elements.
+  END_DOC
+  integer,intent(in)             :: isize
+  $type,intent(inout)            :: x(isize)
+  integer,intent(inout)          :: iorder(isize)
+  integer                        :: n
+  character, allocatable         :: tmp(:)
+  if (isize < 2) return
+  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
+  allocate(tmp(n))
+  call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
+  deallocate(tmp)
+  iorder(1:isize) = iorder(1:isize)+1
+  call $Xset_order(x,iorder,isize)
+ end
+
+ subroutine $Xsort_noidx(x,isize)
+  use intel
+  implicit none
+  BEGIN_DOC
+  ! Sort array x(isize).
+  ! iorder in input should be (1,2,3,...,isize), and in output
+  ! contains the new order of the elements.
+  END_DOC
+  integer,intent(in)             :: isize
+  $type,intent(inout)            :: x(isize)
+  integer                        :: n
+  character, allocatable         :: tmp(:)
+  if (isize < 2) return
+  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
+  allocate(tmp(n))
+  call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
+  deallocate(tmp)
+ end
+
+SUBST [ X, type, ityp, n, ippsz ]
+   ; real ; 32f ; 4 ; 13 ;;
+ i ; integer ; 32s ; 4 ; 11 ;;
+ i2 ; integer*2 ; 16s ; 2 ; 7 ;;
+END_TEMPLATE
+
+BEGIN_TEMPLATE
+
 subroutine $Xsort(x,iorder,isize)
  implicit none
  BEGIN_DOC
@ -289,12 +342,12 @@ BEGIN_TEMPLATE
  endif
 end subroutine $Xsort

-SUBST [ X, type, Y ]
-   ; real ; i ;;
- d ; double precision ; i8 ;;
+SUBST [ X, type ]
+ d ; double precision ;;
 END_TEMPLATE

 BEGIN_TEMPLATE
+
 subroutine $Xsort(x,iorder,isize)
  implicit none
  BEGIN_DOC
@ -306,8 +359,112 @@ BEGIN_TEMPLATE
  $type,intent(inout)            :: x(isize)
  integer,intent(inout)          :: iorder(isize)
  integer                        :: n
-!  call $Xradix_sort(x,iorder,isize,-1)
-  call quick_$Xsort(x,iorder,isize)
+  if (isize < 2) then
+    return
+  endif
+  call sorted_$Xnumber(x,isize,n)
+  if (isize == n) then
+    return
+  endif
+  if ( isize < 32) then
+    call insertion_$Xsort(x,iorder,isize)
+  else
+    call $Xradix_sort(x,iorder,isize,-1)
+  endif
+ end subroutine $Xsort
+
+SUBST [ X, type ]
+ i8 ; integer*8 ;;
+END_TEMPLATE
+
+!---------------------- END INTEL
+IRP_ELSE
+!---------------------- NON-INTEL
+BEGIN_TEMPLATE
+
+ subroutine $Xsort_noidx(x,isize)
+  implicit none
+  BEGIN_DOC
+  ! Sort array x(isize).
+  END_DOC
+  integer,intent(in)             :: isize
+  $type,intent(inout)            :: x(isize)
+  integer, allocatable           :: iorder(:)
+  integer                        :: i
+  allocate(iorder(isize))
+  do i=1,isize
+   iorder(i)=i
+  enddo
+  call $Xsort(x,iorder,isize)
+  deallocate(iorder)
+ end subroutine $Xsort_noidx
+
+SUBST [ X, type ]
+   ; real ;;
+ d ; double precision ;;
+ i ; integer ;;
+ i8 ; integer*8 ;;
+ i2 ; integer*2 ;;
+END_TEMPLATE
+
+BEGIN_TEMPLATE
+
+ subroutine $Xsort(x,iorder,isize)
+  implicit none
+  BEGIN_DOC
+  ! Sort array x(isize).
+  ! iorder in input should be (1,2,3,...,isize), and in output
+  ! contains the new order of the elements.
+  END_DOC
+  integer,intent(in)             :: isize
+  $type,intent(inout)            :: x(isize)
+  integer,intent(inout)          :: iorder(isize)
+  integer                        :: n
+  if (isize < 2) then
+    return
+  endif
+!  call sorted_$Xnumber(x,isize,n)
+!  if (isize == n) then
+!    return
+!  endif
+  if ( isize < 32) then
+    call insertion_$Xsort(x,iorder,isize)
+  else
+!    call heap_$Xsort(x,iorder,isize)
+    call quick_$Xsort(x,iorder,isize)
+  endif
+ end subroutine $Xsort
+
+SUBST [ X, type ]
+   ; real ;;
+ d ; double precision ;;
+END_TEMPLATE
+
+BEGIN_TEMPLATE
+
+ subroutine $Xsort(x,iorder,isize)
+  implicit none
+  BEGIN_DOC
+  ! Sort array x(isize).
+  ! iorder in input should be (1,2,3,...,isize), and in output
+  ! contains the new order of the elements.
+  END_DOC
+  integer,intent(in)             :: isize
+  $type,intent(inout)            :: x(isize)
+  integer,intent(inout)          :: iorder(isize)
+  integer                        :: n
+  if (isize < 2) then
+    return
+  endif
+  call sorted_$Xnumber(x,isize,n)
+  if (isize == n) then
+    return
+  endif
+  if ( isize < 32) then
+    call insertion_$Xsort(x,iorder,isize)
+  else
+    call $Xradix_sort(x,iorder,isize,-1)
+  endif
 end subroutine $Xsort

 SUBST [ X, type ]
@ -316,6 +473,11 @@ SUBST [ X, type ]
 i2 ; integer*2 ;;
 END_TEMPLATE

+IRP_ENDIF
+!---------------------- END NON-INTEL
+
+
+
 BEGIN_TEMPLATE
 subroutine $Xset_order(x,iorder,isize)
  implicit none
@ -413,10 +575,12 @@ SUBST [ X, type ]
 i2; integer*2 ;;
 END_TEMPLATE

+
 BEGIN_TEMPLATE

- recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
+recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
  implicit none
+
  BEGIN_DOC
  ! Sort integer array x(isize) using the radix sort algorithm.
  ! iorder in input should be (1,2,3,...,isize), and in output
@ -646,3 +810,4 @@ SUBST [ X, type, integer_size, is_big, big, int_type ]
 END_TEMPLATE


+