From cfd41fc13d5a6911da33c4d235b32d9532fb2085 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 29 Apr 2021 14:43:02 +0200 Subject: [PATCH 01/15] Modify exc degree in cipsi --- src/cipsi/EZFIO.cfg | 6 ++++++ src/cipsi/selection.irp.f | 38 ++++++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/cipsi/EZFIO.cfg b/src/cipsi/EZFIO.cfg index 3fdf74d2..19b45ac1 100644 --- a/src/cipsi/EZFIO.cfg +++ b/src/cipsi/EZFIO.cfg @@ -16,6 +16,12 @@ doc: Maximum number of allowed open shells. Using -1 selects all determinants interface: ezfio,ocaml,provider default: -1 +[excitation_ref] +type: integer +doc: 1: Hartree-Fock determinant, 2:All determinants of the dominant configuration +interface: ezfio,ocaml,provider +default: 1 + [excitation_max] type: integer doc: Maximum number of excitation with respect to the Hartree-Fock determinant. Using -1 selects all determinants diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index e688a7bb..59792ea9 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -676,34 +676,48 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d logical :: do_cycle if (excitation_max >= 0) then do_cycle = .True. - do k=1,N_dominant_dets_of_cfgs - call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int) + if (excitation_ref == 1) then + call get_excitation_degree(HF_bitmask,det(1,1),degree,N_int) do_cycle = do_cycle .and. (degree > excitation_max) - enddo + else if (excitation_ref == 2) then + do k=1,N_dominant_dets_of_cfgs + call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + enddo + endif if (do_cycle) cycle endif if (excitation_alpha_max >= 0) then do_cycle = .True. - do k=1,N_dominant_dets_of_cfgs - call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int) - do_cycle = do_cycle .and. (degree > excitation_alpha_max) - enddo + if (excitation_ref == 1) then + call get_excitation_degree_spin(HF_bitmask,det(1,1),degree,N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + else if (excitation_ref == 2) then + do k=1,N_dominant_dets_of_cfgs + call get_excitation_degree_spin(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int) + do_cycle = do_cycle .and. (degree > excitation_alpha_max) + enddo + endif if (do_cycle) cycle endif if (excitation_beta_max >= 0) then do_cycle = .True. - do k=1,N_dominant_dets_of_cfgs - call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int) - do_cycle = do_cycle .and. (degree > excitation_beta_max) - enddo + if (excitation_ref == 1) then + call get_excitation_degree_spin(HF_bitmask,det(1,2),degree,N_int) + do_cycle = do_cycle .and. (degree > excitation_max) + else if (excitation_ref == 2) then + do k=1,N_dominant_dets_of_cfgs + call get_excitation_degree(dominant_dets_of_cfgs(1,2,k),det(1,2),degree,N_int) + do_cycle = do_cycle .and. (degree > excitation_beta_max) + enddo + endif if (do_cycle) cycle endif - Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) w = 0d0 From 2546cdd317cd4483d927c0e561806587001158b0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 May 2021 17:01:21 +0200 Subject: [PATCH 02/15] Add LIB files in plugins --- RELEASE_NOTES.org | 12 ++++++------ scripts/compilation/qp_create_ninja | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index e5fb24a7..f9dbcbb0 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -29,7 +29,7 @@ - Disk-based Davidson when too much memory is required - Fixed bug in DIIS - Fixed bug in molden (Au -> Angs) - + *** User interface - Added ~qp_basis~ script to install a basis set from the ~bse~ @@ -38,7 +38,7 @@ ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with large wave functions - Removed ~etc/ninja.rc~ - - Added flag to specify if the AOs are normalized + - Added flag to specify if the AOs are normalized - Added flag to specify if the primitive Gaussians are normalized - Added ~lin_dep_cutoff~, the cutoff for linear dependencies - Davidson convergence threshold can be adapted from PT2 @@ -51,7 +51,7 @@ - Added ~print_energy~ - Added ~print_hamiltonian~ - Added input for two body RDM - - Added keyword ~save_wf_after_selection~ + - Added keyword ~save_wf_after_selection~ *** Code @@ -75,11 +75,11 @@ - Added ~V_ne_psi_energy~ - Added ~h_core_guess~ routine - Fixed Laplacians in real space (indices) - - + - Added LIB file to add extra libs in plugin ao_one_e_integral_zero banned_excitations - - + + diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 38f7e765..0a7f4b71 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -108,6 +108,15 @@ def ninja_create_env_variable(pwd_config_file): lib_usr = get_compilation_option(pwd_config_file, "LIB") str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr]) + + # Read all LIB files in modules + targetPattern = r"src/**/LIB" + for libfile in glob.glob(targetPattern): + content = "" + with open(libfile,'r') as f: + content = f.read() + str_lib += " "+content + l_string.append("LIB = {0} ".format(str_lib)) l_string.append("") From d3699eed5ad2c85688e4d3fb1864cdfedbdfb8bf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 May 2021 17:56:46 +0200 Subject: [PATCH 03/15] Fix for LIB --- scripts/compilation/qp_create_ninja | 10 ++++++---- scripts/ezfio_interface/ezfio_generate_provider.py | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 0a7f4b71..a132bc9e 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -110,12 +110,14 @@ def ninja_create_env_variable(pwd_config_file): str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr]) # Read all LIB files in modules - targetPattern = r"src/**/LIB" - for libfile in glob.glob(targetPattern): + libfile = "LIB" + try: content = "" with open(libfile,'r') as f: - content = f.read() - str_lib += " "+content + content = f.read() + str_lib += " "+content + except IOError: + pass l_string.append("LIB = {0} ".format(str_lib)) diff --git a/scripts/ezfio_interface/ezfio_generate_provider.py b/scripts/ezfio_interface/ezfio_generate_provider.py index 4b43a88a..6b49955b 100755 --- a/scripts/ezfio_interface/ezfio_generate_provider.py +++ b/scripts/ezfio_interface/ezfio_generate_provider.py @@ -82,6 +82,8 @@ END_PROVIDER mpi_correspondance = {"integer": "MPI_INTEGER", "integer*8": "MPI_INTEGER8", "character*(32)": "MPI_CHARACTER", + "character*(64)": "MPI_CHARACTER", + "character*(256)": "MPI_CHARACTER", "logical": "MPI_LOGICAL", "double precision": "MPI_DOUBLE_PRECISION"} From a9d23e88f8cc83f4f6221deb94c315506a31ebda Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 May 2021 13:34:16 +0200 Subject: [PATCH 04/15] Add providers for trexio --- src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f b/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f index 179b33ed..f4c1012d 100644 --- a/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f +++ b/src/mo_one_e_ints/pot_mo_pseudo_ints.irp.f @@ -26,3 +26,43 @@ BEGIN_PROVIDER [double precision, mo_pseudo_integrals, (mo_num,mo_num)] END_PROVIDER +BEGIN_PROVIDER [double precision, mo_pseudo_integrals_local, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Pseudopotential integrals in |MO| basis + END_DOC + + if (do_pseudo) then + call ao_to_mo( & + ao_pseudo_integrals_local, & + size(ao_pseudo_integrals_local,1), & + mo_pseudo_integrals_local, & + size(mo_pseudo_integrals_local,1) & + ) + else + mo_pseudo_integrals_local = 0.d0 + endif + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, mo_pseudo_integrals_non_local, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Pseudopotential integrals in |MO| basis + END_DOC + + if (do_pseudo) then + call ao_to_mo( & + ao_pseudo_integrals_non_local, & + size(ao_pseudo_integrals_non_local,1), & + mo_pseudo_integrals_non_local, & + size(mo_pseudo_integrals_non_local,1) & + ) + else + mo_pseudo_integrals_non_local = 0.d0 + endif + +END_PROVIDER + + From 7fa1527a3de79015fc70aac4053ed84cc1ab50fc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 16 May 2021 01:32:55 +0200 Subject: [PATCH 05/15] Move restore_symm keyword --- RELEASE_NOTES.org | 2 ++ bin/qp_export_as_tgz | 6 ++++-- src/mo_one_e_ints/EZFIO.cfg | 7 ------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index f9dbcbb0..02176ffa 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -52,6 +52,8 @@ - Added ~print_hamiltonian~ - Added input for two body RDM - Added keyword ~save_wf_after_selection~ + - Added a ~restore_symm~ flag to enforce the restoration of + symmetry in matrices *** Code diff --git a/bin/qp_export_as_tgz b/bin/qp_export_as_tgz index 7332d1ed..20624dba 100755 --- a/bin/qp_export_as_tgz +++ b/bin/qp_export_as_tgz @@ -99,7 +99,9 @@ function find_libs () { } function find_exec () { - find ${QP_ROOT}/$1 -perm /u+x -type f + for i in $@ ; do + find ${QP_ROOT}/$i -perm /u+x -type f + done } @@ -119,7 +121,7 @@ fi echo "Copying binary files" # -------------------- -FORTRAN_EXEC=$(find_exec src) +FORTRAN_EXEC=$(find_exec src/*/) if [[ -z $FORTRAN_EXEC ]] ; then error 'No Fortran binaries found.' exit 1 diff --git a/src/mo_one_e_ints/EZFIO.cfg b/src/mo_one_e_ints/EZFIO.cfg index ca4dabf4..d58b3da1 100644 --- a/src/mo_one_e_ints/EZFIO.cfg +++ b/src/mo_one_e_ints/EZFIO.cfg @@ -47,10 +47,3 @@ type: Disk_access doc: Read/Write |MO| one-electron integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None - - -[restore_symm] -type: logical -doc: If true, try to find symmetry in the MO coefficient matrices -interface: ezfio,provider,ocaml -default: True From c226ffd9dfdbd05f822884f932956cd356191808 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 16 May 2021 11:24:05 +0200 Subject: [PATCH 06/15] forgot file --- src/utils/EZFIO.cfg | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/utils/EZFIO.cfg diff --git a/src/utils/EZFIO.cfg b/src/utils/EZFIO.cfg new file mode 100644 index 00000000..7d367f0c --- /dev/null +++ b/src/utils/EZFIO.cfg @@ -0,0 +1,5 @@ +[restore_symm] +type: logical +doc: If true, try to find symmetry in the MO coefficient matrices +interface: ezfio,provider,ocaml +default: False From f543d386a9aa6ecc97abe06be45b3cf295a6a903 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 17 May 2021 15:08:44 +0200 Subject: [PATCH 07/15] Fix minor bug in multi-state PT2 --- src/cipsi/selection.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index e688a7bb..62c5dc27 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -735,7 +735,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d double precision :: eigvalues(N_states+1) double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) - integer :: iwork(3+5*(N_states+1)), info, k + integer :: info, k , iwork(N_states+1) if (do_diag) then double precision :: pt2_matrix(N_states+1,N_states+1) @@ -747,8 +747,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d pt2_matrix(N_states+1,istate) = mat(istate,p1,p2) enddo - call DSYEVD( 'V', 'U', N_states+1, pt2_matrix, N_states+1, eigvalues, & - work, size(work), iwork, size(iwork), info ) + call DSYEV( 'V', 'U', N_states+1, pt2_matrix, N_states+1, eigvalues, & + work, size(work), info ) if (info /= 0) then print *, 'error in '//irp_here stop -1 @@ -756,7 +756,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d pt2_matrix = dabs(pt2_matrix) iwork(1:N_states+1) = maxloc(pt2_matrix,DIM=1) do k=1,N_states - e_pert(iwork(k)) = eigvalues(k) - E0(iwork(k)) + e_pert(k) = eigvalues(iwork(k)) - E0(k) enddo endif From bb0743ac1ee439f387809e83f6927883aa320275 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 May 2021 18:38:11 +0200 Subject: [PATCH 08/15] Updated IRPF90 --- etc/local.rc | 2 +- external/irpf90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/local.rc b/etc/local.rc index 954b315b..0e212c24 100644 --- a/etc/local.rc +++ b/etc/local.rc @@ -19,4 +19,4 @@ # export QP_NIC=lo # export QP_NIC=ib0 - +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/scemama/TREX/trexio/_install/lib diff --git a/external/irpf90 b/external/irpf90 index 132a4a16..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 132a4a1661c9878d21dcbf0ac14f7fe9a3b110d0 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 From 1dc2350b328decf2b2b35f4fe5cdcf61e0d85e9d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 May 2021 18:43:48 +0200 Subject: [PATCH 09/15] revert etc/local.rc --- etc/local.rc | 1 - 1 file changed, 1 deletion(-) diff --git a/etc/local.rc b/etc/local.rc index 0e212c24..182599c6 100644 --- a/etc/local.rc +++ b/etc/local.rc @@ -19,4 +19,3 @@ # export QP_NIC=lo # export QP_NIC=ib0 -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/scemama/TREX/trexio/_install/lib From 3ceea5e8b27b1ce82286ac8254964fd8e784e6df Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 20 May 2021 18:19:44 +0200 Subject: [PATCH 10/15] Comment in 2rdm --- src/two_body_rdm/two_e_dm_mo.irp.f | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 19e8d75e..4dadd2e6 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -5,18 +5,14 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] ! ! ! - ! WHERE ALL ORBITALS (i,j,k,l) BELONGS TO ALL OCCUPIED ORBITALS : core, inactive and active + ! where the indices (i,j,k,l) belong to all MOs. ! - ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1)/2 + ! The normalization (i.e. sum of diagonal elements) is set to $N_{elec} * (N_{elec} - 1)/2$ ! - ! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" + ! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO are set to zero + ! The state-averaged two-electron energy : ! - ! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - ! The two-electron energy of each state can be computed as: - ! - ! \sum_{i,j,k,l = 1, n_core_inact_act_orb} two_e_dm_mo(i,j,k,l,istate) * < ii jj | kk ll > - ! - ! with ii = list_core_inact_act(i), jj = list_core_inact_act(j), kk = list_core_inact_act(k), ll = list_core_inact_act(l) + ! \sum_{i,j,k,l = 1, mo_num} two_e_dm_mo(i,j,k,l) * < ii jj | kk ll > END_DOC two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate From 32e2afca90dac6d12a6614fdbec39a03ad4c8f8a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 31 May 2021 01:48:34 +0200 Subject: [PATCH 11/15] Using Intel IPP for sorting --- config/ifort.cfg | 4 +- config/ifort_avx.cfg | 4 +- config/ifort_avx_mpi.cfg | 4 +- config/ifort_debug.cfg | 4 +- config/ifort_mpi.cfg | 4 +- config/ifort_rome.cfg | 4 +- config/ifort_xHost.cfg | 6 +- src/ao_one_e_ints/pseudopot.f90 | 19 ++- src/cipsi/selection.irp.f | 7 +- src/csf/sigma_vector.irp.f | 106 ++++++------ .../diagonalization_hcsf_dressed.irp.f | 1 + src/utils/intel.f90 | 70 ++++++++ src/utils/sort.irp.f | 151 ++++++++++++++++-- 13 files changed, 297 insertions(+), 87 deletions(-) create mode 100644 src/utils/intel.f90 diff --git a/config/ifort.cfg b/config/ifort.cfg index 866aae3d..714c4b10 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index d689050e..a2cb4c8a 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_avx_mpi.cfg b/config/ifort_avx_mpi.cfg index 9a839e66..f2bb8889 100644 --- a/config/ifort_avx_mpi.cfg +++ b/config/ifort_avx_mpi.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : mpiifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL # Global options ################ diff --git a/config/ifort_debug.cfg b/config/ifort_debug.cfg index f2fbf8ce..9b718380 100644 --- a/config/ifort_debug.cfg +++ b/config/ifort_debug.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL # Global options ################ diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index 57087847..e0d489a0 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : mpiifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL # Global options ################ diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index a4bce680..3b70f98b 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -7,9 +7,9 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg index 5d952e54..ddb4aa2d 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_xHost.cfg @@ -6,10 +6,10 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : ifort -fpic -LAPACK_LIB : -mkl=parallel +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=64 +IRPF90_FLAGS : --ninja --align=64 -DINTEL # Global options ################ diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 6c8e5c83..48e3803e 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -96,8 +96,12 @@ end ! x=cos(theta) double precision function ylm_real(l,m,x,phi) - implicit double precision (a-h,o-z) - DIMENSION PM(0:100,0:100) + implicit none + integer :: MM, iabs_m, m, l + double precision :: pi, fourpi, factor, x, phi, coef + double precision :: xchap, ychap, zchap + double precision, external :: fact + double precision :: PM(0:100,0:100), plm MM=100 pi=dacos(-1.d0) fourpi=4.d0*pi @@ -1150,8 +1154,10 @@ end ! Output: PM(m,n) --- Pmn(x) ! ===================================================== ! - IMPLICIT DOUBLE PRECISION (P,X) - DIMENSION PM(0:MM,0:(N+1)) + implicit none +! IMPLICIT DOUBLE PRECISION (P,X) + integer :: MM, N, I, J, M + double precision :: PM(0:MM,0:(N+1)), X, XQ, XS DOUBLE PRECISION, SAVE :: INVERSE(100) = 0.D0 DOUBLE PRECISION :: LS, II, JJ IF (INVERSE(1) == 0.d0) THEN @@ -1202,8 +1208,9 @@ end ! P_l^|m|(cos(theta)) exp(i m phi) subroutine erreur(x,n,rmoy,error) - implicit double precision(a-h,o-z) - dimension x(n) + implicit none + integer :: i, n + double precision :: x(n), rn, rn1, error, rmoy ! calcul de la moyenne rmoy=0.d0 do i=1,n diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 11492652..eda9642c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -253,12 +253,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(exc_degree) nmax=k-1 - allocate(iorder(nmax)) - do i=1,nmax - iorder(i) = i - enddo - call isort(indices,iorder,nmax) - deallocate(iorder) + call isort_noidx(indices,nmax) ! Start with 32 elements. Size will double along with the filtering. allocate(preinteresting(0:32), prefullinteresting(0:32), & diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 265d2384..0f3f093c 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1,54 +1,62 @@ - BEGIN_PROVIDER [ integer, NSOMOMax] - &BEGIN_PROVIDER [ integer, NCSFMax] - &BEGIN_PROVIDER [ integer*8, NMO] - &BEGIN_PROVIDER [ integer, NBFMax] - &BEGIN_PROVIDER [ integer, n_CSF] - &BEGIN_PROVIDER [ integer, maxDetDimPerBF] - implicit none - BEGIN_DOC - ! Documentation for NSOMOMax - ! The maximum number of SOMOs for the current calculation. - ! required for the calculation of prototype arrays. - END_DOC - NSOMOMax = min(elec_num, cfg_nsomo_max + 2) - ! Note that here we need NSOMOMax + 2 sizes - NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 - NBFMax = NCSFMax - maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)))) - NMO = n_act_orb - integer i,j,k,l - integer startdet,enddet - integer ncfg,ncfgprev - integer NSOMO - integer dimcsfpercfg - integer detDimperBF - real*8 :: coeff - integer MS - integer ncfgpersomo - detDimperBF = 0 - MS = elec_alpha_num-elec_beta_num - ! number of cfgs = number of dets for 0 somos - n_CSF = cfg_seniority_index(0)-1 - ncfgprev = cfg_seniority_index(0) - do i = 0-iand(MS,1)+2, NSOMOMax,2 - if(cfg_seniority_index(i) .EQ. -1)then - ncfgpersomo = N_configuration + 1 + BEGIN_PROVIDER [ integer, NSOMOMax] +&BEGIN_PROVIDER [ integer, NCSFMax] +&BEGIN_PROVIDER [ integer*8, NMO] +&BEGIN_PROVIDER [ integer, NBFMax] +&BEGIN_PROVIDER [ integer, n_CSF] +&BEGIN_PROVIDER [ integer, maxDetDimPerBF] + implicit none + + BEGIN_DOC + ! The maximum number of SOMOs for the current calculation. + ! required for the calculation of prototype arrays. + END_DOC + + integer :: i,j,k,l + integer :: startdet,enddet + integer :: ncfg,ncfgprev + integer :: NSOMO + integer :: dimcsfpercfg + integer :: detDimperBF + real*8 :: coeff + integer :: MS + integer :: ncfgpersomo + double precision :: bin_1, bin_2 + + NSOMOMax = min(elec_num, cfg_nsomo_max + 2) + + bin_1 = binom(NSOMOMax, (NSOMOMax+1)/2) + bin_2 = binom(NSOMOMax,((NSOMOMax+1)/2)+1) + + ! Note that here we need NSOMOMax + 2 sizes + NCSFMax = max(1,int(bin_1-bin_2)) + NBFMax = NCSFMax + maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)))) + NMO = n_act_orb + + detDimperBF = 0 + MS = elec_alpha_num-elec_beta_num + + ! number of cfgs = number of dets for 0 somos + n_CSF = cfg_seniority_index(0)-1 + ncfgprev = cfg_seniority_index(0) + do i = 0-iand(MS,1)+2, cfg_nsomo_max,2 + if(cfg_seniority_index(i) == -1)then + ncfgpersomo = N_configuration + 1 else - ncfgpersomo = cfg_seniority_index(i) + ncfgpersomo = cfg_seniority_index(i) endif - ncfg = ncfgpersomo - ncfgprev - !detDimperBF = max(1,nint((binom(i,(i+1)/2)))) - if (i > 2) then - dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) - else - dimcsfpercfg = 1 - endif - n_CSF += ncfg * dimcsfpercfg - !if(cfg_seniority_index(i+2) == -1) EXIT - !if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF - ncfgprev = cfg_seniority_index(i) - enddo - END_PROVIDER + ncfg = ncfgpersomo - ncfgprev + if (i > 2) then + bin_1 = binom(i-2, (i-2+1)/2) + bin_2 = binom(i-2,((i-2+1)/2)+1) + dimcsfpercfg = max(1,int(bin_1-bin_2)) + else + dimcsfpercfg = 1 + endif + n_CSF += ncfg * dimcsfpercfg + ncfgprev = cfg_seniority_index(i) + enddo +END_PROVIDER subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 2a83cc28..da23b919 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -197,6 +197,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N call write_int(6,N_st,'Number of states') call write_int(6,N_st_diag,'Number of states in diagonalization') call write_int(6,sze,'Number of determinants') + call write_int(6,sze_csf,'Number of CSFs') call write_int(6,nproc_target,'Number of threads for diagonalization') call write_double(6, r1, 'Memory(Gb)') if (disk_based) then diff --git a/src/utils/intel.f90 b/src/utils/intel.f90 new file mode 100644 index 00000000..681adde9 --- /dev/null +++ b/src/utils/intel.f90 @@ -0,0 +1,70 @@ +module intel + use, intrinsic :: iso_c_binding + interface + subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(in), value :: dataType + integer, intent(out) :: pBufSize + end + end interface + interface + subroutine ippsSortAscend_32s_I(pSrc, len) bind(C, name='ippsSortAscend_32s_I') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortRadixAscend_32s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32s_I') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + integer, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_32s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_32s') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + integer, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_32f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_32f') + use iso_c_binding + integer, intent(in), value :: len + real , intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + integer, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortIndexAscend_32f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32f_I') + use iso_c_binding + real(4), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_32s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32s_I') + use iso_c_binding + integer(4), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_64f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64f_I') + use iso_c_binding + real(8), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface +end module diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index 2a655eed..be58e7a5 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -57,7 +57,7 @@ BEGIN_TEMPLATE $type :: c, tmp integer :: itmp integer :: i, j - + if(isize<2)return c = x( shiftr(first+last,1) ) @@ -262,6 +262,104 @@ SUBST [ X, type ] i2 ; integer*2 ;; END_TEMPLATE +IRP_IF INTEL + + subroutine sort(x,iorder,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + real ,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + call ippsSortIndexAscend_32f_I(x, iorder, isize) + iorder(:) = iorder(:)+1 + end subroutine sort + + subroutine dsort(x,iorder,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + real(8) ,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + call ippsSortIndexAscend_64f_I(x, iorder, isize) + iorder(:) = iorder(:)+1 + end subroutine dsort + + subroutine isort(x,iorder,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + integer ,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + integer, allocatable :: iorder1(:) + allocate(iorder1(isize*2)) + n=4 + call ippsSortRadixIndexAscend_32s(x, n, iorder, isize, iorder1) + iorder(1:isize) = iorder(1:isize)+1 + deallocate(iorder1) + call iset_order(x,iorder,isize) + end subroutine isort + + subroutine isort_noidx(x,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + integer ,intent(inout) :: x(isize) + integer, allocatable :: iorder1(:) + integer :: n + call ippsSortRadixIndexGetBufferSize(isize, 11, n) + n = n/4 + allocate(iorder1(n)) + call ippsSortRadixAscend_32s_I(x, isize, iorder1) + deallocate(iorder1) + end subroutine isort_noidx + + +BEGIN_TEMPLATE + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n +! call $Xradix_sort(x,iorder,isize,-1) + call quick_$Xsort(x,iorder,isize) + end subroutine $Xsort + +SUBST [ X, type ] + i8 ; integer*8 ;; + i2 ; integer*2 ;; +END_TEMPLATE + +IRP_ELSE + BEGIN_TEMPLATE subroutine $Xsort(x,iorder,isize) implicit none @@ -289,9 +387,9 @@ BEGIN_TEMPLATE endif end subroutine $Xsort -SUBST [ X, type, Y ] - ; real ; i ;; - d ; double precision ; i8 ;; +SUBST [ X, type ] + ; real ;; + d ; double precision ;; END_TEMPLATE BEGIN_TEMPLATE @@ -316,6 +414,22 @@ SUBST [ X, type ] i2 ; integer*2 ;; END_TEMPLATE + subroutine isort_noidx(x,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer, allocatable :: iorder + allocate(iorder) + iorder=0 + call $Xradix_sort(x,iorder,isize,-1) + deallocate(iorder) + end subroutine $Xsort +IRP_ENDIF + + BEGIN_TEMPLATE subroutine $Xset_order(x,iorder,isize) implicit none @@ -413,10 +527,15 @@ SUBST [ X, type ] i2; integer*2 ;; END_TEMPLATE + BEGIN_TEMPLATE - recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) +recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) +IRP_IF INTEL + use intel +IRP_ENDIF implicit none + BEGIN_DOC ! Sort integer array x(isize) using the radix sort algorithm. ! iorder in input should be (1,2,3,...,isize), and in output @@ -448,6 +567,15 @@ BEGIN_TEMPLATE stop endif + IRP_IF INTEL + if ( ($type == 4).and.($integer_size == 32).and.($is_big == .False.) ) then + $intel + iorder(:) = iorder(:)+1 + return + endif + IRP_ENDIF + + i1=1_$int_type i2=1_$int_type do i=1_$int_type,isize @@ -637,12 +765,13 @@ BEGIN_TEMPLATE end -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; +SUBST [ X, type, integer_size, is_big, big, int_type, intel ] + i ; 4 ; 32 ; .False. ; ; 4 ; call ippsSortRadixIndexAscend_32s(x, 4, iorder, isize, iorder1) ;; + i8 ; 8 ; 64 ; .False. ; ; 4 ; ;; + i2 ; 2 ; 16 ; .False. ; ; 4 ; ;; + i ; 4 ; 32 ; .True. ; _big ; 8 ; ;; + i8 ; 8 ; 64 ; .True. ; _big ; 8 ; ;; END_TEMPLATE + From c4a91c78c9011de18ae5d866b70693981b5485ed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 31 May 2021 08:28:00 +0200 Subject: [PATCH 12/15] Fix gfortrab compilation --- src/utils/sort.irp.f | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index be58e7a5..b4e30db4 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -420,13 +420,16 @@ END_TEMPLATE ! Sort array x(isize). END_DOC integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer, allocatable :: iorder - allocate(iorder) - iorder=0 - call $Xradix_sort(x,iorder,isize,-1) + integer,intent(inout) :: x(isize) + integer, allocatable :: iorder(:) + integer :: i + allocate(iorder(isize)) + do i=1,isize + iorder(i)=i + enddo + call iradix_sort(x,iorder,isize,-1) deallocate(iorder) - end subroutine $Xsort + end subroutine isort_noidx IRP_ENDIF From 2c3a25e20a6e924de0ac63ba6a353c1db01455b9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 31 May 2021 10:45:37 +0200 Subject: [PATCH 13/15] Cleaned sorting --- src/utils/intel.f90 | 125 ++++++++++++++++++++++++++--- src/utils/sort.irp.f | 187 ++++++++++++++----------------------------- 2 files changed, 173 insertions(+), 139 deletions(-) diff --git a/src/utils/intel.f90 b/src/utils/intel.f90 index 681adde9..4c18af8d 100644 --- a/src/utils/intel.f90 +++ b/src/utils/intel.f90 @@ -1,13 +1,5 @@ module intel use, intrinsic :: iso_c_binding - interface - subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize') - use iso_c_binding - integer, intent(in), value :: len - integer, intent(in), value :: dataType - integer, intent(out) :: pBufSize - end - end interface interface subroutine ippsSortAscend_32s_I(pSrc, len) bind(C, name='ippsSortAscend_32s_I') use iso_c_binding @@ -15,12 +7,86 @@ module intel integer, intent(inout) :: pSrc(len) end end interface + interface + subroutine ippsSortAscend_32f_I(pSrc, len) bind(C, name='ippsSortAscend_32f_I') + use iso_c_binding + integer, intent(in), value :: len + real, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortAscend_64s_I(pSrc, len) bind(C, name='ippsSortAscend_64s_I') + use iso_c_binding + integer*8, intent(in), value :: len + integer, intent(inout) :: pSrc(len) + end + end interface + interface + subroutine ippsSortAscend_64f_I(pSrc, len) bind(C, name='ippsSortAscend_64f_I') + use iso_c_binding + double precision, intent(in), value :: len + real, intent(inout) :: pSrc(len) + end + end interface + + interface + subroutine ippsSortRadixIndexGetBufferSize(len, dataType, pBufSize) bind(C, name='ippsSortRadixIndexGetBufferSize') + use iso_c_binding + integer, intent(in), value :: len + integer, intent(in), value :: dataType + integer, intent(out) :: pBufSize + end + end interface + + interface + subroutine ippsSortRadixAscend_16s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_16s_I') + use iso_c_binding + integer, intent(in), value :: len + integer*2, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface interface subroutine ippsSortRadixAscend_32s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32s_I') use iso_c_binding integer, intent(in), value :: len integer, intent(inout) :: pSrc(len) - integer, intent(inout) :: pTmp(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_32f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_32f_I') + use iso_c_binding + integer, intent(in), value :: len + real, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_64s_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64s_I') + use iso_c_binding + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + interface + subroutine ippsSortRadixAscend_64f_I(pSrc, len, pTmp) bind(C, name='ippsSortRadixAscend_64f_I') + use iso_c_binding + integer, intent(in), value :: len + double precision, intent(inout) :: pSrc(len) + character, intent(inout) :: pTmp(len) + end + end interface + + interface + subroutine ippsSortRadixIndexAscend_16s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_16s') + use iso_c_binding + integer, intent(in), value :: len + integer*2, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) end end interface interface @@ -30,7 +96,7 @@ module intel integer, intent(inout) :: pSrc(len) integer, intent(in), value :: srcStrideBytes integer, intent(inout) :: pDstIndx(len) - integer, intent(inout) :: pTmpIndx(len) + character, intent(inout) :: pTmpIndx(len) end end interface interface @@ -40,9 +106,30 @@ module intel real , intent(inout) :: pSrc(len) integer, intent(in), value :: srcStrideBytes integer, intent(inout) :: pDstIndx(len) - integer, intent(inout) :: pTmpIndx(len) + character, intent(inout) :: pTmpIndx(len) end end interface + interface + subroutine ippsSortRadixIndexAscend_64s(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C, name='ippsSortRadixIndexAscend_64s') + use iso_c_binding + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface + subroutine ippsSortRadixIndexAscend_64f(pSrc, srcStrideBytes, pDstIndx, len, pTmpIndx) bind(C,name='ippsSortRadixIndexAscend_64f') + use iso_c_binding + integer, intent(in), value :: len + real*8 , intent(inout) :: pSrc(len) + integer, intent(in), value :: srcStrideBytes + integer, intent(inout) :: pDstIndx(len) + character, intent(inout) :: pTmpIndx(len) + end + end interface + interface subroutine ippsSortIndexAscend_32f_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_32f_I') use iso_c_binding @@ -67,4 +154,20 @@ module intel integer(4), intent(in), value :: len end end interface + interface + subroutine ippsSortIndexAscend_64s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_64s_I') + use iso_c_binding + integer(8), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface + interface + subroutine ippsSortIndexAscend_16s_I(pSrcDst, pDstIndx, len) bind(C,name='ippsSortIndexAscend_16s_I') + use iso_c_binding + integer(2), intent(in) :: pSrcDst(*) + integer(4), intent(inout) :: pDstIndx(*) + integer(4), intent(in), value :: len + end + end interface end module diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index b4e30db4..d40b7d1d 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -262,83 +262,13 @@ SUBST [ X, type ] i2 ; integer*2 ;; END_TEMPLATE + +!---------------------- INTEL IRP_IF INTEL - subroutine sort(x,iorder,isize) - use intel - implicit none - BEGIN_DOC - ! Sort array x(isize). - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - real ,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer :: n - call ippsSortIndexAscend_32f_I(x, iorder, isize) - iorder(:) = iorder(:)+1 - end subroutine sort - - subroutine dsort(x,iorder,isize) - use intel - implicit none - BEGIN_DOC - ! Sort array x(isize). - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - real(8) ,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer :: n - call ippsSortIndexAscend_64f_I(x, iorder, isize) - iorder(:) = iorder(:)+1 - end subroutine dsort - - subroutine isort(x,iorder,isize) - use intel - implicit none - BEGIN_DOC - ! Sort array x(isize). - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - integer ,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer :: n - integer, allocatable :: iorder1(:) - allocate(iorder1(isize*2)) - n=4 - call ippsSortRadixIndexAscend_32s(x, n, iorder, isize, iorder1) - iorder(1:isize) = iorder(1:isize)+1 - deallocate(iorder1) - call iset_order(x,iorder,isize) - end subroutine isort - - subroutine isort_noidx(x,isize) - use intel - implicit none - BEGIN_DOC - ! Sort array x(isize). - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - integer ,intent(inout) :: x(isize) - integer, allocatable :: iorder1(:) - integer :: n - call ippsSortRadixIndexGetBufferSize(isize, 11, n) - n = n/4 - allocate(iorder1(n)) - call ippsSortRadixAscend_32s_I(x, isize, iorder1) - deallocate(iorder1) - end subroutine isort_noidx - - BEGIN_TEMPLATE subroutine $Xsort(x,iorder,isize) + use intel implicit none BEGIN_DOC ! Sort array x(isize). @@ -349,17 +279,46 @@ BEGIN_TEMPLATE $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) integer :: n -! call $Xradix_sort(x,iorder,isize,-1) - call quick_$Xsort(x,iorder,isize) - end subroutine $Xsort + character, allocatable :: tmp(:) + if (isize < 2) return + call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n) + allocate(tmp(n)) + call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp) + deallocate(tmp) + iorder(1:isize) = iorder(1:isize)+1 + call $Xset_order(x,iorder,isize) + end -SUBST [ X, type ] - i8 ; integer*8 ;; - i2 ; integer*2 ;; + subroutine $Xsort_noidx(x,isize) + use intel + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer :: n + character, allocatable :: tmp(:) + if (isize < 2) return + call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n) + allocate(tmp(n)) + call ippsSortRadixAscend_$ityp_I(x, isize, tmp) + deallocate(tmp) + end + +SUBST [ X, type, ityp, n, ippsz ] + ; real ; 32f ; 4 ; 13 ;; + d ; double precision ; 64f ; 8 ; 19;; + i ; integer ; 32s ; 4 ; 11 ;; + i8 ; integer*8 ; 64s ; 8 ; 17;; + i2 ; integer*2 ; 16s ; 2 ; 7 ;; END_TEMPLATE +!---------------------- END INTEL IRP_ELSE - +!---------------------- NON-INTEL BEGIN_TEMPLATE subroutine $Xsort(x,iorder,isize) implicit none @@ -387,50 +346,34 @@ BEGIN_TEMPLATE endif end subroutine $Xsort -SUBST [ X, type ] - ; real ;; - d ; double precision ;; -END_TEMPLATE - -BEGIN_TEMPLATE - subroutine $Xsort(x,iorder,isize) + subroutine $Xsort_noidx(x,isize) implicit none BEGIN_DOC ! Sort array x(isize). - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. END_DOC integer,intent(in) :: isize $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer :: n -! call $Xradix_sort(x,iorder,isize,-1) - call quick_$Xsort(x,iorder,isize) - end subroutine $Xsort + integer, allocatable :: iorder(:) + integer :: i + allocate(iorder(isize)) + do i=1,isize + iorder(i)=i + enddo + call $Xsort(x,iorder,isize) + deallocate(iorder) + end subroutine $Xsort_noidx SUBST [ X, type ] + ; real ;; + d ; double precision ;; i ; integer ;; i8 ; integer*8 ;; i2 ; integer*2 ;; END_TEMPLATE - subroutine isort_noidx(x,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize). - END_DOC - integer,intent(in) :: isize - integer,intent(inout) :: x(isize) - integer, allocatable :: iorder(:) - integer :: i - allocate(iorder(isize)) - do i=1,isize - iorder(i)=i - enddo - call iradix_sort(x,iorder,isize,-1) - deallocate(iorder) - end subroutine isort_noidx IRP_ENDIF +!---------------------- END NON-INTEL + BEGIN_TEMPLATE @@ -534,9 +477,6 @@ END_TEMPLATE BEGIN_TEMPLATE recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) -IRP_IF INTEL - use intel -IRP_ENDIF implicit none BEGIN_DOC @@ -570,15 +510,6 @@ IRP_ENDIF stop endif - IRP_IF INTEL - if ( ($type == 4).and.($integer_size == 32).and.($is_big == .False.) ) then - $intel - iorder(:) = iorder(:)+1 - return - endif - IRP_ENDIF - - i1=1_$int_type i2=1_$int_type do i=1_$int_type,isize @@ -768,12 +699,12 @@ IRP_ENDIF end -SUBST [ X, type, integer_size, is_big, big, int_type, intel ] - i ; 4 ; 32 ; .False. ; ; 4 ; call ippsSortRadixIndexAscend_32s(x, 4, iorder, isize, iorder1) ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ; ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ; ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ; ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ; ;; +SUBST [ X, type, integer_size, is_big, big, int_type ] + i ; 4 ; 32 ; .False. ; ; 4 ;; + i8 ; 8 ; 64 ; .False. ; ; 4 ;; + i2 ; 2 ; 16 ; .False. ; ; 4 ;; + i ; 4 ; 32 ; .True. ; _big ; 8 ;; + i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; END_TEMPLATE From 3837dea58c0a5cc30996b6fec3ecc4824d774db7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 31 May 2021 11:47:34 +0200 Subject: [PATCH 14/15] Improving sort --- src/utils/sort.irp.f | 75 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 17 deletions(-) diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index d40b7d1d..f3879d4a 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -320,6 +320,34 @@ END_TEMPLATE IRP_ELSE !---------------------- NON-INTEL BEGIN_TEMPLATE + + subroutine $Xsort_noidx(x,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer, allocatable :: iorder(:) + integer :: i + allocate(iorder(isize)) + do i=1,isize + iorder(i)=i + enddo + call $Xsort(x,iorder,isize) + deallocate(iorder) + end subroutine $Xsort_noidx + +SUBST [ X, type ] + ; real ;; + d ; double precision ;; + i ; integer ;; + i8 ; integer*8 ;; + i2 ; integer*2 ;; +END_TEMPLATE + +BEGIN_TEMPLATE + subroutine $Xsort(x,iorder,isize) implicit none BEGIN_DOC @@ -346,26 +374,39 @@ BEGIN_TEMPLATE endif end subroutine $Xsort - subroutine $Xsort_noidx(x,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize). - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer, allocatable :: iorder(:) - integer :: i - allocate(iorder(isize)) - do i=1,isize - iorder(i)=i - enddo - call $Xsort(x,iorder,isize) - deallocate(iorder) - end subroutine $Xsort_noidx - SUBST [ X, type ] ; real ;; d ; double precision ;; +END_TEMPLATE + +BEGIN_TEMPLATE + + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + if (isize < 2) then + return + endif + call sorted_$Xnumber(x,isize,n) + if (isize == n) then + return + endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else + call $Xradix_sort(x,iorder,isize,-1) + endif + end subroutine $Xsort + +SUBST [ X, type ] i ; integer ;; i8 ; integer*8 ;; i2 ; integer*2 ;; From d45f6091daf107a2a18925c9b75d9a8ed4963eee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 31 May 2021 12:16:00 +0200 Subject: [PATCH 15/15] Sorting compatible with old IPP --- config/ifort_rome.cfg | 4 +-- src/utils/intel.f90 | 8 +++--- src/utils/sort.irp.f | 65 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index 3b70f98b..5ed01227 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -31,8 +31,8 @@ OPENMP : 1 ; Append OpenMP flags # -ftz : Flushes denormal results to zero # [OPT] -FC : -traceback -FCFLAGS : -O2 -ip -g -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer +FC : -traceback -shared-intel +FCFLAGS : -O2 -ip -g -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer # Profiling flags ################# diff --git a/src/utils/intel.f90 b/src/utils/intel.f90 index 4c18af8d..2b61cb38 100644 --- a/src/utils/intel.f90 +++ b/src/utils/intel.f90 @@ -17,15 +17,15 @@ module intel interface subroutine ippsSortAscend_64s_I(pSrc, len) bind(C, name='ippsSortAscend_64s_I') use iso_c_binding - integer*8, intent(in), value :: len - integer, intent(inout) :: pSrc(len) + integer, intent(in), value :: len + integer*8, intent(inout) :: pSrc(len) end end interface interface subroutine ippsSortAscend_64f_I(pSrc, len) bind(C, name='ippsSortAscend_64f_I') use iso_c_binding - double precision, intent(in), value :: len - real, intent(inout) :: pSrc(len) + integer, intent(in), value :: len + double precision, intent(inout) :: pSrc(len) end end interface diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index f3879d4a..21eb8b67 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -310,12 +310,73 @@ BEGIN_TEMPLATE SUBST [ X, type, ityp, n, ippsz ] ; real ; 32f ; 4 ; 13 ;; - d ; double precision ; 64f ; 8 ; 19;; i ; integer ; 32s ; 4 ; 11 ;; - i8 ; integer*8 ; 64s ; 8 ; 17;; i2 ; integer*2 ; 16s ; 2 ; 7 ;; END_TEMPLATE +BEGIN_TEMPLATE + + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + if (isize < 2) then + return + endif +! call sorted_$Xnumber(x,isize,n) +! if (isize == n) then +! return +! endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else +! call heap_$Xsort(x,iorder,isize) + call quick_$Xsort(x,iorder,isize) + endif + end subroutine $Xsort + +SUBST [ X, type ] + d ; double precision ;; +END_TEMPLATE + +BEGIN_TEMPLATE + + subroutine $Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize). + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer :: n + if (isize < 2) then + return + endif + call sorted_$Xnumber(x,isize,n) + if (isize == n) then + return + endif + if ( isize < 32) then + call insertion_$Xsort(x,iorder,isize) + else + call $Xradix_sort(x,iorder,isize,-1) + endif + end subroutine $Xsort + +SUBST [ X, type ] + i8 ; integer*8 ;; +END_TEMPLATE + !---------------------- END INTEL IRP_ELSE !---------------------- NON-INTEL