From 4318b0a04b2d54003f7ea7ad42b380ca8ddfb3a5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 29 Nov 2021 10:39:34 +0100 Subject: [PATCH] OpenMP nested merged --- INSTALL.rst | 122 ++++-------- config/ifort_2019_avx.cfg | 63 +++++++ config/ifort_2019_avx_mpi.cfg | 64 +++++++ .../{ifort_rome.cfg => ifort_2019_rome.cfg} | 2 +- config/ifort_2019_sse4.cfg | 63 +++++++ config/ifort_2019_sse4_mpi.cfg | 64 +++++++ config/ifort_2019_xHost.cfg | 63 +++++++ config/{ifort_avx.cfg => ifort_2021_avx.cfg} | 2 +- ...ort_avx_mpi.cfg => ifort_2021_avx_mpi.cfg} | 2 +- config/{ifort.cfg => ifort_2021_sse4.cfg} | 2 +- ...{ifort_mpi.cfg => ifort_2021_sse4_mpi.cfg} | 2 +- .../{ifort_xHost.cfg => ifort_2021_xHost.cfg} | 2 +- config/ifort_debug.cfg | 66 ------- scripts/verif_omp/check_actual_setup.sh | 12 -- .../{check_omp_v2.f90 => check_omp.f90} | 0 scripts/verif_omp/check_required_setup.sh | 2 +- scripts/verif_omp/study_omp.sh | 2 +- src/cipsi/pt2_stoch_routines.irp.f | 4 +- src/cipsi/slave_cipsi.irp.f | 6 +- src/davidson/davidson_parallel.irp.f | 3 +- src/davidson/davidson_parallel_csf.irp.f | 3 +- src/davidson/davidson_parallel_nos2.irp.f | 3 +- src/dressing/run_dress_slave.irp.f | 6 +- src/fci/check_omp_actual_setup.irp.f | 174 ------------------ src/utils/set_multiple_levels_omp.irp.f | 28 ++- 25 files changed, 386 insertions(+), 374 deletions(-) create mode 100644 config/ifort_2019_avx.cfg create mode 100644 config/ifort_2019_avx_mpi.cfg rename config/{ifort_rome.cfg => ifort_2019_rome.cfg} (95%) create mode 100644 config/ifort_2019_sse4.cfg create mode 100644 config/ifort_2019_sse4_mpi.cfg create mode 100644 config/ifort_2019_xHost.cfg rename config/{ifort_avx.cfg => ifort_2021_avx.cfg} (96%) rename config/{ifort_avx_mpi.cfg => ifort_2021_avx_mpi.cfg} (96%) rename config/{ifort.cfg => ifort_2021_sse4.cfg} (96%) rename config/{ifort_mpi.cfg => ifort_2021_sse4_mpi.cfg} (96%) rename config/{ifort_xHost.cfg => ifort_2021_xHost.cfg} (96%) delete mode 100644 
config/ifort_debug.cfg delete mode 100755 scripts/verif_omp/check_actual_setup.sh rename scripts/verif_omp/{check_omp_v2.f90 => check_omp.f90} (100%) delete mode 100644 src/fci/check_omp_actual_setup.irp.f diff --git a/INSTALL.rst b/INSTALL.rst index 229bf40a..64e48598 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -2,9 +2,9 @@ Installation ============ -The |qp| can be downloaded on GitHub as an `archive -`_ or as a `git -repository `_. +|qp| can be downloaded on GitHub as an `archive +`_ or as a `git +repository `_. .. code:: bash @@ -19,16 +19,16 @@ Before anything, go into your :file:`quantum_package` directory and run This script will create the :file:`quantum_package.rc` bash script, which -sets all the environment variables required for the normal operation of the -*Quantum Package*. It will also initialize the git submodules that are +sets all the environment variables required for the normal operation of +|qp|. It will also initialize the git submodules that are required, and tell you which external dependencies are missing and need to be installed. The required dependencies are located in the -`external/qp2-dependencies` directory, such that once QP is configured the +`external/qp2-dependencies` directory, such that once |qp| is configured the internet connection is not needed any more. When all dependencies have been installed, (the :command:`configure` will -inform you) source the :file:`quantum_package.rc` in order to load all -environment variables and compile the |QP|. +inform you what is missing) source the :file:`quantum_package.rc` in order to +load all environment variables and compile |QP|. Now all the requirements are met, you can compile the programs using @@ -37,6 +37,15 @@ Now all the requirements are met, you can compile the programs using make +Installation of dependencies via a Conda environment +==================================================== + +.. 
code:: bash + + conda env create -f qp2.yml + + + Requirements ============ @@ -64,8 +73,8 @@ architecture. Modify it if needed, and run :command:`configure` with .. code:: bash - cp ./config/gfortran.example config/gfortran.cfg - ./configure -c config/gfortran.cfg + cp ./config/gfortran.example config/gfortran_avx.cfg + ./configure -c config/gfortran_avx.cfg .. note:: @@ -86,45 +95,33 @@ The command is to be used as follows: .. code:: bash - ./configure --install= + ./configure -i The following packages are supported by the :command:`configure` installer: * ninja -* irpf90 * zeromq * f77zmq * gmp * ocaml (:math:`\approx` 5 minutes) -* ezfio * docopt * resultsFile * bats +* zlib Example: .. code:: bash - ./configure -i ezfio + ./configure -i ninja -.. note:: - - When installing the ocaml package, you will be asked the location of where - it should be installed. A safe option is to enter the path proposed by the - |QP|: - - QP>> Please install it here: /your_quantum_package_directory/bin - - So just enter the proposition of the |QP| and press enter. If the :command:`configure` executable fails to install a specific dependency ----------------------------------------------------------------------------- -If the :command:`configure` executable does not succeed to install a specific -dependency, there are some proposition of how to download and install the -minimal dependencies to compile and use the |QP|. - +If the :command:`configure` executable does not succeed in installing a specific +dependency, you should try to install the dependency on your system by yourself. Before doing anything below, try to install the packages with your package manager (:command:`apt`, :command:`yum`, etc). @@ -149,11 +146,11 @@ IRPF90 *IRPF90* is a Fortran code generator for programming using the Implicit Reference to Parameters (IRP) method. -If you have *pip* for Python2, you can do +If you have *pip* for Python3, you can do .. 
code:: bash - python2 -m pip install --user irpf90 + python3 -m pip install --user irpf90 Otherwise, @@ -262,53 +259,6 @@ With Debian or Ubuntu, you can use sudo apt install libgmp-dev -libcap ------- - -Libcap is a library for getting and setting POSIX.1e draft 15 capabilities. - -* Download the latest version of libcap here: - ``_ - and move it in the :file:`${QP_ROOT}/external` directory - -* Extract the archive, go into the :file:`libcap-*/libcap` directory and run - the following command - -.. code:: bash - - prefix=$QP_ROOT make install - -With Debian or Ubuntu, you can use - -.. code:: bash - - sudo apt install libcap-dev - - -Bubblewrap ----------- - -Bubblewrap is an unprivileged sandboxing tool. - -* Download Bubblewrap here: - ``_ - and move it in the :file:`${QP_ROOT}/external` directory - -* Extract the archive, go into the :file:`bubblewrap-*` directory and run - the following commands - -.. code:: bash - - ./configure --prefix=$QP_ROOT && make -j 8 - make install-exec-am - - -With Debian or Ubuntu, you can use - -.. code:: bash - - sudo apt install bubblewrap - OCaml @@ -327,7 +277,7 @@ OCaml ``_ and move it in the :file:`${QP_ROOT}/external` directory -* If you use OCaml only with the |qp|, you can install the OPAM directory +* If you use OCaml only with |qp|, you can install the OPAM directory containing the compiler and all the installed libraries in the :file:`${QP_ROOT}/external` directory as @@ -352,14 +302,14 @@ OCaml .. code:: bash - opam init --comp=4.07.1 + opam init --comp=4.11.1 eval `${QP_ROOT}/bin/opam env` If the installation fails because of bwrap, you can initialize opam using: .. 
code:: bash - opam init --disable-sandboxing --comp=4.07.1 + opam init --disable-sandboxing --comp=4.11.1 eval `${QP_ROOT}/bin/opam env` * Install the required external OCaml libraries @@ -369,17 +319,6 @@ OCaml opam install ocamlbuild cryptokit zmq sexplib ppx_sexp_conv ppx_deriving getopt -EZFIO ------ - -*EZFIO* is the Easy Fortran Input/Output library generator. - -* Download EZFIO here : ``_ and move - the downloaded archive in the :file:`${QP_ROOT}/external` directory - -* Extract the archive, and rename it as :file:`${QP_ROOT}/external/ezfio` - - Docopt ------ @@ -406,7 +345,7 @@ resultsFile *resultsFile* is a Python package to extract data from output files of quantum chemistry codes. -If you have *pip* for Python3, you can do +If you have *pip* for Python3, you can do .. code:: bash @@ -414,3 +353,4 @@ If you have *pip* for Python3, you can do + diff --git a/config/ifort_2019_avx.cfg b/config/ifort_2019_avx.cfg new file mode 100644 index 00000000..b929baf5 --- /dev/null +++ b/config/ifort_2019_avx.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. 
+# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xAVX -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2019_avx_mpi.cfg b/config/ifort_2019_avx_mpi.cfg new file mode 100644 index 00000000..383fa68c --- /dev/null +++ b/config/ifort_2019_avx_mpi.cfg @@ -0,0 +1,64 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : mpiifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. 
+# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FCFLAGS : -mavx -axAVX -O2 -ip -ftz -g -traceback + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -march=corei7 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_rome.cfg b/config/ifort_2019_rome.cfg similarity index 95% rename from config/ifort_rome.cfg rename to config/ifort_2019_rome.cfg index 1ac78717..c0396f42 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_2019_rome.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED # Global options ################ diff --git a/config/ifort_2019_sse4.cfg b/config/ifort_2019_sse4.cfg new file mode 100644 index 00000000..460322c1 --- /dev/null +++ b/config/ifort_2019_sse4.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! 
+# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2019_sse4_mpi.cfg b/config/ifort_2019_sse4_mpi.cfg new file mode 100644 index 00000000..4c871591 --- /dev/null +++ b/config/ifort_2019_sse4_mpi.cfg @@ -0,0 +1,64 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! 
+# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : mpiifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FCFLAGS : -msse4.2 -O2 -ip -ftz -g -traceback + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2019_xHost.cfg b/config/ifort_2019_xHost.cfg new file mode 100644 index 00000000..1820ab47 --- /dev/null +++ b/config/ifort_2019_xHost.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! 
+# --align=64 : Align all provided arrays on a 64-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=64 -DINTEL -DSET_MAX_ACT -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xHost -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exceptions +# -C : Checks uninitialized variables, array subscripts, etc... 
+# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_avx.cfg b/config/ifort_2021_avx.cfg similarity index 96% rename from config/ifort_avx.cfg rename to config/ifort_2021_avx.cfg index a2cb4c8a..b7b3261e 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_2021_avx.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT # Global options ################ diff --git a/config/ifort_avx_mpi.cfg b/config/ifort_2021_avx_mpi.cfg similarity index 96% rename from config/ifort_avx_mpi.cfg rename to config/ifort_2021_avx_mpi.cfg index f2bb8889..e060657f 100644 --- a/config/ifort_avx_mpi.cfg +++ b/config/ifort_2021_avx_mpi.cfg @@ -9,7 +9,7 @@ FC : mpiifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT # Global options ################ diff --git a/config/ifort.cfg b/config/ifort_2021_sse4.cfg similarity index 96% rename from config/ifort.cfg rename to config/ifort_2021_sse4.cfg index 0382360a..ad4f2990 100644 --- a/config/ifort.cfg +++ b/config/ifort_2021_sse4.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT # Global options ################ diff --git a/config/ifort_mpi.cfg b/config/ifort_2021_sse4_mpi.cfg similarity index 96% rename from config/ifort_mpi.cfg rename to config/ifort_2021_sse4_mpi.cfg index e0d489a0..f4a093ec 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_2021_sse4_mpi.cfg @@ -9,7 +9,7 @@ 
FC : mpiifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT # Global options ################ diff --git a/config/ifort_xHost.cfg b/config/ifort_2021_xHost.cfg similarity index 96% rename from config/ifort_xHost.cfg rename to config/ifort_2021_xHost.cfg index ddb4aa2d..f57e8709 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_2021_xHost.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=64 -DINTEL +IRPF90_FLAGS : --ninja --align=64 -DINTEL -DSET_MAX_ACT # Global options ################ diff --git a/config/ifort_debug.cfg b/config/ifort_debug.cfg deleted file mode 100644 index d70b1465..00000000 --- a/config/ifort_debug.cfg +++ /dev/null @@ -1,66 +0,0 @@ -# Common flags -############## -# -# -mkl=[parallel|sequential] : Use the MKL library -# --ninja : Allow the utilisation of ninja. It is mandatory ! -# --align=32 : Align all provided arrays on a 32-byte boundary -# -[COMMON] -FC : ifort -fpic -LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps -IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL - -# Global options -################ -# -# 1 : Activate -# 0 : Deactivate -# -[OPTION] -MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below -CACHE : 0 ; Enable cache_compile.py -OPENMP : 1 ; Append OpenMP flags - -# Optimization flags -#################### -# -# -xHost : Compile a binary optimized for the current architecture -# -O2 : O3 not better than O2. 
-# -ip : Inter-procedural optimizations -# -ftz : Flushes denormal results to zero -# -[OPT] -FC : -traceback -FCFLAGS : -msse4.2 -O2 -ip -ftz -g - - -# Profiling flags -################# -# -[PROFILE] -FC : -p -g -FCFLAGS : -msse4.2 -O2 -ip -ftz - - -# Debugging flags -################# -# -# -traceback : Activate backtrace on runtime -# -fpe0 : All floating point exaceptions -# -C : Checks uninitialized variables, array subscripts, etc... -# -g : Extra debugging information -# -msse4.2 : Valgrind needs a very simple x86 executable -# -[DEBUG] -FC : -g -traceback -FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone - - -# OpenMP flags -################# -# -[OPENMP] -FC : -qopenmp -IRPF90_FLAGS : --openmp - diff --git a/scripts/verif_omp/check_actual_setup.sh b/scripts/verif_omp/check_actual_setup.sh deleted file mode 100755 index 6eaa4517..00000000 --- a/scripts/verif_omp/check_actual_setup.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -# go in qp2/src/fci to run check_omp_actual_setup -# to see if we can run in parallel an omp section in another one -echo "" -echo "Please wait..." 
-echo "" -cd ../../src/fci -ninja || echo "Please recompile from the root" -echo "" -./check_omp_actual_setup -cd ../../scripts/verif_omp diff --git a/scripts/verif_omp/check_omp_v2.f90 b/scripts/verif_omp/check_omp.f90 similarity index 100% rename from scripts/verif_omp/check_omp_v2.f90 rename to scripts/verif_omp/check_omp.f90 diff --git a/scripts/verif_omp/check_required_setup.sh b/scripts/verif_omp/check_required_setup.sh index facb6cbb..367530b6 100755 --- a/scripts/verif_omp/check_required_setup.sh +++ b/scripts/verif_omp/check_required_setup.sh @@ -9,7 +9,7 @@ then else $1 --version > /dev/null \ -&& $1 -O0 -fopenmp check_omp_v2.f90 \ +&& $1 -O0 -fopenmp check_omp.f90 \ && ./a.out | tail -n 1 diff --git a/scripts/verif_omp/study_omp.sh b/scripts/verif_omp/study_omp.sh index 00668d59..900d04e1 100755 --- a/scripts/verif_omp/study_omp.sh +++ b/scripts/verif_omp/study_omp.sh @@ -20,7 +20,7 @@ echo "1 2 3" >> $FILE for comp in $list_comp do $comp --version > /dev/null \ - && $comp -O0 -fopenmp check_omp_v2.f90 \ + && $comp -O0 -fopenmp check_omp.f90 \ && echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE done diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index b366a268..b14bdf8d 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -288,7 +288,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) call write_int(6,nproc_target,'Number of threads for PT2') call write_double(6,mem,'Memory (Gb)') - call omp_set_max_active_levels(1) + call set_multiple_levels_omp(.False.) 
print '(A)', '========== ======================= ===================== ===================== ===========' @@ -315,7 +315,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - call omp_set_max_active_levels(8) + call set_multiple_levels_omp(.True.) print '(A)', '========== ======================= ===================== ===================== ===========' diff --git a/src/cipsi/slave_cipsi.irp.f b/src/cipsi/slave_cipsi.irp.f index 510c667b..ddfc050e 100644 --- a/src/cipsi/slave_cipsi.irp.f +++ b/src/cipsi/slave_cipsi.irp.f @@ -4,7 +4,7 @@ subroutine run_slave_cipsi ! Helper program for distributed parallelism END_DOC - call omp_set_max_active_levels(1) + call set_multiple_levels_omp(.False.) distributed_davidson = .False. read_wf = .False. SOFT_TOUCH read_wf distributed_davidson @@ -171,9 +171,9 @@ subroutine run_slave_main call write_double(6,(t1-t0),'Broadcast time') !--- - call omp_set_max_active_levels(8) + call set_multiple_levels_omp(.True.) call davidson_slave_tcp(0) - call omp_set_max_active_levels(1) + call set_multiple_levels_omp(.False.) print *, mpi_rank, ': Davidson done' !--- diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index fcee16bc..e627dfc9 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -508,8 +508,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) endif - !call omp_set_max_active_levels(5) - call set_multiple_levels_omp() + call set_multiple_levels_omp(.True.) 
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() diff --git a/src/davidson/davidson_parallel_csf.irp.f b/src/davidson/davidson_parallel_csf.irp.f index 90e4303e..d8e9bffa 100644 --- a/src/davidson/davidson_parallel_csf.irp.f +++ b/src/davidson/davidson_parallel_csf.irp.f @@ -464,8 +464,7 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) print *, irp_here, ': Failed in zmq_set_running' endif - !call omp_set_max_active_levels(4) - call set_multiple_levels_omp() + call set_multiple_levels_omp(.True.) !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() diff --git a/src/davidson/davidson_parallel_nos2.irp.f b/src/davidson/davidson_parallel_nos2.irp.f index 091b8666..597b001f 100644 --- a/src/davidson/davidson_parallel_nos2.irp.f +++ b/src/davidson/davidson_parallel_nos2.irp.f @@ -464,8 +464,7 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) print *, irp_here, ': Failed in zmq_set_running' endif - !call omp_set_max_active_levels(4) - call set_multiple_levels_omp() + call set_multiple_levels_omp(.True.) !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() diff --git a/src/dressing/run_dress_slave.irp.f b/src/dressing/run_dress_slave.irp.f index a33fb1dd..08b654c9 100644 --- a/src/dressing/run_dress_slave.irp.f +++ b/src/dressing/run_dress_slave.irp.f @@ -72,7 +72,7 @@ subroutine run_dress_slave(thread,iproce,energy) provide psi_energy ending = dress_N_cp+1 ntask_tbd = 0 - call omp_set_max_active_levels(8) + call set_multiple_levels_omp(.True.) !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(interesting, breve_delta_m, task_id) & @@ -84,7 +84,7 @@ subroutine run_dress_slave(thread,iproce,energy) zmq_socket_push = new_zmq_push_socket(thread) integer, external :: connect_to_taskserver !$OMP CRITICAL - call omp_set_max_active_levels(1) + call set_multiple_levels_omp(.False.) 
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then print *, irp_here, ': Unable to connect to task server' stop -1 @@ -296,7 +296,7 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP END CRITICAL !$OMP END PARALLEL - call omp_set_max_active_levels(1) + call set_multiple_levels_omp(.False.) ! do i=0,dress_N_cp+1 ! call omp_destroy_lock(lck_sto(i)) ! end do diff --git a/src/fci/check_omp_actual_setup.irp.f b/src/fci/check_omp_actual_setup.irp.f deleted file mode 100644 index 70514bd3..00000000 --- a/src/fci/check_omp_actual_setup.irp.f +++ /dev/null @@ -1,174 +0,0 @@ -program check_omp_actual_setup - - use omp_lib - - implicit none - - integer :: accu, accu2 - integer :: s, n_setting - logical :: verbose, test_versions - logical, allocatable :: is_working(:) - - verbose = .True. - test_versions = .False. - n_setting = 4 - - allocate(is_working(n_setting)) - - is_working = .False. - - ! set the number of threads - call omp_set_num_threads(2) - - do s = 1, n_setting - - accu = 0 - accu2 = 0 - - call omp_set_max_active_levels(1) - call omp_set_nested(.False.) - - if (s==1) then - call set_multiple_levels_omp() - elseif (s==2) then - call omp_set_max_active_levels(5) - elseif (s==3) then - call omp_set_nested(.True.) - else - call omp_set_nested(.True.) - call omp_set_max_active_levels(5) - endif - - ! Level 1 - !$OMP PARALLEL - if (verbose) then - print*,'Num threads level 1:',omp_get_num_threads() - endif - - ! Level 2 - !$OMP PARALLEL - if (verbose) then - print*,'Num threads level 2:',omp_get_num_threads() - endif - - ! Level 3 - !$OMP PARALLEL - if (verbose) then - print*,'Num threads level 3:',omp_get_num_threads() - endif - - call check_omp_in_subroutine(accu2) - - ! 
Level 4 - !$OMP PARALLEL - - if (verbose) then - print*,'Num threads level 4:',omp_get_num_threads() - endif - - !$OMP ATOMIC - accu = accu + 1 - !$OMP END ATOMIC - - !$OMP END PARALLEL - - - !$OMP END PARALLEL - - - !$OMP END PARALLEL - - - !$OMP END PARALLEL - - if (verbose) then - print*,'Setting:',s,'accu=',accu - print*,'Setting:',s,'accu2=',accu2 - endif - - if (accu == 16 .and. accu2 == 16) then - is_working(s) = .True. - endif - - enddo - - if (verbose) then - if (is_working(2)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - elseif (is_working(3)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'' - print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' - elseif (is_working(4)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - else - print*,'The parallelization on multiple levels does not work with:' - print*,'call omp_set_max_active_levels(5)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Try an other compiler and good luck...' 
- endif - - if (is_working(1)) then - print*,'' - print*,'==========================================================' - print*,'Your actual set up works for parallelization with 4 levels' - print*,'==========================================================' - print*,'' - else - print*,'' - print*,'===================================================================' - print*,'Your actual set up does not work for parallelization with 4 levels' - print*,'Please look at the previous messages to understand the requirements' - print*,'===================================================================' - print*,'' - endif - endif - - ! List of working flags - if (test_versions) then - print*,is_working(2:4) - endif - - ! IRPF90_FLAGS - if (is_working(2)) then - print*,'-DSET_MAX_ACT' - elseif (is_working(3)) then - print*,'-DSET_NESTED' - elseif (is_working(4)) then - print*,'-DSET_MAX_ACT -DSET_NESTED' - else - print*,'ERROR' - endif - -end - -subroutine check_omp_in_subroutine(accu2) - - implicit none - - integer, intent(inout) :: accu2 - - !$OMP PARALLEL - - !$OMP ATOMIC - accu2 = accu2 + 1 - !$OMP END ATOMIC - - !$OMP END PARALLEL - -end diff --git a/src/utils/set_multiple_levels_omp.irp.f b/src/utils/set_multiple_levels_omp.irp.f index 4be3af5b..b4764e4a 100644 --- a/src/utils/set_multiple_levels_omp.irp.f +++ b/src/utils/set_multiple_levels_omp.irp.f @@ -1,16 +1,26 @@ -subroutine set_multiple_levels_omp() +subroutine set_multiple_levels_omp(activate) -! Doc : idk + BEGIN_DOC +! If activate is true, enable OpenMP nested parallelism by setting the maximum number of active levels to 5; if false, set it back to 1. When compiled with -DSET_NESTED, omp_set_nested is also called with the same logical value. + END_DOC implicit none + logical, intent(in) :: activate - IRP_IF SET_MAX_ACT - !print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)' + if (activate) then call omp_set_max_active_levels(5) - IRP_ENDIF - IRP_IF SET_NESTED - !print*,'SET_NESTED: True, call omp_set_nested(.True.)' - call omp_set_nested(.True.) - IRP_ENDIF + + IRP_IF SET_NESTED + call omp_set_nested(.True.) 
+ IRP_ENDIF + + else + + call omp_set_max_active_levels(1) + + IRP_IF SET_NESTED + call omp_set_nested(.False.) + IRP_ENDIF + end if end