mirror of
https://github.com/QuantumPackage/qp2.git
synced 2025-01-03 01:55:59 +01:00
OpenMP nested merged
This commit is contained in:
parent
3d3751fc78
commit
4318b0a04b
122
INSTALL.rst
122
INSTALL.rst
@ -2,9 +2,9 @@
|
||||
Installation
|
||||
============
|
||||
|
||||
The |qp| can be downloaded on GitHub as an `archive
|
||||
<https://github.com/LCPQ/quantum_package/releases/latest>`_ or as a `git
|
||||
repository <https://github.com/LCPQ/quantum_package>`_.
|
||||
|qp| can be downloaded on GitHub as an `archive
|
||||
<https://github.com/QuantumPackage/qp2/releases>`_ or as a `git
|
||||
repository <https://github.com/QuantumPackage/qp2>`_.
|
||||
|
||||
.. code:: bash
|
||||
|
||||
@ -19,16 +19,16 @@ Before anything, go into your :file:`quantum_package` directory and run
|
||||
|
||||
|
||||
This script will create the :file:`quantum_package.rc` bash script, which
|
||||
sets all the environment variables required for the normal operation of the
|
||||
*Quantum Package*. It will also initialize the git submodules that are
|
||||
sets all the environment variables required for the normal operation of
|
||||
|qp|. It will also initialize the git submodules that are
|
||||
required, and tell you which external dependencies are missing and need to be
|
||||
installed. The required dependencies are located in the
|
||||
`external/qp2-dependencies` directory, such that once QP is configured the
|
||||
`external/qp2-dependencies` directory, such that once |qp| is configured the
|
||||
internet connection is not needed any more.
|
||||
|
||||
When all dependencies have been installed, (the :command:`configure` will
|
||||
inform you) source the :file:`quantum_package.rc` in order to load all
|
||||
environment variables and compile the |QP|.
|
||||
inform you what is missing) source the :file:`quantum_package.rc` in order to
|
||||
load all environment variables and compile |QP|.
|
||||
|
||||
Now all the requirements are met, you can compile the programs using
|
||||
|
||||
@ -37,6 +37,15 @@ Now all the requirements are met, you can compile the programs using
|
||||
make
|
||||
|
||||
|
||||
Installation of dependencies via a Conda environment
|
||||
====================================================
|
||||
|
||||
.. code:: bash
|
||||
|
||||
conda env create -f qp2.yml
|
||||

|
||||
|
||||
|
||||
Requirements
|
||||
============
|
||||
|
||||
@ -64,8 +73,8 @@ architecture. Modify it if needed, and run :command:`configure` with
|
||||
|
||||
.. code:: bash
|
||||
|
||||
cp ./config/gfortran.example config/gfortran.cfg
|
||||
./configure -c config/gfortran.cfg
|
||||
cp ./config/gfortran.example config/gfortran_avx.cfg
|
||||
./configure -c config/gfortran_avx.cfg
|
||||
|
||||
|
||||
.. note::
|
||||
@ -86,45 +95,33 @@ The command is to be used as follows:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
./configure --install=<package>
|
||||
./configure -i <package>
|
||||
|
||||
The following packages are supported by the :command:`configure` installer:
|
||||
|
||||
* ninja
|
||||
* irpf90
|
||||
* zeromq
|
||||
* f77zmq
|
||||
* gmp
|
||||
* ocaml (:math:`\approx` 5 minutes)
|
||||
* ezfio
|
||||
* docopt
|
||||
* resultsFile
|
||||
* bats
|
||||
* zlib
|
||||
|
||||
Example:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
./configure -i ezfio
|
||||
./configure -i ninja
|
||||
|
||||
.. note::
|
||||
|
||||
When installing the ocaml package, you will be asked the location of where
|
||||
it should be installed. A safe option is to enter the path proposed by the
|
||||
|QP|:
|
||||
|
||||
QP>> Please install it here: /your_quantum_package_directory/bin
|
||||
|
||||
So just enter the proposition of the |QP| and press enter.
|
||||
|
||||
|
||||
If the :command:`configure` executable fails to install a specific dependency
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
If the :command:`configure` executable does not succeed to install a specific
|
||||
dependency, there are some proposition of how to download and install the
|
||||
minimal dependencies to compile and use the |QP|.
|
||||
|
||||
If the :command:`configure` executable does not succeed in installing a specific
|
||||
dependency, you should try to install the dependency on your system by yourself.
|
||||
|
||||
Before doing anything below, try to install the packages with your package manager
|
||||
(:command:`apt`, :command:`yum`, etc).
|
||||
@ -149,11 +146,11 @@ IRPF90
|
||||
*IRPF90* is a Fortran code generator for programming using the Implicit Reference
|
||||
to Parameters (IRP) method.
|
||||
|
||||
If you have *pip* for Python2, you can do
|
||||
If you have *pip* for Python2, you can do
|
||||
|
||||
.. code:: bash
|
||||
|
||||
python2 -m pip install --user irpf90
|
||||
python3 -m pip install --user irpf90
|
||||
|
||||
Otherwise,
|
||||
|
||||
@ -262,53 +259,6 @@ With Debian or Ubuntu, you can use
|
||||
sudo apt install libgmp-dev
|
||||
|
||||
|
||||
libcap
|
||||
------
|
||||
|
||||
Libcap is a library for getting and setting POSIX.1e draft 15 capabilities.
|
||||
|
||||
* Download the latest version of libcap here:
|
||||
`<https://git.kernel.org/pub/scm/linux/kernel/git/morgan/libcap.git/snapshot/libcap-2.25.tar.gz>`_
|
||||
and move it in the :file:`${QP_ROOT}/external` directory
|
||||
|
||||
* Extract the archive, go into the :file:`libcap-*/libcap` directory and run
|
||||
the following command
|
||||
|
||||
.. code:: bash
|
||||
|
||||
prefix=$QP_ROOT make install
|
||||
|
||||
With Debian or Ubuntu, you can use
|
||||
|
||||
.. code:: bash
|
||||
|
||||
sudo apt install libcap-dev
|
||||
|
||||
|
||||
Bubblewrap
|
||||
----------
|
||||
|
||||
Bubblewrap is an unprivileged sandboxing tool.
|
||||
|
||||
* Download Bubblewrap here:
|
||||
`<https://github.com/projectatomic/bubblewrap/releases/download/v0.3.3/bubblewrap-0.3.3.tar.xz>`_
|
||||
and move it in the :file:`${QP_ROOT}/external` directory
|
||||
|
||||
* Extract the archive, go into the :file:`bubblewrap-*` directory and run
|
||||
the following commands
|
||||
|
||||
.. code:: bash
|
||||
|
||||
./configure --prefix=$QP_ROOT && make -j 8
|
||||
make install-exec-am
|
||||
|
||||
|
||||
With Debian or Ubuntu, you can use
|
||||
|
||||
.. code:: bash
|
||||
|
||||
sudo apt install bubblewrap
|
||||
|
||||
|
||||
|
||||
OCaml
|
||||
@ -327,7 +277,7 @@ OCaml
|
||||
`<https://raw.githubusercontent.com/ocaml/opam/master/shell/install.sh>`_
|
||||
and move it in the :file:`${QP_ROOT}/external` directory
|
||||
|
||||
* If you use OCaml only with the |qp|, you can install the OPAM directory
|
||||
* If you use OCaml only with |qp|, you can install the OPAM directory
|
||||
containing the compiler and all the installed libraries in the
|
||||
:file:`${QP_ROOT}/external` directory as
|
||||
|
||||
@ -352,14 +302,14 @@ OCaml
|
||||
|
||||
.. code:: bash
|
||||
|
||||
opam init --comp=4.07.1
|
||||
opam init --comp=4.11.1
|
||||
eval `${QP_ROOT}/bin/opam env`
|
||||
|
||||
If the installation fails because of bwrap, you can initialize opam using:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
opam init --disable-sandboxing --comp=4.07.1
|
||||
opam init --disable-sandboxing --comp=4.11.1
|
||||
eval `${QP_ROOT}/bin/opam env`
|
||||
|
||||
* Install the required external OCaml libraries
|
||||
@ -369,17 +319,6 @@ OCaml
|
||||
opam install ocamlbuild cryptokit zmq sexplib ppx_sexp_conv ppx_deriving getopt
|
||||
|
||||
|
||||
EZFIO
|
||||
-----
|
||||
|
||||
*EZFIO* is the Easy Fortran Input/Output library generator.
|
||||
|
||||
* Download EZFIO here : `<https://gitlab.com/scemama/EZFIO/-/archive/master/EZFIO-master.tar.gz>`_ and move
|
||||
the downloaded archive in the :file:`${QP_ROOT}/external` directory
|
||||
|
||||
* Extract the archive, and rename it as :file:`${QP_ROOT}/external/ezfio`
|
||||
|
||||
|
||||
Docopt
|
||||
------
|
||||
|
||||
@ -406,7 +345,7 @@ resultsFile
|
||||
*resultsFile* is a Python package to extract data from output files of quantum chemistry
|
||||
codes.
|
||||
|
||||
If you have *pip* for Python3, you can do
|
||||
If you have *pip* for Python3, you can do
|
||||
|
||||
.. code:: bash
|
||||
|
||||
@ -414,3 +353,4 @@ If you have *pip* for Python3, you can do
|
||||
|
||||
|
||||
|
||||
|
||||
|
63
config/ifort_2019_avx.cfg
Normal file
63
config/ifort_2019_avx.cfg
Normal file
@ -0,0 +1,63 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FC : -traceback
|
||||
FCFLAGS : -xAVX -O2 -ip -ftz -g
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0 -implicitnone
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
64
config/ifort_2019_avx_mpi.cfg
Normal file
64
config/ifort_2019_avx_mpi.cfg
Normal file
@ -0,0 +1,64 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : mpiifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FCFLAGS : -mavx -axAVX -O2 -ip -ftz -g -traceback
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -march=corei7 -O2 -ip -ftz
|
||||
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0 -implicitnone
|
||||
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
63
config/ifort_2019_sse4.cfg
Normal file
63
config/ifort_2019_sse4.cfg
Normal file
@ -0,0 +1,63 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FC : -traceback
|
||||
FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0 -implicitnone
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
64
config/ifort_2019_sse4_mpi.cfg
Normal file
64
config/ifort_2019_sse4_mpi.cfg
Normal file
@ -0,0 +1,64 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : mpiifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz -g -traceback
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz
|
||||
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0 -implicitnone
|
||||
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
63
config/ifort_2019_xHost.cfg
Normal file
63
config/ifort_2019_xHost.cfg
Normal file
@ -0,0 +1,63 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=64 -DINTEL -DSET_MAX_ACT -DSET_NESTED
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FC : -traceback
|
||||
FCFLAGS : -xHost -O2 -ip -ftz -g
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -xSSE4.2 -O2 -ip -ftz
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -xSSE2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -xSSE2 -C -fpe0 -implicitnone
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT
|
||||
|
||||
# Global options
|
||||
################
|
@ -9,7 +9,7 @@
|
||||
FC : mpiifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT
|
||||
|
||||
# Global options
|
||||
################
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL -DSET_MAX_ACT
|
||||
|
||||
# Global options
|
||||
################
|
@ -9,7 +9,7 @@
|
||||
FC : mpiifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL -DSET_MAX_ACT
|
||||
|
||||
# Global options
|
||||
################
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=64 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=64 -DINTEL -DSET_MAX_ACT
|
||||
|
||||
# Global options
|
||||
################
|
@ -1,66 +0,0 @@
|
||||
# Common flags
|
||||
##############
|
||||
#
|
||||
# -mkl=[parallel|sequential] : Use the MKL library
|
||||
# --ninja : Allow the utilisation of ninja. It is mandatory !
|
||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||
#
|
||||
[COMMON]
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
|
||||
|
||||
# Global options
|
||||
################
|
||||
#
|
||||
# 1 : Activate
|
||||
# 0 : Deactivate
|
||||
#
|
||||
[OPTION]
|
||||
MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
|
||||
CACHE : 0 ; Enable cache_compile.py
|
||||
OPENMP : 1 ; Append OpenMP flags
|
||||
|
||||
# Optimization flags
|
||||
####################
|
||||
#
|
||||
# -xHost : Compile a binary optimized for the current architecture
|
||||
# -O2 : O3 not better than O2.
|
||||
# -ip : Inter-procedural optimizations
|
||||
# -ftz : Flushes denormal results to zero
|
||||
#
|
||||
[OPT]
|
||||
FC : -traceback
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz -g
|
||||
|
||||
|
||||
# Profiling flags
|
||||
#################
|
||||
#
|
||||
[PROFILE]
|
||||
FC : -p -g
|
||||
FCFLAGS : -msse4.2 -O2 -ip -ftz
|
||||
|
||||
|
||||
# Debugging flags
|
||||
#################
|
||||
#
|
||||
# -traceback : Activate backtrace on runtime
|
||||
# -fpe0 : All floating point exaceptions
|
||||
# -C : Checks uninitialized variables, array subscripts, etc...
|
||||
# -g : Extra debugging information
|
||||
# -msse4.2 : Valgrind needs a very simple x86 executable
|
||||
#
|
||||
[DEBUG]
|
||||
FC : -g -traceback
|
||||
FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone
|
||||
|
||||
|
||||
# OpenMP flags
|
||||
#################
|
||||
#
|
||||
[OPENMP]
|
||||
FC : -qopenmp
|
||||
IRPF90_FLAGS : --openmp
|
||||
|
@ -1,12 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
# go in qp2/src/fci to run check_omp_actual_setup
|
||||
# to see if we can run in parallel an omp section in another one
|
||||
echo ""
|
||||
echo "Please wait..."
|
||||
echo ""
|
||||
cd ../../src/fci
|
||||
ninja || echo "Please recompile from the root"
|
||||
echo ""
|
||||
./check_omp_actual_setup
|
||||
cd ../../scripts/verif_omp
|
@ -9,7 +9,7 @@ then
|
||||
else
|
||||
|
||||
$1 --version > /dev/null \
|
||||
&& $1 -O0 -fopenmp check_omp_v2.f90 \
|
||||
&& $1 -O0 -fopenmp check_omp.f90 \
|
||||
&& ./a.out | tail -n 1
|
||||
|
||||
|
||||
|
@ -20,7 +20,7 @@ echo "1 2 3" >> $FILE
|
||||
for comp in $list_comp
|
||||
do
|
||||
$comp --version > /dev/null \
|
||||
&& $comp -O0 -fopenmp check_omp_v2.f90 \
|
||||
&& $comp -O0 -fopenmp check_omp.f90 \
|
||||
&& echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE
|
||||
|
||||
done
|
||||
|
@ -288,7 +288,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
|
||||
call write_int(6,nproc_target,'Number of threads for PT2')
|
||||
call write_double(6,mem,'Memory (Gb)')
|
||||
|
||||
call omp_set_max_active_levels(1)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
|
||||
|
||||
print '(A)', '========== ======================= ===================== ===================== ==========='
|
||||
@ -315,7 +315,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
|
||||
call omp_set_max_active_levels(8)
|
||||
call set_multiple_levels_omp(.True.)
|
||||
|
||||
print '(A)', '========== ======================= ===================== ===================== ==========='
|
||||
|
||||
|
@ -4,7 +4,7 @@ subroutine run_slave_cipsi
|
||||
! Helper program for distributed parallelism
|
||||
END_DOC
|
||||
|
||||
call omp_set_max_active_levels(1)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
distributed_davidson = .False.
|
||||
read_wf = .False.
|
||||
SOFT_TOUCH read_wf distributed_davidson
|
||||
@ -171,9 +171,9 @@ subroutine run_slave_main
|
||||
call write_double(6,(t1-t0),'Broadcast time')
|
||||
|
||||
!---
|
||||
call omp_set_max_active_levels(8)
|
||||
call set_multiple_levels_omp(.True.)
|
||||
call davidson_slave_tcp(0)
|
||||
call omp_set_max_active_levels(1)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
print *, mpi_rank, ': Davidson done'
|
||||
!---
|
||||
|
||||
|
@ -508,8 +508,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
|
||||
endif
|
||||
|
||||
|
||||
!call omp_set_max_active_levels(5)
|
||||
call set_multiple_levels_omp()
|
||||
call set_multiple_levels_omp(.True.)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
|
@ -464,8 +464,7 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
|
||||
print *, irp_here, ': Failed in zmq_set_running'
|
||||
endif
|
||||
|
||||
!call omp_set_max_active_levels(4)
|
||||
call set_multiple_levels_omp()
|
||||
call set_multiple_levels_omp(.True.)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
|
@ -464,8 +464,7 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
|
||||
print *, irp_here, ': Failed in zmq_set_running'
|
||||
endif
|
||||
|
||||
!call omp_set_max_active_levels(4)
|
||||
call set_multiple_levels_omp()
|
||||
call set_multiple_levels_omp(.True.)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
|
@ -72,7 +72,7 @@ subroutine run_dress_slave(thread,iproce,energy)
|
||||
provide psi_energy
|
||||
ending = dress_N_cp+1
|
||||
ntask_tbd = 0
|
||||
call omp_set_max_active_levels(8)
|
||||
call set_multiple_levels_omp(.True.)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(SHARED) &
|
||||
!$OMP PRIVATE(interesting, breve_delta_m, task_id) &
|
||||
@ -84,7 +84,7 @@ subroutine run_dress_slave(thread,iproce,energy)
|
||||
zmq_socket_push = new_zmq_push_socket(thread)
|
||||
integer, external :: connect_to_taskserver
|
||||
!$OMP CRITICAL
|
||||
call omp_set_max_active_levels(1)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
|
||||
print *, irp_here, ': Unable to connect to task server'
|
||||
stop -1
|
||||
@ -296,7 +296,7 @@ subroutine run_dress_slave(thread,iproce,energy)
|
||||
!$OMP END CRITICAL
|
||||
|
||||
!$OMP END PARALLEL
|
||||
call omp_set_max_active_levels(1)
|
||||
call set_multiple_levels_omp(.False.)
|
||||
! do i=0,dress_N_cp+1
|
||||
! call omp_destroy_lock(lck_sto(i))
|
||||
! end do
|
||||
|
@ -1,174 +0,0 @@
|
||||
program check_omp_actual_setup
|
||||
|
||||
use omp_lib
|
||||
|
||||
implicit none
|
||||
|
||||
integer :: accu, accu2
|
||||
integer :: s, n_setting
|
||||
logical :: verbose, test_versions
|
||||
logical, allocatable :: is_working(:)
|
||||
|
||||
verbose = .True.
|
||||
test_versions = .False.
|
||||
n_setting = 4
|
||||
|
||||
allocate(is_working(n_setting))
|
||||
|
||||
is_working = .False.
|
||||
|
||||
! set the number of threads
|
||||
call omp_set_num_threads(2)
|
||||
|
||||
do s = 1, n_setting
|
||||
|
||||
accu = 0
|
||||
accu2 = 0
|
||||
|
||||
call omp_set_max_active_levels(1)
|
||||
call omp_set_nested(.False.)
|
||||
|
||||
if (s==1) then
|
||||
call set_multiple_levels_omp()
|
||||
elseif (s==2) then
|
||||
call omp_set_max_active_levels(5)
|
||||
elseif (s==3) then
|
||||
call omp_set_nested(.True.)
|
||||
else
|
||||
call omp_set_nested(.True.)
|
||||
call omp_set_max_active_levels(5)
|
||||
endif
|
||||
|
||||
! Level 1
|
||||
!$OMP PARALLEL
|
||||
if (verbose) then
|
||||
print*,'Num threads level 1:',omp_get_num_threads()
|
||||
endif
|
||||
|
||||
! Level 2
|
||||
!$OMP PARALLEL
|
||||
if (verbose) then
|
||||
print*,'Num threads level 2:',omp_get_num_threads()
|
||||
endif
|
||||
|
||||
! Level 3
|
||||
!$OMP PARALLEL
|
||||
if (verbose) then
|
||||
print*,'Num threads level 3:',omp_get_num_threads()
|
||||
endif
|
||||
|
||||
call check_omp_in_subroutine(accu2)
|
||||
|
||||
! Level 4
|
||||
!$OMP PARALLEL
|
||||
|
||||
if (verbose) then
|
||||
print*,'Num threads level 4:',omp_get_num_threads()
|
||||
endif
|
||||
|
||||
!$OMP ATOMIC
|
||||
accu = accu + 1
|
||||
!$OMP END ATOMIC
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
if (verbose) then
|
||||
print*,'Setting:',s,'accu=',accu
|
||||
print*,'Setting:',s,'accu2=',accu2
|
||||
endif
|
||||
|
||||
if (accu == 16 .and. accu2 == 16) then
|
||||
is_working(s) = .True.
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
if (verbose) then
|
||||
if (is_working(2)) then
|
||||
print*,'The parallelization works on 4 levels with:'
|
||||
print*,'call omp_set_max_active_levels(5)'
|
||||
print*,''
|
||||
print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
|
||||
elseif (is_working(3)) then
|
||||
print*,'The parallelization works on 4 levels with:'
|
||||
print*,'call omp_set_nested(.True.)'
|
||||
print*,''
|
||||
print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
|
||||
elseif (is_working(4)) then
|
||||
print*,'The parallelization works on 4 levels with:'
|
||||
print*,'call omp_set_nested(.True.)'
|
||||
print*,'+'
|
||||
print*,'call omp_set_max_active_levels(5)'
|
||||
print*,''
|
||||
print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
|
||||
else
|
||||
print*,'The parallelization on multiple levels does not work with:'
|
||||
print*,'call omp_set_max_active_levels(5)'
|
||||
print*,'or'
|
||||
print*,'call omp_set_nested(.True.)'
|
||||
print*,'or'
|
||||
print*,'call omp_set_nested(.True.)'
|
||||
print*,'+'
|
||||
print*,'call omp_set_max_active_levels(5)'
|
||||
print*,''
|
||||
print*,'Try an other compiler and good luck...'
|
||||
endif
|
||||
|
||||
if (is_working(1)) then
|
||||
print*,''
|
||||
print*,'=========================================================='
|
||||
print*,'Your actual set up works for parallelization with 4 levels'
|
||||
print*,'=========================================================='
|
||||
print*,''
|
||||
else
|
||||
print*,''
|
||||
print*,'==================================================================='
|
||||
print*,'Your actual set up does not work for parallelization with 4 levels'
|
||||
print*,'Please look at the previous messages to understand the requirements'
|
||||
print*,'==================================================================='
|
||||
print*,''
|
||||
endif
|
||||
endif
|
||||
|
||||
! List of working flags
|
||||
if (test_versions) then
|
||||
print*,is_working(2:4)
|
||||
endif
|
||||
|
||||
! IRPF90_FLAGS
|
||||
if (is_working(2)) then
|
||||
print*,'-DSET_MAX_ACT'
|
||||
elseif (is_working(3)) then
|
||||
print*,'-DSET_NESTED'
|
||||
elseif (is_working(4)) then
|
||||
print*,'-DSET_MAX_ACT -DSET_NESTED'
|
||||
else
|
||||
print*,'ERROR'
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
subroutine check_omp_in_subroutine(accu2)
|
||||
|
||||
implicit none
|
||||
|
||||
integer, intent(inout) :: accu2
|
||||
|
||||
!$OMP PARALLEL
|
||||
|
||||
!$OMP ATOMIC
|
||||
accu2 = accu2 + 1
|
||||
!$OMP END ATOMIC
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
end
|
@ -1,16 +1,26 @@
|
||||
subroutine set_multiple_levels_omp()
|
||||
subroutine set_multiple_levels_omp(activate)
|
||||
|
||||
! Doc : idk
|
||||
BEGIN_DOC
|
||||
! If true, activate OpenMP nested parallelism. If false, deactivate.
|
||||
END_DOC
|
||||
|
||||
implicit none
|
||||
logical, intent(in) :: activate
|
||||
|
||||
IRP_IF SET_MAX_ACT
|
||||
!print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)'
|
||||
if (activate) then
|
||||
call omp_set_max_active_levels(5)
|
||||
IRP_ENDIF
|
||||
IRP_IF SET_NESTED
|
||||
!print*,'SET_NESTED: True, call omp_set_nested(.True.)'
|
||||
call omp_set_nested(.True.)
|
||||
IRP_ENDIF
|
||||
|
||||
IRP_IF SET_NESTED
|
||||
call omp_set_nested(.True.)
|
||||
IRP_ENDIF
|
||||
|
||||
else
|
||||
|
||||
call omp_set_max_active_levels(1)
|
||||
|
||||
IRP_IF SET_NESTED
|
||||
call omp_set_nested(.False.)
|
||||
IRP_ENDIF
|
||||
end if
|
||||
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user