10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-11-19 04:22:32 +01:00

Merge pull request #179 from Ydrnan/master

OpenMP nested
This commit is contained in:
Anthony Scemama 2021-11-29 10:06:07 +01:00 committed by GitHub
commit ba51208ac2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 487 additions and 7 deletions

View File

@ -13,7 +13,7 @@
FC : gfortran -g -ffree-line-length-none -I . -fPIC FC : gfortran -g -ffree-line-length-none -I . -fPIC
LAPACK_LIB : -lblas -llapack LAPACK_LIB : -lblas -llapack
IRPF90 : irpf90 IRPF90 : irpf90
IRPF90_FLAGS : --ninja --align=32 --assert IRPF90_FLAGS : --ninja --align=32 --assert -DGNU_CHECK_OMP
# Global options # Global options
################ ################

View File

@ -0,0 +1,12 @@
#!/bin/sh
# go in qp2/src/fci to run check_omp_actual_setup
# to see if we can run in parallel an omp section in another one
echo ""
echo "Please wait..."
echo ""
cd ../../src/fci
ninja || echo "Please recompile from the root"
echo ""
./check_omp_actual_setup
cd ../../scripts/verif_omp

View File

@ -0,0 +1,175 @@
program check_omp_v2
use omp_lib
implicit none
integer :: accu, accu2
integer :: s, n_setting
logical :: verbose, test_versions
logical, allocatable :: is_working(:)
verbose = .False.
test_versions = .True.
n_setting = 4
allocate(is_working(n_setting))
is_working = .False.
! set the number of threads
call omp_set_num_threads(2)
do s = 1, n_setting
accu = 0
accu2 = 0
call omp_set_max_active_levels(1)
call omp_set_nested(.False.)
if (s==1) then
!call set_multiple_levels_omp()
cycle
elseif (s==2) then
call omp_set_max_active_levels(5)
elseif (s==3) then
call omp_set_nested(.True.)
else
call omp_set_nested(.True.)
call omp_set_max_active_levels(5)
endif
! Level 1
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 1:',omp_get_num_threads()
endif
! Level 2
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 2:',omp_get_num_threads()
endif
! Level 3
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 3:',omp_get_num_threads()
endif
call check_omp_in_subroutine(accu2)
! Level 4
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 4:',omp_get_num_threads()
endif
!$OMP ATOMIC
accu = accu + 1
!$OMP END ATOMIC
!$OMP END PARALLEL
!$OMP END PARALLEL
!$OMP END PARALLEL
!$OMP END PARALLEL
if (verbose) then
print*,'Setting:',s,'accu=',accu
print*,'Setting:',s,'accu2=',accu2
endif
if (accu == 16 .and. accu2 == 16) then
is_working(s) = .True.
endif
enddo
if (verbose) then
if (is_working(2)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
elseif (is_working(3)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_nested(.True.)'
print*,''
print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
elseif (is_working(4)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_nested(.True.)'
print*,'+'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
else
print*,'The parallelization on multiple levels does not work with:'
print*,'call omp_set_max_active_levels(5)'
print*,'or'
print*,'call omp_set_nested(.True.)'
print*,'or'
print*,'call omp_set_nested(.True.)'
print*,'+'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Try an other compiler and good luck...'
endif
! if (is_working(1)) then
! print*,''
! print*,'=========================================================='
! print*,'Your actual set up works for parallelization with 4 levels'
! print*,'=========================================================='
! print*,''
! else
! print*,''
! print*,'==================================================================='
! print*,'Your actual set up does not work for parallelization with 4 levels'
! print*,'Please look at the previous messages to understand the requirements'
! print*,'==================================================================='
! print*,''
! endif
endif
! List of working flags
if (test_versions) then
print*,'Tests:',is_working(2:4)
endif
! IRPF90_FLAGS
if (is_working(2)) then
print*,'-DSET_MAX_ACT'
elseif (is_working(3)) then
print*,'-DSET_NESTED'
elseif (is_working(4)) then
print*,'-DSET_MAX_ACT -DSET_NESTED'
else
print*,'ERROR'
endif
end
subroutine check_omp_in_subroutine(accu2)
implicit none
integer, intent(inout) :: accu2
!$OMP PARALLEL
!$OMP ATOMIC
accu2 = accu2 + 1
!$OMP END ATOMIC
!$OMP END PARALLEL
end

View File

@ -0,0 +1,19 @@
#!/bin/sh
# take one argument which is the compiler used
# return the required IRPF90_FLAGS for the $1 compiler
if [ -z "$1" ]
then
echo "Give the compiler in argument"
else
$1 --version > /dev/null \
&& $1 -O0 -fopenmp check_omp_v2.f90 \
&& ./a.out | tail -n 1
# if there is an error or if the compiler is not found
$1 --version > /dev/null || echo 'compiler not found'
fi

30
scripts/verif_omp/study_omp.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/sh
# list of compilers
list_comp="ifort gfortran-7 gfortran-8 gfortran-9"
# file to store the results
FILE=results.dat
touch $FILE
rm $FILE
# Comments
echo "1: omp_set_max_active_levels(5)" >> $FILE
echo "2: omp_set_nested(.True.)" >> $FILE
echo "3: 1 + 2" >> $FILE
echo "" >> $FILE
echo "1 2 3" >> $FILE
# loop on the comp
for comp in $list_comp
do
$comp --version > /dev/null \
&& $comp -O0 -fopenmp check_omp_v2.f90 \
&& echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE
done
# Display
cat $FILE

View File

@ -0,0 +1,49 @@
#!/bin/bash
# Compiler
COMP=$1
# Path to file.cfg
config_PATH="../../config/"
END="*.cfg"
CONFIG="/config/"
#LIST=${config_PATH}${COMP}${END} # without ${QP_ROOT}
LIST=${QP_ROOT}${CONFIG}${COMP}${END}
if [ -z "$1" ]
then
echo "Give the compiler in argument"
else
# List of the config files for the compiler
#list_files=$(ls ../../config/$comp*.cfg) #does not give the right list
list_files=${LIST}
echo "Files that will be modified:"
echo $list_files
# Flags that must be added
FLAGS=$(./check_required_setup.sh $COMP)
# Add the flags
for file in $list_files
do
echo $file
BASE="IRPF90_FLAGS : --ninja"
ACTUAL=$(grep "$BASE" $file)
# To have only one time each flag
grep " -DSET_MAX_ACT" $file && ${ACTUAL/" -DSET_MAX"/""}
grep " -DSET_NESTED" $file && ${ACTUAL/" -DSET_NESTED"/""}
SPACE=" "
NEW=${ACTUAL}${SPACE}${FLAGS}
# Debug
#echo ${NEW}
sed "s/${ACTUAL}/${NEW}/" $file
# -i # to change the files
done
fi

View File

@ -508,7 +508,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
endif endif
call omp_set_max_active_levels(5) !call omp_set_max_active_levels(5)
call set_multiple_levels_omp()
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()

View File

@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
print *, irp_here, ': Failed in zmq_set_running' print *, irp_here, ': Failed in zmq_set_running'
endif endif
call omp_set_max_active_levels(4) !call omp_set_max_active_levels(4)
call set_multiple_levels_omp()
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
if (ithread == 0 ) then if (ithread == 0 ) then

View File

@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
print *, irp_here, ': Failed in zmq_set_running' print *, irp_here, ': Failed in zmq_set_running'
endif endif
call omp_set_max_active_levels(4) !call omp_set_max_active_levels(4)
call set_multiple_levels_omp()
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
if (ithread == 0 ) then if (ithread == 0 ) then

View File

@ -0,0 +1,174 @@
program check_omp_actual_setup
use omp_lib
implicit none
integer :: accu, accu2
integer :: s, n_setting
logical :: verbose, test_versions
logical, allocatable :: is_working(:)
verbose = .True.
test_versions = .False.
n_setting = 4
allocate(is_working(n_setting))
is_working = .False.
! set the number of threads
call omp_set_num_threads(2)
do s = 1, n_setting
accu = 0
accu2 = 0
call omp_set_max_active_levels(1)
call omp_set_nested(.False.)
if (s==1) then
call set_multiple_levels_omp()
elseif (s==2) then
call omp_set_max_active_levels(5)
elseif (s==3) then
call omp_set_nested(.True.)
else
call omp_set_nested(.True.)
call omp_set_max_active_levels(5)
endif
! Level 1
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 1:',omp_get_num_threads()
endif
! Level 2
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 2:',omp_get_num_threads()
endif
! Level 3
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 3:',omp_get_num_threads()
endif
call check_omp_in_subroutine(accu2)
! Level 4
!$OMP PARALLEL
if (verbose) then
print*,'Num threads level 4:',omp_get_num_threads()
endif
!$OMP ATOMIC
accu = accu + 1
!$OMP END ATOMIC
!$OMP END PARALLEL
!$OMP END PARALLEL
!$OMP END PARALLEL
!$OMP END PARALLEL
if (verbose) then
print*,'Setting:',s,'accu=',accu
print*,'Setting:',s,'accu2=',accu2
endif
if (accu == 16 .and. accu2 == 16) then
is_working(s) = .True.
endif
enddo
if (verbose) then
if (is_working(2)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
elseif (is_working(3)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_nested(.True.)'
print*,''
print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
elseif (is_working(4)) then
print*,'The parallelization works on 4 levels with:'
print*,'call omp_set_nested(.True.)'
print*,'+'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
else
print*,'The parallelization on multiple levels does not work with:'
print*,'call omp_set_max_active_levels(5)'
print*,'or'
print*,'call omp_set_nested(.True.)'
print*,'or'
print*,'call omp_set_nested(.True.)'
print*,'+'
print*,'call omp_set_max_active_levels(5)'
print*,''
print*,'Try an other compiler and good luck...'
endif
if (is_working(1)) then
print*,''
print*,'=========================================================='
print*,'Your actual set up works for parallelization with 4 levels'
print*,'=========================================================='
print*,''
else
print*,''
print*,'==================================================================='
print*,'Your actual set up does not work for parallelization with 4 levels'
print*,'Please look at the previous messages to understand the requirements'
print*,'==================================================================='
print*,''
endif
endif
! List of working flags
if (test_versions) then
print*,is_working(2:4)
endif
! IRPF90_FLAGS
if (is_working(2)) then
print*,'-DSET_MAX_ACT'
elseif (is_working(3)) then
print*,'-DSET_NESTED'
elseif (is_working(4)) then
print*,'-DSET_MAX_ACT -DSET_NESTED'
else
print*,'ERROR'
endif
end
subroutine check_omp_in_subroutine(accu2)
implicit none
integer, intent(inout) :: accu2
!$OMP PARALLEL
!$OMP ATOMIC
accu2 = accu2 + 1
!$OMP END ATOMIC
!$OMP END PARALLEL
end

View File

@ -0,0 +1,16 @@
subroutine set_multiple_levels_omp()
! Doc : idk
implicit none
IRP_IF SET_MAX_ACT
!print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)'
call omp_set_max_active_levels(5)
IRP_ENDIF
IRP_IF SET_NESTED
!print*,'SET_NESTED: True, call omp_set_nested(.True.)'
call omp_set_nested(.True.)
IRP_ENDIF
end