mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-06 21:43:39 +01:00
commit
ba51208ac2
@ -13,7 +13,7 @@
|
||||
FC : gfortran -g -ffree-line-length-none -I . -fPIC
|
||||
LAPACK_LIB : -lblas -llapack
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert -DGNU_CHECK_OMP
|
||||
|
||||
# Global options
|
||||
################
|
||||
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
|
||||
# Global options
|
||||
################
|
||||
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
|
||||
|
||||
# Global options
|
||||
################
|
||||
|
@ -9,7 +9,7 @@
|
||||
FC : ifort -fpic
|
||||
LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
|
||||
IRPF90 : irpf90
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
IRPF90_FLAGS : --ninja --align=32 -DINTEL
|
||||
|
||||
# Global options
|
||||
################
|
||||
|
12
scripts/verif_omp/check_actual_setup.sh
Executable file
12
scripts/verif_omp/check_actual_setup.sh
Executable file
@ -0,0 +1,12 @@
|
||||
#!/bin/sh

# Build and run check_omp_actual_setup from qp2/src/fci to see whether the
# current configuration can run an OpenMP parallel section inside another one.
#
# Must be launched from qp2/scripts/verif_omp: every path below is relative
# to that directory.

echo ""
echo "Please wait..."
echo ""

# Stop immediately if the directory is missing; otherwise ninja and the
# test binary would be looked up in whatever directory we happen to be in.
cd ../../src/fci || {
    echo "Cannot enter ../../src/fci: run this script from qp2/scripts/verif_omp" >&2
    exit 1
}

# Only run the checker when the build succeeded, so that a stale (or absent)
# binary is never mistaken for the result of the current configuration.
if ninja
then
    echo ""
    ./check_omp_actual_setup
    status=$?
else
    echo "Please recompile from the root"
    status=1
fi

cd ../../scripts/verif_omp
exit $status
|
175
scripts/verif_omp/check_omp_v2.f90
Normal file
175
scripts/verif_omp/check_omp_v2.f90
Normal file
@ -0,0 +1,175 @@
|
||||
program check_omp_v2

  ! Probe which OpenMP runtime calls are needed for four nested parallel
  ! regions to really fork at every level.
  !
  ! Each candidate setting is tried in turn with 2 threads per region.  One
  ! counter is incremented in the innermost (4th) region of this program, the
  ! other inside a region opened by check_omp_in_subroutine, which is called
  ! from the 3rd region.  A setting is accepted when both counters reach 16.
  !
  ! The last line printed is the list of IRPF90 flags to use (or 'ERROR').

  use omp_lib

  implicit none

  integer :: hits_inline, hits_call
  integer :: setting, n_setting
  logical :: verbose, test_versions
  logical, allocatable :: is_working(:)

  verbose       = .False.
  test_versions = .True.
  n_setting     = 4

  allocate(is_working(n_setting))
  is_working(:) = .False.

  ! Two threads per parallel region
  call omp_set_num_threads(2)

  settings: do setting = 1, n_setting

    hits_inline = 0
    hits_call   = 0

    ! Reset the runtime before applying the setting under test
    call omp_set_max_active_levels(1)
    call omp_set_nested(.False.)

    select case (setting)
    case (1)
      ! Slot kept for set_multiple_levels_omp(), which is not called here
      cycle settings
    case (2)
      call omp_set_max_active_levels(5)
    case (3)
      call omp_set_nested(.True.)
    case default
      call omp_set_nested(.True.)
      call omp_set_max_active_levels(5)
    end select

    ! Level 1
    !$omp parallel
    if (verbose) print *, 'Num threads level 1:', omp_get_num_threads()

    ! Level 2
    !$omp parallel
    if (verbose) print *, 'Num threads level 2:', omp_get_num_threads()

    ! Level 3
    !$omp parallel
    if (verbose) print *, 'Num threads level 3:', omp_get_num_threads()

    call check_omp_in_subroutine(hits_call)

    ! Level 4
    !$omp parallel
    if (verbose) print *, 'Num threads level 4:', omp_get_num_threads()

    !$omp atomic
    hits_inline = hits_inline + 1

    !$omp end parallel
    !$omp end parallel
    !$omp end parallel
    !$omp end parallel

    if (verbose) then
      print *, 'Setting:', setting, 'accu=', hits_inline
      print *, 'Setting:', setting, 'accu2=', hits_call
    end if

    ! Each setting is visited once, so a plain assignment is enough
    is_working(setting) = (hits_inline == 16) .and. (hits_call == 16)

  end do settings

  ! Human-readable explanation of the first working setting
  if (verbose) then
    if (is_working(2)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
    else if (is_working(3)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_nested(.True.)'
      print *, ''
      print *, 'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
    else if (is_working(4)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_nested(.True.)'
      print *, '+'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
    else
      print *, 'The parallelization on multiple levels does not work with:'
      print *, 'call omp_set_max_active_levels(5)'
      print *, 'or'
      print *, 'call omp_set_nested(.True.)'
      print *, 'or'
      print *, 'call omp_set_nested(.True.)'
      print *, '+'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Try an other compiler and good luck...'
    end if
    ! No report on is_working(1) here: setting 1 is never exercised above.
  end if

  ! Raw pass/fail list for settings 2 to 4
  if (test_versions) then
    print *, 'Tests:', is_working(2:4)
  end if

  ! IRPF90_FLAGS matching the first working setting
  if (is_working(2)) then
    print *, '-DSET_MAX_ACT'
  else if (is_working(3)) then
    print *, '-DSET_NESTED'
  else if (is_working(4)) then
    print *, '-DSET_MAX_ACT -DSET_NESTED'
  else
    print *, 'ERROR'
  end if

end program check_omp_v2
|
||||
|
||||
subroutine check_omp_in_subroutine(accu2)

  ! Open one parallel region and add 1 to accu2 from every thread of it.
  ! The update is atomic because accu2 is shared by all the threads.

  implicit none

  integer, intent(inout) :: accu2

  !$omp parallel default(none) shared(accu2)
  !$omp atomic
  accu2 = accu2 + 1
  !$omp end parallel

end subroutine check_omp_in_subroutine
|
19
scripts/verif_omp/check_required_setup.sh
Executable file
19
scripts/verif_omp/check_required_setup.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/sh

# Usage: check_required_setup.sh <compiler>
#
# Compile check_omp_v2.f90 with the given Fortran compiler and print the last
# line of its output, i.e. the IRPF90_FLAGS required for that compiler.
# Exits with a non-zero status when no flags could be produced.

compiler="$1"

if [ -z "$compiler" ]
then
    echo "Give the compiler in argument"
    exit 1
fi

# Probe the compiler once; silence the probe completely so that only our own
# message is shown when the compiler is missing.
if ! "$compiler" --version > /dev/null 2>&1
then
    echo 'compiler not found'
    exit 1
fi

# Build the test program; say so on stderr when the build fails instead of
# silently printing nothing.
if ! "$compiler" -O0 -fopenmp check_omp_v2.f90
then
    echo "compilation of check_omp_v2.f90 failed with $compiler" >&2
    exit 1
fi

# The flags are on the last line printed by the test program
./a.out | tail -n 1
|
30
scripts/verif_omp/study_omp.sh
Executable file
30
scripts/verif_omp/study_omp.sh
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh

# Run check_omp_v2.f90 with several compilers and tabulate, for each one,
# which of the three OpenMP settings gives working nested parallelism.

# Compilers to try
compilers="ifort gfortran-7 gfortran-8 gfortran-9"

# Report file, rebuilt from scratch at every run
report=results.dat
rm -f $report

# Legend and column header
{
    echo "1: omp_set_max_active_levels(5)"
    echo "2: omp_set_nested(.True.)"
    echo "3: 1 + 2"
    echo ""
    echo "1 2 3"
} >> $report

# One result line per compiler that is installed and builds the test
for fc in $compilers
do
    if $fc --version > /dev/null && $fc -O0 -fopenmp check_omp_v2.f90
    then
        verdicts=$(./a.out | grep "Tests:" | cut -d ":" -f2-)
        banner=$($fc --version | head -n 1)
        # Left unquoted on purpose: words are re-joined with single spaces
        echo $verdicts : $banner >> $report
    fi
done

# Show the table
cat $report
|
||||
|
49
scripts/verif_omp/update_comp.sh
Executable file
49
scripts/verif_omp/update_comp.sh
Executable file
@ -0,0 +1,49 @@
|
||||
#!/bin/bash

# Usage: update_comp.sh <compiler>
#
# For every config file <compiler>*.cfg, show the file as it would look with
# the OpenMP flags required by <compiler> (as reported by
# check_required_setup.sh) appended to its "IRPF90_FLAGS : --ninja ..." line.
# Flags from a previous run are stripped first so each flag appears once.
#
# This is a dry run: the result is printed and the files are left untouched.
# Add -i to the sed call below to modify the files in place.
#
# Must be launched from qp2/scripts/verif_omp (check_required_setup.sh is
# called with a relative path).

# Compiler
COMP=$1

if [ -z "$COMP" ]
then
    echo "Give the compiler in argument"
    exit 1
fi

# Directory of the config files: ${QP_ROOT}/config, or the path relative to
# this directory when QP_ROOT is not set.
if [ -n "${QP_ROOT}" ]
then
    config_dir="${QP_ROOT}/config"
else
    config_dir="../../config"
fi

# Glob pattern; it is expanded where $list_files is used unquoted
list_files=${config_dir}/${COMP}*.cfg
echo "Files that will be modified:"
echo $list_files

# Flags that must be added. The Fortran program prints them with a leading
# blank, so trim surrounding whitespace before checking them.
FLAGS=$(./check_required_setup.sh "$COMP" \
        | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')

# Refuse anything that is not a known flag list ("ERROR", "compiler not
# found", empty output...): it must never end up in a config file.
case "$FLAGS" in
    "-DSET_MAX_ACT"|"-DSET_NESTED"|"-DSET_MAX_ACT -DSET_NESTED")
        ;;
    *)
        echo "Could not determine the OpenMP flags for $COMP (got: '$FLAGS')" >&2
        exit 1
        ;;
esac

# Line to update; tolerant to the amount of blanks around the colon
flags_line_re='IRPF90_FLAGS[[:space:]]*:[[:space:]]*--ninja'

# Add the flags
for file in $list_files
do
    # An unmatched glob is passed through literally: skip it
    if [ ! -f "$file" ]
    then
        echo "No such config file: $file" >&2
        continue
    fi

    echo "$file"

    # 1st expression: drop the flags left by a previous run.
    # 2nd expression: append the required flags at the end of the line.
    sed -E \
        -e "/${flags_line_re}/ s/[[:space:]]+-DSET_(MAX_ACT|NESTED)//g" \
        -e "/${flags_line_re}/ s/[[:space:]]*\$/ ${FLAGS}/" \
        "$file"
    # -i # to change the files
done
|
@ -508,7 +508,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
|
||||
endif
|
||||
|
||||
|
||||
call omp_set_max_active_levels(5)
|
||||
!call omp_set_max_active_levels(5)
|
||||
call set_multiple_levels_omp()
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
|
@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
|
||||
print *, irp_here, ': Failed in zmq_set_running'
|
||||
endif
|
||||
|
||||
call omp_set_max_active_levels(4)
|
||||
!call omp_set_max_active_levels(4)
|
||||
call set_multiple_levels_omp()
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
if (ithread == 0 ) then
|
||||
|
@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
|
||||
print *, irp_here, ': Failed in zmq_set_running'
|
||||
endif
|
||||
|
||||
call omp_set_max_active_levels(4)
|
||||
!call omp_set_max_active_levels(4)
|
||||
call set_multiple_levels_omp()
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
if (ithread == 0 ) then
|
||||
|
174
src/fci/check_omp_actual_setup.irp.f
Normal file
174
src/fci/check_omp_actual_setup.irp.f
Normal file
@ -0,0 +1,174 @@
|
||||
program check_omp_actual_setup

  ! Check whether four nested OpenMP parallel regions really fork at every
  ! level, first with the project setup (set_multiple_levels_omp) and then
  ! with each explicit combination of runtime calls.
  !
  ! Every trial uses 2 threads per region.  One counter is incremented in the
  ! innermost (4th) region of this program, the other inside a region opened
  ! by check_omp_in_subroutine, called from the 3rd region.  A setting is
  ! accepted when both counters reach 16.

  use omp_lib

  implicit none

  integer :: n_hits_inline, n_hits_call
  integer :: i_setting, n_setting
  logical :: verbose, test_versions
  logical, allocatable :: is_working(:)

  verbose       = .True.
  test_versions = .False.
  n_setting     = 4

  allocate(is_working(n_setting))
  is_working(:) = .False.

  ! Two threads per parallel region
  call omp_set_num_threads(2)

  do i_setting = 1, n_setting

    n_hits_inline = 0
    n_hits_call   = 0

    ! Reset the runtime before applying the setting under test
    call omp_set_max_active_levels(1)
    call omp_set_nested(.False.)

    if (i_setting == 1) then
      ! Setting 1: whatever the compiled-in project setup does
      call set_multiple_levels_omp()
    else if (i_setting == 2) then
      call omp_set_max_active_levels(5)
    else if (i_setting == 3) then
      call omp_set_nested(.True.)
    else
      call omp_set_nested(.True.)
      call omp_set_max_active_levels(5)
    endif

    ! Level 1
    !$OMP PARALLEL
    if (verbose) print *, 'Num threads level 1:', omp_get_num_threads()

    ! Level 2
    !$OMP PARALLEL
    if (verbose) print *, 'Num threads level 2:', omp_get_num_threads()

    ! Level 3
    !$OMP PARALLEL
    if (verbose) print *, 'Num threads level 3:', omp_get_num_threads()

    call check_omp_in_subroutine(n_hits_call)

    ! Level 4
    !$OMP PARALLEL
    if (verbose) print *, 'Num threads level 4:', omp_get_num_threads()

    !$OMP ATOMIC
    n_hits_inline = n_hits_inline + 1

    !$OMP END PARALLEL
    !$OMP END PARALLEL
    !$OMP END PARALLEL
    !$OMP END PARALLEL

    if (verbose) then
      print *, 'Setting:', i_setting, 'accu=', n_hits_inline
      print *, 'Setting:', i_setting, 'accu2=', n_hits_call
    endif

    ! Each setting is visited once, so a plain assignment is enough
    is_working(i_setting) = (n_hits_inline == 16) .and. (n_hits_call == 16)

  enddo

  if (verbose) then

    ! Which explicit runtime calls give working nested parallelism
    if (is_working(2)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
    else if (is_working(3)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_nested(.True.)'
      print *, ''
      print *, 'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
    else if (is_working(4)) then
      print *, 'The parallelization works on 4 levels with:'
      print *, 'call omp_set_nested(.True.)'
      print *, '+'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
    else
      print *, 'The parallelization on multiple levels does not work with:'
      print *, 'call omp_set_max_active_levels(5)'
      print *, 'or'
      print *, 'call omp_set_nested(.True.)'
      print *, 'or'
      print *, 'call omp_set_nested(.True.)'
      print *, '+'
      print *, 'call omp_set_max_active_levels(5)'
      print *, ''
      print *, 'Try an other compiler and good luck...'
    endif

    ! Verdict on the setup currently compiled into the project (setting 1)
    if (is_working(1)) then
      print *, ''
      print *, '=========================================================='
      print *, 'Your actual set up works for parallelization with 4 levels'
      print *, '=========================================================='
      print *, ''
    else
      print *, ''
      print *, '==================================================================='
      print *, 'Your actual set up does not work for parallelization with 4 levels'
      print *, 'Please look at the previous messages to understand the requirements'
      print *, '==================================================================='
      print *, ''
    endif

  endif

  ! Raw pass/fail list for settings 2 to 4
  if (test_versions) then
    print *, is_working(2:4)
  endif

  ! IRPF90_FLAGS matching the first working explicit setting
  if (is_working(2)) then
    print *, '-DSET_MAX_ACT'
  else if (is_working(3)) then
    print *, '-DSET_NESTED'
  else if (is_working(4)) then
    print *, '-DSET_MAX_ACT -DSET_NESTED'
  else
    print *, 'ERROR'
  endif

end
|
||||
|
||||
subroutine check_omp_in_subroutine(accu2)

  ! Open one parallel region and add 1 to accu2 from every thread of it.
  ! The update is atomic because accu2 is shared by all the threads.

  implicit none

  integer, intent(inout) :: accu2

  !$OMP PARALLEL DEFAULT(NONE) SHARED(accu2)
  !$OMP ATOMIC
  accu2 = accu2 + 1
  !$OMP END PARALLEL

end
|
16
src/utils/set_multiple_levels_omp.irp.f
Normal file
16
src/utils/set_multiple_levels_omp.irp.f
Normal file
@ -0,0 +1,16 @@
|
||||
subroutine set_multiple_levels_omp()

  ! omp_lib gives the compiler an explicit interface for the runtime calls below
  use omp_lib

  implicit none

  BEGIN_DOC
  ! Configure the OpenMP runtime for nested parallel regions.
  !
  ! What is done is selected at compile time by the IRPF90 flags:
  !
  ! * -DSET_MAX_ACT : call omp_set_max_active_levels(5)
  !
  ! * -DSET_NESTED  : call omp_set_nested(.True.)
  !
  ! When neither flag is defined this routine does nothing.
  END_DOC

  IRP_IF SET_MAX_ACT
  !print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)'
  call omp_set_max_active_levels(5)
  IRP_ENDIF

  IRP_IF SET_NESTED
  !print*,'SET_NESTED: True, call omp_set_nested(.True.)'
  call omp_set_nested(.True.)
  IRP_ENDIF

end
|
Loading…
Reference in New Issue
Block a user