diff --git a/config/gfortran.cfg b/config/gfortran.cfg
index 342acae9..ec72e722 100644
--- a/config/gfortran.cfg
+++ b/config/gfortran.cfg
@@ -13,7 +13,7 @@
 FC : gfortran -g -ffree-line-length-none -I . -fPIC
 LAPACK_LIB : -lblas -llapack
 IRPF90 : irpf90
-IRPF90_FLAGS : --ninja --align=32 --assert
+IRPF90_FLAGS : --ninja --align=32 --assert -DGNU_CHECK_OMP
 
 # Global options
 ################
diff --git a/config/ifort.cfg b/config/ifort.cfg
index 714c4b10..0382360a 100644
--- a/config/ifort.cfg
+++ b/config/ifort.cfg
@@ -9,7 +9,7 @@
 FC : ifort -fpic
 LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90 : irpf90
-IRPF90_FLAGS : --ninja --align=32 -DINTEL
+IRPF90_FLAGS : --ninja --align=32 -DINTEL
 
 # Global options
 ################
diff --git a/config/ifort_debug.cfg b/config/ifort_debug.cfg
index 9b718380..d70b1465 100644
--- a/config/ifort_debug.cfg
+++ b/config/ifort_debug.cfg
@@ -9,7 +9,7 @@
 FC : ifort -fpic
 LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90 : irpf90
-IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
+IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL
 
 # Global options
 ################
diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg
index 5ed01227..1ac78717 100644
--- a/config/ifort_rome.cfg
+++ b/config/ifort_rome.cfg
@@ -9,7 +9,7 @@
 FC : ifort -fpic
 LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps
 IRPF90 : irpf90
-IRPF90_FLAGS : --ninja --align=32 -DINTEL
+IRPF90_FLAGS : --ninja --align=32 -DINTEL
 
 # Global options
 ################
diff --git a/scripts/verif_omp/check_actual_setup.sh b/scripts/verif_omp/check_actual_setup.sh
new file mode 100755
index 00000000..6eaa4517
--- /dev/null
+++ b/scripts/verif_omp/check_actual_setup.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# go in qp2/src/fci to run check_omp_actual_setup
+# to see if we can run in parallel an omp section in another one
+echo ""
+echo "Please wait..."
+echo ""
+cd ../../src/fci
+ninja || echo "Please recompile from the root"
+echo ""
+./check_omp_actual_setup
+cd ../../scripts/verif_omp
diff --git a/scripts/verif_omp/check_omp_v2.f90 b/scripts/verif_omp/check_omp_v2.f90
new file mode 100644
index 00000000..ca6af8bd
--- /dev/null
+++ b/scripts/verif_omp/check_omp_v2.f90
@@ -0,0 +1,181 @@
+! Standalone test of nested OpenMP parallelism on 4 levels with 2 threads.
+! Settings 2..4 try omp_set_max_active_levels(5), omp_set_nested(.True.)
+! and both; a setting works when the innermost region runs 2**4 = 16 times.
+! The last printed line is the irpf90 flag(s) to use: it is read by
+! check_required_setup.sh (tail -n 1); study_omp.sh greps the "Tests:" line.
+
+program check_omp_v2
+
+  use omp_lib
+
+  implicit none
+
+  integer :: accu, accu2
+  integer :: s, n_setting
+  logical :: verbose, test_versions
+  logical, allocatable :: is_working(:)
+
+  verbose = .False.
+  test_versions = .True.
+  n_setting = 4
+
+  allocate(is_working(n_setting))
+
+  is_working = .False.
+
+  ! set the number of threads
+  call omp_set_num_threads(2)
+
+  do s = 1, n_setting
+
+    accu = 0
+    accu2 = 0
+
+    call omp_set_max_active_levels(1)
+    call omp_set_nested(.False.)
+
+    if (s==1) then
+      !call set_multiple_levels_omp()
+      cycle
+    elseif (s==2) then
+      call omp_set_max_active_levels(5)
+    elseif (s==3) then
+      call omp_set_nested(.True.)
+    else
+      call omp_set_nested(.True.)
+      call omp_set_max_active_levels(5)
+    endif
+
+    ! Level 1
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 1:',omp_get_num_threads()
+    endif
+
+    ! Level 2
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 2:',omp_get_num_threads()
+    endif
+
+    ! Level 3
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 3:',omp_get_num_threads()
+    endif
+
+    call check_omp_in_subroutine(accu2)
+
+    ! Level 4
+    !$OMP PARALLEL
+
+    if (verbose) then
+      print*,'Num threads level 4:',omp_get_num_threads()
+    endif
+
+    !$OMP ATOMIC
+    accu = accu + 1
+    !$OMP END ATOMIC
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+    if (verbose) then
+      print*,'Setting:',s,'accu=',accu
+      print*,'Setting:',s,'accu2=',accu2
+    endif
+
+    if (accu == 16 .and. accu2 == 16) then
+      is_working(s) = .True.
+    endif
+
+  enddo
+
+  if (verbose) then
+    if (is_working(2)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
+    elseif (is_working(3)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_nested(.True.)'
+      print*,''
+      print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
+    elseif (is_working(4)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_nested(.True.)'
+      print*,'+'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
+    else
+      print*,'The parallelization on multiple levels does not work with:'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,'or'
+      print*,'call omp_set_nested(.True.)'
+      print*,'or'
+      print*,'call omp_set_nested(.True.)'
+      print*,'+'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Try an other compiler and good luck...'
+    endif
+
+    ! if (is_working(1)) then
+    ! print*,''
+    ! print*,'=========================================================='
+    ! print*,'Your actual set up works for parallelization with 4 levels'
+    ! print*,'=========================================================='
+    ! print*,''
+    ! else
+    ! print*,''
+    ! print*,'==================================================================='
+    ! print*,'Your actual set up does not work for parallelization with 4 levels'
+    ! print*,'Please look at the previous messages to understand the requirements'
+    ! print*,'==================================================================='
+    ! print*,''
+    ! endif
+  endif
+
+  ! List of working flags
+  if (test_versions) then
+    print*,'Tests:',is_working(2:4)
+  endif
+
+  ! IRPF90_FLAGS
+  if (is_working(2)) then
+    print*,'-DSET_MAX_ACT'
+  elseif (is_working(3)) then
+    print*,'-DSET_NESTED'
+  elseif (is_working(4)) then
+    print*,'-DSET_MAX_ACT -DSET_NESTED'
+  else
+    print*,'ERROR'
+  endif
+
+end
+
+subroutine check_omp_in_subroutine(accu2)
+
+  implicit none
+
+  integer, intent(inout) :: accu2
+
+  !$OMP PARALLEL
+
+  !$OMP ATOMIC
+  accu2 = accu2 + 1
+  !$OMP END ATOMIC
+
+  !$OMP END PARALLEL
+
+end
diff --git a/scripts/verif_omp/check_required_setup.sh b/scripts/verif_omp/check_required_setup.sh
new file mode 100755
index 00000000..facb6cbb
--- /dev/null
+++ b/scripts/verif_omp/check_required_setup.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+# take one argument which is the compiler used
+# return the required IRPF90_FLAGS for the $1 compiler
+
+if [ -z "$1" ]
+then
+    echo "Give the compiler in argument"
+else
+
+$1 --version > /dev/null \
+&& $1 -O0 -fopenmp check_omp_v2.f90 \
+&& ./a.out | tail -n 1
+
+
+# if there is an error or if the compiler is not found
+$1 --version > /dev/null || echo 'compiler not found'
+
+fi
diff --git a/scripts/verif_omp/study_omp.sh b/scripts/verif_omp/study_omp.sh
new file mode 100755
index 00000000..00668d59
--- /dev/null
+++ b/scripts/verif_omp/study_omp.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# list of compilers
+list_comp="ifort gfortran-7 gfortran-8 gfortran-9"
+
+# file to store the results
+FILE=results.dat
+
+touch $FILE
+rm $FILE
+
+# Comments
+echo "1: omp_set_max_active_levels(5)" >> $FILE
+echo "2: omp_set_nested(.True.)" >> $FILE
+echo "3: 1 + 2" >> $FILE
+echo "" >> $FILE
+echo "1 2 3" >> $FILE
+
+# loop on the comp
+for comp in $list_comp
+do
+    $comp --version > /dev/null \
+    && $comp -O0 -fopenmp check_omp_v2.f90 \
+    && echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE
+
+done
+
+# Display
+cat $FILE
+
diff --git a/scripts/verif_omp/update_comp.sh b/scripts/verif_omp/update_comp.sh
new file mode 100755
index 00000000..14b644de
--- /dev/null
+++ b/scripts/verif_omp/update_comp.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Append the IRPF90 flags required for nested OpenMP to the config files
+# of a compiler.
+# Usage (from scripts/verif_omp, with QP_ROOT set): ./update_comp.sh <compiler>
+# Dry run: the updated files are printed, not rewritten (sed without -i).
+
+# Compiler
+COMP=$1
+
+# Path to file.cfg
+END="*.cfg"
+CONFIG="/config/"
+
+LIST=${QP_ROOT}${CONFIG}${COMP}${END}
+
+if [ -z "$1" ]
+then
+    echo "Give the compiler in argument"
+else
+
+    # List of the config files for the compiler
+    list_files=${LIST}
+    echo "Files that will be modified:"
+    echo $list_files
+
+    # Flags that must be added
+    FLAGS=$(./check_required_setup.sh "$COMP")
+
+    # Fortran list-directed output usually starts with a blank: trim it
+    read -r -a flag_words <<< "$FLAGS"
+    FLAGS="${flag_words[*]}"
+
+    # On failure the helper prints 'ERROR', 'compiler not found' or nothing:
+    # never inject that into a config file
+    case "$FLAGS" in
+        -DSET_*) ;;
+        *)
+            echo "Cannot determine the flags for $COMP: $FLAGS"
+            exit 1
+            ;;
+    esac
+
+    # Add the flags
+    for file in $list_files
+    do
+        echo $file
+        BASE="IRPF90_FLAGS : --ninja"
+        ACTUAL=$(grep "$BASE" "$file")
+
+        # Nothing to update in a file without the IRPF90_FLAGS line
+        [ -z "$ACTUAL" ] && continue
+
+        # To have only one time each flag: remove the ones already present
+        STRIPPED=${ACTUAL// -DSET_MAX_ACT/}
+        STRIPPED=${STRIPPED// -DSET_NESTED/}
+
+        NEW="${STRIPPED} ${FLAGS}"
+
+        # Debug
+        #echo ${NEW}
+
+        sed "s/${ACTUAL}/${NEW}/" "$file"
+        # -i # to change the files
+    done
+
+fi
diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f
index 8fd023da..fcee16bc 100644
--- a/src/davidson/davidson_parallel.irp.f
+++ b/src/davidson/davidson_parallel.irp.f
@@ -508,7 +508,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
   endif
 
 
-  call omp_set_max_active_levels(5)
+  !call omp_set_max_active_levels(5)
+  call set_multiple_levels_omp()
   !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
   ithread = omp_get_thread_num()
 
diff --git a/src/davidson/davidson_parallel_csf.irp.f b/src/davidson/davidson_parallel_csf.irp.f
index fe651b1d..90e4303e 100644
--- a/src/davidson/davidson_parallel_csf.irp.f
+++ b/src/davidson/davidson_parallel_csf.irp.f
@@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
     print *, irp_here, ': Failed in zmq_set_running'
   endif
 
-  call omp_set_max_active_levels(4)
+  !call omp_set_max_active_levels(4)
+  call set_multiple_levels_omp()
+
   !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
   ithread = omp_get_thread_num()
   if (ithread == 0 ) then
diff --git a/src/davidson/davidson_parallel_nos2.irp.f b/src/davidson/davidson_parallel_nos2.irp.f
index 84cbe3af..091b8666 100644
--- a/src/davidson/davidson_parallel_nos2.irp.f
+++ b/src/davidson/davidson_parallel_nos2.irp.f
@@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
     print *, irp_here, ': Failed in zmq_set_running'
   endif
 
-  call omp_set_max_active_levels(4)
+  !call omp_set_max_active_levels(4)
+  call set_multiple_levels_omp()
+
   !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
   ithread = omp_get_thread_num()
   if (ithread == 0 ) then
diff --git a/src/fci/check_omp_actual_setup.irp.f b/src/fci/check_omp_actual_setup.irp.f
new file mode 100644
index 00000000..70514bd3
--- /dev/null
+++ b/src/fci/check_omp_actual_setup.irp.f
@@ -0,0 +1,178 @@
+! Same 4-level nested OpenMP test as scripts/verif_omp/check_omp_v2.f90,
+! but setting 1 calls set_multiple_levels_omp(), i.e. it checks the flags
+! this code was compiled with (run by scripts/verif_omp/check_actual_setup.sh).
+
+program check_omp_actual_setup
+
+  use omp_lib
+
+  implicit none
+
+  integer :: accu, accu2
+  integer :: s, n_setting
+  logical :: verbose, test_versions
+  logical, allocatable :: is_working(:)
+
+  verbose = .True.
+  test_versions = .False.
+  n_setting = 4
+
+  allocate(is_working(n_setting))
+
+  is_working = .False.
+
+  ! set the number of threads
+  call omp_set_num_threads(2)
+
+  do s = 1, n_setting
+
+    accu = 0
+    accu2 = 0
+
+    call omp_set_max_active_levels(1)
+    call omp_set_nested(.False.)
+
+    if (s==1) then
+      call set_multiple_levels_omp()
+    elseif (s==2) then
+      call omp_set_max_active_levels(5)
+    elseif (s==3) then
+      call omp_set_nested(.True.)
+    else
+      call omp_set_nested(.True.)
+      call omp_set_max_active_levels(5)
+    endif
+
+    ! Level 1
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 1:',omp_get_num_threads()
+    endif
+
+    ! Level 2
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 2:',omp_get_num_threads()
+    endif
+
+    ! Level 3
+    !$OMP PARALLEL
+    if (verbose) then
+      print*,'Num threads level 3:',omp_get_num_threads()
+    endif
+
+    call check_omp_in_subroutine(accu2)
+
+    ! Level 4
+    !$OMP PARALLEL
+
+    if (verbose) then
+      print*,'Num threads level 4:',omp_get_num_threads()
+    endif
+
+    !$OMP ATOMIC
+    accu = accu + 1
+    !$OMP END ATOMIC
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+
+    !$OMP END PARALLEL
+
+    if (verbose) then
+      print*,'Setting:',s,'accu=',accu
+      print*,'Setting:',s,'accu2=',accu2
+    endif
+
+    if (accu == 16 .and. accu2 == 16) then
+      is_working(s) = .True.
+    endif
+
+  enddo
+
+  if (verbose) then
+    if (is_working(2)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
+    elseif (is_working(3)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_nested(.True.)'
+      print*,''
+      print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg'
+    elseif (is_working(4)) then
+      print*,'The parallelization works on 4 levels with:'
+      print*,'call omp_set_nested(.True.)'
+      print*,'+'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg'
+    else
+      print*,'The parallelization on multiple levels does not work with:'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,'or'
+      print*,'call omp_set_nested(.True.)'
+      print*,'or'
+      print*,'call omp_set_nested(.True.)'
+      print*,'+'
+      print*,'call omp_set_max_active_levels(5)'
+      print*,''
+      print*,'Try an other compiler and good luck...'
+    endif
+
+    if (is_working(1)) then
+      print*,''
+      print*,'=========================================================='
+      print*,'Your actual set up works for parallelization with 4 levels'
+      print*,'=========================================================='
+      print*,''
+    else
+      print*,''
+      print*,'==================================================================='
+      print*,'Your actual set up does not work for parallelization with 4 levels'
+      print*,'Please look at the previous messages to understand the requirements'
+      print*,'==================================================================='
+      print*,''
+    endif
+  endif
+
+  ! List of working flags
+  if (test_versions) then
+    print*,is_working(2:4)
+  endif
+
+  ! IRPF90_FLAGS
+  if (is_working(2)) then
+    print*,'-DSET_MAX_ACT'
+  elseif (is_working(3)) then
+    print*,'-DSET_NESTED'
+  elseif (is_working(4)) then
+    print*,'-DSET_MAX_ACT -DSET_NESTED'
+  else
+    print*,'ERROR'
+  endif
+
+end
+
+subroutine check_omp_in_subroutine(accu2)
+
+  implicit none
+
+  integer, intent(inout) :: accu2
+
+  !$OMP PARALLEL
+
+  !$OMP ATOMIC
+  accu2 = accu2 + 1
+  !$OMP END ATOMIC
+
+  !$OMP END PARALLEL
+
+end
diff --git a/src/utils/set_multiple_levels_omp.irp.f b/src/utils/set_multiple_levels_omp.irp.f
new file mode 100644
index 00000000..4be3af5b
--- /dev/null
+++ b/src/utils/set_multiple_levels_omp.irp.f
@@ -0,0 +1,31 @@
+subroutine set_multiple_levels_omp()
+
+! Allow nested OpenMP parallel regions.
+!
+! The OpenMP call is chosen at compile time with the irpf90 flags
+! -DSET_MAX_ACT -> call omp_set_max_active_levels(5)
+! -DSET_NESTED  -> call omp_set_nested(.True.)
+! (both may be given). Without any of them, fall back to
+! omp_set_max_active_levels(5): the callers used omp_set_max_active_levels
+! directly before, and must not silently lose the nested parallelism.
+
+  implicit none
+
+  logical :: is_configured
+
+  is_configured = .False.
+
+  IRP_IF SET_MAX_ACT
+    call omp_set_max_active_levels(5)
+    is_configured = .True.
+  IRP_ENDIF
+  IRP_IF SET_NESTED
+    call omp_set_nested(.True.)
+    is_configured = .True.
+  IRP_ENDIF
+
+  if (.not. is_configured) then
+    call omp_set_max_active_levels(5)
+  endif
+
+end