diff --git a/config/ifort.cfg b/config/ifort.cfg index 714c4b10..f8685bc0 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index a2cb4c8a..b14369d3 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index e0d489a0..16be2ed2 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -59,6 +59,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index 5ed01227..9bd41096 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg index ddb4aa2d..aa5bb966 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_xHost.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/scripts/verif_omp/check_actual_setup.sh b/scripts/verif_omp/check_actual_setup.sh new file mode 100755 index 00000000..f275394f --- /dev/null +++ b/scripts/verif_omp/check_actual_setup.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +echo "" +echo "Please wait..." +echo "" +cd ../../src/fci +ninja || echo "Please recompile from the root" +echo "" +./check_omp_actual_setup +cd ../../scripts/verif_omp diff --git a/scripts/verif_omp/check_omp_v2.f90 b/scripts/verif_omp/check_omp_v2.f90 new file mode 100644 index 00000000..ca6af8bd --- /dev/null +++ b/scripts/verif_omp/check_omp_v2.f90 @@ -0,0 +1,175 @@ +program check_omp_v2 + + use omp_lib + + implicit none + + integer :: accu, accu2 + integer :: s, n_setting + logical :: verbose, test_versions + logical, allocatable :: is_working(:) + + verbose = .False. + test_versions = .True. + n_setting = 4 + + allocate(is_working(n_setting)) + + is_working = .False. + + ! set the number of threads + call omp_set_num_threads(2) + + do s = 1, n_setting + + accu = 0 + accu2 = 0 + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (s==1) then + !call set_multiple_levels_omp() + cycle + elseif (s==2) then + call omp_set_max_active_levels(5) + elseif (s==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + ! Level 1 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 1:',omp_get_num_threads() + endif + + ! Level 2 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 2:',omp_get_num_threads() + endif + + ! Level 3 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 3:',omp_get_num_threads() + endif + + call check_omp_in_subroutine(accu2) + + ! Level 4 + !$OMP PARALLEL + + if (verbose) then + print*,'Num threads level 4:',omp_get_num_threads() + endif + + !$OMP ATOMIC + accu = accu + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + if (verbose) then + print*,'Setting:',s,'accu=',accu + print*,'Setting:',s,'accu2=',accu2 + endif + + if (accu == 16 .and. accu2 == 16) then + is_working(s) = .True. + endif + + enddo + + if (verbose) then + if (is_working(2)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (is_working(3)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (is_working(4)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Try an other compiler and good luck...' + endif + + ! if (is_working(1)) then + ! print*,'' + ! print*,'==========================================================' + ! print*,'Your actual set up works for parallelization with 4 levels' + ! print*,'==========================================================' + ! print*,'' + ! else + ! print*,'' + ! print*,'===================================================================' + ! print*,'Your actual set up does not work for parallelization with 4 levels' + ! print*,'Please look at the previous messages to understand the requirements' + ! print*,'===================================================================' + ! print*,'' + ! endif + endif + + ! List of working flags + if (test_versions) then + print*,'Tests:',is_working(2:4) + endif + + ! IRPF90_FLAGS + if (is_working(2)) then + print*,'-DSET_MAX_ACT' + elseif (is_working(3)) then + print*,'-DSET_NESTED' + elseif (is_working(4)) then + print*,'-DSET_MAX_ACT -DSET_NESTED' + else + print*,'ERROR' + endif + +end + +subroutine check_omp_in_subroutine(accu2) + + implicit none + + integer, intent(inout) :: accu2 + + !$OMP PARALLEL + + !$OMP ATOMIC + accu2 = accu2 + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + +end diff --git a/scripts/verif_omp/check_required_setup.sh b/scripts/verif_omp/check_required_setup.sh new file mode 100755 index 00000000..facb6cbb --- /dev/null +++ b/scripts/verif_omp/check_required_setup.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +# take one argument which is the compiler used +# return the required IRPF90_FLAGS for the $1 compiler + +if [ -z "$1" ] +then + echo "Give the compiler in argument" +else + +$1 --version > /dev/null \ +&& $1 -O0 -fopenmp check_omp_v2.f90 \ +&& ./a.out | tail -n 1 + + +# if there is an error or if the compiler is not found +$1 --version > /dev/null || echo 'compiler not found' + +fi diff --git a/scripts/verif_omp/study_omp.sh b/scripts/verif_omp/study_omp.sh new file mode 100755 index 00000000..1fdd7b26 --- /dev/null +++ b/scripts/verif_omp/study_omp.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +list_comp="ifort gfortran-7 gfortran-8 gfortran-9" + +FILE=results.dat + +touch $FILE +rm $FILE + +echo "1: omp_set_max_active_levels(5)" >> $FILE +echo "2: omp_set_nested(.True.)" >> $FILE +echo "3: 1 + 2" >> $FILE +echo "" >> $FILE +echo "1 2 3" >> $FILE +for comp in $list_comp +do + $comp --version > /dev/null \ + && $comp -O0 -fopenmp check_omp_v2.f90 \ + && echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE + +done + +cat $FILE + diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f deleted file mode 100644 index bd5c204f..00000000 --- a/src/fci/check_omp.irp.f +++ /dev/null @@ -1,235 +0,0 @@ -program check_omp - - use omp_lib - - implicit none - - integer :: i,j,k,l,m,n,x,z,setting,nb_setting - double precision :: w1,w2,c1,c2 - double precision, allocatable :: accu(:,:,:,:) - logical :: must_exit, verbose, is_working - logical, allocatable :: is_working_n(:) - - x = 4 - nb_setting = 4 - - allocate(accu(x,x,x,x)) - allocate(is_working_n(nb_setting)) - - verbose = .True. - - accu = 0d0 - must_exit = .False. - - !$OMP PARALLEL - if (OMP_GET_NUM_THREADS() == 1) then - print*,'' - print*,'1 thread, no parallelization possible' - print*,'' - must_exit=.True. - endif - !$OMP END PARALLEL - if (must_exit) then - call abort - endif - - ! reset the number of max active levels - !call omp_set_max_active_levels(1) - - !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - !call intel_check_omp() - !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - - ! set the number of threads - call omp_set_num_threads(2) - - is_working_n = .True. - - do z = 1, nb_setting - - call omp_set_max_active_levels(1) - call omp_set_nested(.False.) - - if (z==1) then - call test_set_multiple_levels_omp() - !call test_set_multiple_levels_omp - elseif (z==2) then - call omp_set_max_active_levels(5) - elseif (z==3) then - call omp_set_nested(.True.) - else - call omp_set_nested(.True.) - call omp_set_max_active_levels(5) - endif - - setting = z-1 - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 1:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1) then - print*,'Setting',setting,"error at level 1" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l) + 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 2:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 2" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 3:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 3" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 4:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 4" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - enddo - - print*,'' - - if (is_working_n(2)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - elseif (is_working_n(3)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'' - print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' - elseif (is_working_n(4)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - else - print*,'The parallelization on multiple levels does not work with:' - print*,'call omp_set_max_active_levels(5)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Try an other compiler and good luck...' - endif - - if (is_working_n(1)) then - print*,'' - print*,'==========================================================' - print*,'Your actual set up works for parallelization with 4 levels' - print*,'==========================================================' - print*,'' - else - print*,'' - print*,'===================================================================' - print*,'Your actual set up works for parallelization with 4 levels' - print*,'Please look at the previous messages to understand the requirements' - print*,'If it does not work even with the right irpf90 flags, clean and' - print*,'recompile your code at ${QP_ROOT}' - print*,'===================================================================' - print*,'' - endif - -end - diff --git a/src/fci/check_omp_actual_setup.irp.f b/src/fci/check_omp_actual_setup.irp.f new file mode 100644 index 00000000..70514bd3 --- /dev/null +++ b/src/fci/check_omp_actual_setup.irp.f @@ -0,0 +1,174 @@ +program check_omp_actual_setup + + use omp_lib + + implicit none + + integer :: accu, accu2 + integer :: s, n_setting + logical :: verbose, test_versions + logical, allocatable :: is_working(:) + + verbose = .True. + test_versions = .False. + n_setting = 4 + + allocate(is_working(n_setting)) + + is_working = .False. + + ! set the number of threads + call omp_set_num_threads(2) + + do s = 1, n_setting + + accu = 0 + accu2 = 0 + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (s==1) then + call set_multiple_levels_omp() + elseif (s==2) then + call omp_set_max_active_levels(5) + elseif (s==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + ! Level 1 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 1:',omp_get_num_threads() + endif + + ! Level 2 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 2:',omp_get_num_threads() + endif + + ! Level 3 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 3:',omp_get_num_threads() + endif + + call check_omp_in_subroutine(accu2) + + ! Level 4 + !$OMP PARALLEL + + if (verbose) then + print*,'Num threads level 4:',omp_get_num_threads() + endif + + !$OMP ATOMIC + accu = accu + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + if (verbose) then + print*,'Setting:',s,'accu=',accu + print*,'Setting:',s,'accu2=',accu2 + endif + + if (accu == 16 .and. accu2 == 16) then + is_working(s) = .True. + endif + + enddo + + if (verbose) then + if (is_working(2)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (is_working(3)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (is_working(4)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Try an other compiler and good luck...' + endif + + if (is_working(1)) then + print*,'' + print*,'==========================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'==========================================================' + print*,'' + else + print*,'' + print*,'===================================================================' + print*,'Your actual set up does not work for parallelization with 4 levels' + print*,'Please look at the previous messages to understand the requirements' + print*,'===================================================================' + print*,'' + endif + endif + + ! List of working flags + if (test_versions) then + print*,is_working(2:4) + endif + + ! IRPF90_FLAGS + if (is_working(2)) then + print*,'-DSET_MAX_ACT' + elseif (is_working(3)) then + print*,'-DSET_NESTED' + elseif (is_working(4)) then + print*,'-DSET_MAX_ACT -DSET_NESTED' + else + print*,'ERROR' + endif + +end + +subroutine check_omp_in_subroutine(accu2) + + implicit none + + integer, intent(inout) :: accu2 + + !$OMP PARALLEL + + !$OMP ATOMIC + accu2 = accu2 + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + +end diff --git a/src/utils/test_set_multiple_levels_omp.irp.f b/src/utils/set_multiple_levels_omp.irp.f similarity index 87% rename from src/utils/test_set_multiple_levels_omp.irp.f rename to src/utils/set_multiple_levels_omp.irp.f index c4f721a1..a09f615a 100644 --- a/src/utils/test_set_multiple_levels_omp.irp.f +++ b/src/utils/set_multiple_levels_omp.irp.f @@ -1,4 +1,4 @@ -subroutine test_set_multiple_levels_omp() +subroutine set_multiple_levels_omp() ! Doc : idk