From 3d478029e8d8d5a8b2bd492c749ce1ad689168ad Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 18 Nov 2021 09:19:41 +0100 Subject: [PATCH 01/14] test intel bug --- config/ifort.cfg | 2 +- src/utils/intel_check_omp.irp.f | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 src/utils/intel_check_omp.irp.f diff --git a/config/ifort.cfg b/config/ifort.cfg index 714c4b10..9d8302b8 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL_CHECK_OMP # Global options ################ diff --git a/src/utils/intel_check_omp.irp.f b/src/utils/intel_check_omp.irp.f new file mode 100644 index 00000000..6933377c --- /dev/null +++ b/src/utils/intel_check_omp.irp.f @@ -0,0 +1,16 @@ +subroutine intel_check_omp() + +! Doc : idk + + implicit none + + IRP_IF INTEL_CHECK_OMP + call omp_set_max_active_levels(5) + print*,'INTEL_CHECK_OMP: true' + IRP_ELSE + call omp_set_nested(.True.) + !call omp_set_nested(.False.) + print*,'INTEL_CHECK_OMP: false' + IRP_ENDIF + +end From d521bfaa6f5664b76809f4a5439fb69c5f40e50a Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 18 Nov 2021 14:54:34 +0100 Subject: [PATCH 02/14] test comp flags --- config/gfortran.cfg | 2 +- config/ifort.cfg | 2 +- config/ifort_2019.cfg | 63 +++++++++++++++++++++++++++++++++ config/ifort_2021.cfg | 63 +++++++++++++++++++++++++++++++++ src/utils/intel_check_omp.irp.f | 14 +++++--- 5 files changed, 137 insertions(+), 7 deletions(-) create mode 100644 config/ifort_2019.cfg create mode 100644 config/ifort_2021.cfg diff --git a/config/gfortran.cfg b/config/gfortran.cfg index 342acae9..ec72e722 100644 --- a/config/gfortran.cfg +++ b/config/gfortran.cfg @@ -13,7 +13,7 @@ FC : gfortran -g -ffree-line-length-none -I . -fPIC LAPACK_LIB : -lblas -llapack IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert +IRPF90_FLAGS : --ninja --align=32 --assert -DGNU_CHECK_OMP # Global options ################ diff --git a/config/ifort.cfg b/config/ifort.cfg index 9d8302b8..63c4a5d3 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL_CHECK_OMP +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_2019.cfg b/config/ifort_2019.cfg new file mode 100644 index 00000000..35cf63f7 --- /dev/null +++ b/config/ifort_2019.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL2019_CHECK_OMP + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2021.cfg b/config/ifort_2021.cfg new file mode 100644 index 00000000..78e201f2 --- /dev/null +++ b/config/ifort_2021.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL2021_CHECK_OMP + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/src/utils/intel_check_omp.irp.f b/src/utils/intel_check_omp.irp.f index 6933377c..af86b131 100644 --- a/src/utils/intel_check_omp.irp.f +++ b/src/utils/intel_check_omp.irp.f @@ -4,13 +4,17 @@ subroutine intel_check_omp() implicit none - IRP_IF INTEL_CHECK_OMP + IRP_IF INTEL2021_CHECK_OMP call omp_set_max_active_levels(5) - print*,'INTEL_CHECK_OMP: true' - IRP_ELSE + print*,'INTEL2021_CHECK_OMP: true' + IRP_ENDIF + IRP_IF INTEL2019_CHECK_OMP call omp_set_nested(.True.) - !call omp_set_nested(.False.) - print*,'INTEL_CHECK_OMP: false' + print*,'INTEL2019_CHECK_OMP: true' + IRP_ENDIF + IRP_IF GNU_CHECK_OMP + call omp_set_nested(.True.) + print*,'GNU_CHECK_OMP: true' IRP_ENDIF end From 243315ae7ccc64f03d1386fd3ebc99eebc6f3dac Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 18 Nov 2021 14:55:17 +0100 Subject: [PATCH 03/14] TEST, file has to be removed after --- src/fci/test_intel_check_omp.irp.f | 115 +++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/fci/test_intel_check_omp.irp.f diff --git a/src/fci/test_intel_check_omp.irp.f b/src/fci/test_intel_check_omp.irp.f new file mode 100644 index 00000000..8a4711b3 --- /dev/null +++ b/src/fci/test_intel_check_omp.irp.f @@ -0,0 +1,115 @@ +program test_intel_check_omp + + use omp_lib + + implicit none + + integer :: i,j,k,l,m,n,x + double precision :: w1,w2,c1,c2 + double precision, allocatable :: accu(:,:,:,:) + + x = 4 + allocate(accu(x,x,x,x)) + + accu = 0d0 + + !$OMP PARALLEL + print*, 'Hello1 from:', OMP_GET_THREAD_NUM() + !$OMP END PARALLEL + + print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + call intel_check_omp() + print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + + !call omp_set_max_active_levels(20000) + + !$OMP PARALLEL + print*, 'Hello2 from:', OMP_GET_THREAD_NUM() + !$OMP END PARALLEL + + call wall_time(w1) + call cpu_time(c1) + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + print*,'level 1',omp_get_num_threads() + !$OMP DO + do l = 1, x + do k = 1, x + do j = 1, x + do i = 1, x + accu(i,j,k,l) = accu(i,j,k,l) + 1d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + print*,'level 2',omp_get_num_threads() + !$OMP DO + do l = 1, x + do k = 1, x + do j = 1, x + do i = 1, x + accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + print*,'level 3',omp_get_num_threads() + !$OMP DO + do l = 1, x + do k = 1, x + do j = 1, x + do i = 1, x + accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + print*,'level 4',omp_get_num_threads() + !$OMP DO + do l = 1, x + do k = 1, x + do j = 1, x + do i = 1, x + accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + call wall_time(w2) + call cpu_time(c2) + + print*,accu(1,1,1,1) + print*,'wall time:', w2-w1 + print*,'cpu time:', c2-c1 + print*,'ration',(c2-c1)/(w2-w1) +end From 612f5a5e9c1dc697bf07560bfdc92dcd58a3218e Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 09:57:04 +0100 Subject: [PATCH 04/14] add a test to check if omp works on multiple levels --- config/ifort.cfg | 2 +- src/fci/check_omp.irp.f | 242 +++++++++++++++++++ src/fci/test_intel_check_omp.irp.f | 115 --------- src/utils/intel_check_omp.irp.f | 20 -- src/utils/test_set_multiple_levels_omp.irp.f | 16 ++ 5 files changed, 259 insertions(+), 136 deletions(-) create mode 100644 src/fci/check_omp.irp.f delete mode 100644 src/fci/test_intel_check_omp.irp.f delete mode 100644 src/utils/intel_check_omp.irp.f create mode 100644 src/utils/test_set_multiple_levels_omp.irp.f diff --git a/config/ifort.cfg b/config/ifort.cfg index 63c4a5d3..714c4b10 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f new file mode 100644 index 00000000..ffc113d6 --- /dev/null +++ b/src/fci/check_omp.irp.f @@ -0,0 +1,242 @@ +program check_omp + + use omp_lib + + implicit none + + integer :: i,j,k,l,m,n,x,z,setting + double precision :: w1,w2,c1,c2 + double precision, allocatable :: accu(:,:,:,:) + logical :: must_exit, verbose, is_working + + x = 4 + allocate(accu(x,x,x,x)) + + verbose = .False. + + accu = 0d0 + must_exit = .False. + + !$OMP PARALLEL + if (OMP_GET_NUM_THREADS() == 1) then + print*,'' + print*,'1 thread, no parallelization possible' + print*,'' + must_exit=.True. + endif + !$OMP END PARALLEL + if (must_exit) then + call abort + endif + + ! reset the number of max active levels + !call omp_set_max_active_levels(1) + + !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + !call intel_check_omp() + !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + + ! set the number of threads + call omp_set_num_threads(2) + + do z = 1, 4 + + if (must_exit) then + exit + endif + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (z==1) then + call test_set_multiple_levels_omp() + !call test_set_multiple_levels_omp + elseif (z==2) then + call omp_set_max_active_levels(5) + elseif (z==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + setting = z-1 + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 1:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1) then + print*,'Setting',setting,"error at level 1" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l) + 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 2:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 2" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 3:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 3" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 4:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 4" + elseif(omp_get_num_threads()==1 .or. setting == 0) then + else + must_exit = .True. + endif + + if ( z == 1 .and. setting == 0) then + is_working = .True. + elseif (z == 1 .and. setting == -1) then + is_working = .False. + else + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + enddo + + print*,'' + + if (setting == 1) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (setting == 2) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (setting == 3) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Good luck...' + endif + + if (is_working) then + print*,'' + print*,'==========================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'==========================================================' + print*,'' + else + print*,'' + print*,'===================================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'Please look at the previous messages to understand the requirements' + print*,'If it does not work even with the right irpf90 flags, clean and' + print*,'recompile your code at ${QP_ROOT}' + print*,'===================================================================' + print*,'' + endif + +end + diff --git a/src/fci/test_intel_check_omp.irp.f b/src/fci/test_intel_check_omp.irp.f deleted file mode 100644 index 8a4711b3..00000000 --- a/src/fci/test_intel_check_omp.irp.f +++ /dev/null @@ -1,115 +0,0 @@ -program test_intel_check_omp - - use omp_lib - - implicit none - - integer :: i,j,k,l,m,n,x - double precision :: w1,w2,c1,c2 - double precision, allocatable :: accu(:,:,:,:) - - x = 4 - allocate(accu(x,x,x,x)) - - accu = 0d0 - - !$OMP PARALLEL - print*, 'Hello1 from:', OMP_GET_THREAD_NUM() - !$OMP END PARALLEL - - print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - call intel_check_omp() - print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - - !call omp_set_max_active_levels(20000) - - !$OMP PARALLEL - print*, 'Hello2 from:', OMP_GET_THREAD_NUM() - !$OMP END PARALLEL - - call wall_time(w1) - call cpu_time(c1) - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 1',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l) + 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 2',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 3',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 4',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - call wall_time(w2) - call cpu_time(c2) - - print*,accu(1,1,1,1) - print*,'wall time:', w2-w1 - print*,'cpu time:', c2-c1 - print*,'ration',(c2-c1)/(w2-w1) -end diff --git a/src/utils/intel_check_omp.irp.f b/src/utils/intel_check_omp.irp.f deleted file mode 100644 index af86b131..00000000 --- a/src/utils/intel_check_omp.irp.f +++ /dev/null @@ -1,20 +0,0 @@ -subroutine intel_check_omp() - -! Doc : idk - - implicit none - - IRP_IF INTEL2021_CHECK_OMP - call omp_set_max_active_levels(5) - print*,'INTEL2021_CHECK_OMP: true' - IRP_ENDIF - IRP_IF INTEL2019_CHECK_OMP - call omp_set_nested(.True.) - print*,'INTEL2019_CHECK_OMP: true' - IRP_ENDIF - IRP_IF GNU_CHECK_OMP - call omp_set_nested(.True.) - print*,'GNU_CHECK_OMP: true' - IRP_ENDIF - -end diff --git a/src/utils/test_set_multiple_levels_omp.irp.f b/src/utils/test_set_multiple_levels_omp.irp.f new file mode 100644 index 00000000..c4f721a1 --- /dev/null +++ b/src/utils/test_set_multiple_levels_omp.irp.f @@ -0,0 +1,16 @@ +subroutine test_set_multiple_levels_omp() + +! Doc : idk + + implicit none + + IRP_IF SET_MAX_ACT + print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)' + call omp_set_max_active_levels(5) + IRP_ENDIF + IRP_IF SET_NESTED + print*,'SET_NESTED: True, call omp_set_nested(.True.)' + call omp_set_nested(.True.) + IRP_ENDIF + +end From f260f628250f25ccd78c3b866c465f0544f54362 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 09:58:54 +0100 Subject: [PATCH 05/14] update --- config/ifort_2019.cfg | 63 ------------------------------------------- config/ifort_2021.cfg | 63 ------------------------------------------- 2 files changed, 126 deletions(-) delete mode 100644 config/ifort_2019.cfg delete mode 100644 config/ifort_2021.cfg diff --git a/config/ifort_2019.cfg b/config/ifort_2019.cfg deleted file mode 100644 index 35cf63f7..00000000 --- a/config/ifort_2019.cfg +++ /dev/null @@ -1,63 +0,0 @@ -# Common flags -############## -# -# -mkl=[parallel|sequential] : Use the MKL library -# --ninja : Allow the utilisation of ninja. It is mandatory ! -# --align=32 : Align all provided arrays on a 32-byte boundary -# -[COMMON] -FC : ifort -fpic -LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps -IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL2019_CHECK_OMP - -# Global options -################ -# -# 1 : Activate -# 0 : Deactivate -# -[OPTION] -MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below -CACHE : 0 ; Enable cache_compile.py -OPENMP : 1 ; Append OpenMP flags - -# Optimization flags -#################### -# -# -xHost : Compile a binary optimized for the current architecture -# -O2 : O3 not better than O2. -# -ip : Inter-procedural optimizations -# -ftz : Flushes denormal results to zero -# -[OPT] -FC : -traceback -FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g - -# Profiling flags -################# -# -[PROFILE] -FC : -p -g -FCFLAGS : -xSSE4.2 -O2 -ip -ftz - -# Debugging flags -################# -# -# -traceback : Activate backtrace on runtime -# -fpe0 : All floating point exaceptions -# -C : Checks uninitialized variables, array subscripts, etc... -# -g : Extra debugging information -# -xSSE2 : Valgrind needs a very simple x86 executable -# -[DEBUG] -FC : -g -traceback -FCFLAGS : -xSSE2 -C -fpe0 -implicitnone - -# OpenMP flags -################# -# -[OPENMP] -FC : -qopenmp -IRPF90_FLAGS : --openmp - diff --git a/config/ifort_2021.cfg b/config/ifort_2021.cfg deleted file mode 100644 index 78e201f2..00000000 --- a/config/ifort_2021.cfg +++ /dev/null @@ -1,63 +0,0 @@ -# Common flags -############## -# -# -mkl=[parallel|sequential] : Use the MKL library -# --ninja : Allow the utilisation of ninja. It is mandatory ! -# --align=32 : Align all provided arrays on a 32-byte boundary -# -[COMMON] -FC : ifort -fpic -LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps -IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL -DINTEL2021_CHECK_OMP - -# Global options -################ -# -# 1 : Activate -# 0 : Deactivate -# -[OPTION] -MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below -CACHE : 0 ; Enable cache_compile.py -OPENMP : 1 ; Append OpenMP flags - -# Optimization flags -#################### -# -# -xHost : Compile a binary optimized for the current architecture -# -O2 : O3 not better than O2. -# -ip : Inter-procedural optimizations -# -ftz : Flushes denormal results to zero -# -[OPT] -FC : -traceback -FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g - -# Profiling flags -################# -# -[PROFILE] -FC : -p -g -FCFLAGS : -xSSE4.2 -O2 -ip -ftz - -# Debugging flags -################# -# -# -traceback : Activate backtrace on runtime -# -fpe0 : All floating point exaceptions -# -C : Checks uninitialized variables, array subscripts, etc... -# -g : Extra debugging information -# -xSSE2 : Valgrind needs a very simple x86 executable -# -[DEBUG] -FC : -g -traceback -FCFLAGS : -xSSE2 -C -fpe0 -implicitnone - -# OpenMP flags -################# -# -[OPENMP] -FC : -qopenmp -IRPF90_FLAGS : --openmp - From d997b807e41da06472b04644c4cb69792ffb4fdc Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 11:06:26 +0100 Subject: [PATCH 06/14] update errror message --- src/fci/check_omp.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f index ffc113d6..1413ba7e 100644 --- a/src/fci/check_omp.irp.f +++ b/src/fci/check_omp.irp.f @@ -218,7 +218,7 @@ program check_omp print*,'+' print*,'call omp_set_max_active_levels(5)' print*,'' - print*,'Good luck...' + print*,'Try an other compiler and good luck...' endif if (is_working) then From 62cb1531269715d2db628b1ea7272281d22ce42c Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 12:01:37 +0100 Subject: [PATCH 07/14] cleaner test version --- src/fci/check_omp.irp.f | 55 ++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f index 1413ba7e..bd5c204f 100644 --- a/src/fci/check_omp.irp.f +++ b/src/fci/check_omp.irp.f @@ -4,15 +4,19 @@ program check_omp implicit none - integer :: i,j,k,l,m,n,x,z,setting + integer :: i,j,k,l,m,n,x,z,setting,nb_setting double precision :: w1,w2,c1,c2 double precision, allocatable :: accu(:,:,:,:) logical :: must_exit, verbose, is_working + logical, allocatable :: is_working_n(:) x = 4 - allocate(accu(x,x,x,x)) + nb_setting = 4 - verbose = .False. + allocate(accu(x,x,x,x)) + allocate(is_working_n(nb_setting)) + + verbose = .True. accu = 0d0 must_exit = .False. @@ -39,11 +43,9 @@ program check_omp ! set the number of threads call omp_set_num_threads(2) - do z = 1, 4 + is_working_n = .True. - if (must_exit) then - exit - endif + do z = 1, nb_setting call omp_set_max_active_levels(1) call omp_set_nested(.False.) @@ -67,13 +69,13 @@ program check_omp !$OMP SHARED(accu) if (verbose) then - print*,'Nb threads level 1:', omp_get_num_threads() + print*,'Setting:',setting,'Nb threads level 1:', omp_get_num_threads() endif !$OMP MASTER if (omp_get_num_threads()==1) then print*,'Setting',setting,"error at level 1" - setting = -1 + is_working_n(z) = .False. endif !$OMP END MASTER @@ -94,13 +96,13 @@ program check_omp !$OMP SHARED(accu) if (verbose) then - print*,'Nb threads level 2:', omp_get_num_threads() + print*,'Setting:',setting,'Nb threads level 2:', omp_get_num_threads() endif !$OMP MASTER - if (omp_get_num_threads()==1 .and. setting >= 0) then + if (omp_get_num_threads()==1 .and. is_working_n(z)) then print*,'Setting',setting,"error at level 2" - setting = -1 + is_working_n(z) = .False. endif !$OMP END MASTER @@ -121,13 +123,13 @@ program check_omp !$OMP SHARED(accu) if (verbose) then - print*,'Nb threads level 3:', omp_get_num_threads() + print*,'Setting:',setting,'Nb threads level 3:', omp_get_num_threads() endif !$OMP MASTER - if (omp_get_num_threads()==1 .and. setting >= 0) then + if (omp_get_num_threads()==1 .and. is_working_n(z)) then print*,'Setting',setting,"error at level 3" - setting = -1 + is_working_n(z) = .False. endif !$OMP END MASTER @@ -148,22 +150,13 @@ program check_omp !$OMP SHARED(accu) if (verbose) then - print*,'Nb threads level 4:', omp_get_num_threads() + print*,'Setting:',setting,'Nb threads level 4:', omp_get_num_threads() endif !$OMP MASTER - if (omp_get_num_threads()==1 .and. setting >= 0) then + if (omp_get_num_threads()==1 .and. is_working_n(z)) then print*,'Setting',setting,"error at level 4" - elseif(omp_get_num_threads()==1 .or. setting == 0) then - else - must_exit = .True. - endif - - if ( z == 1 .and. setting == 0) then - is_working = .True. - elseif (z == 1 .and. setting == -1) then - is_working = .False. - else + is_working_n(z) = .False. endif !$OMP END MASTER @@ -191,17 +184,17 @@ program check_omp print*,'' - if (setting == 1) then + if (is_working_n(2)) then print*,'The parallelization works on 4 levels with:' print*,'call omp_set_max_active_levels(5)' print*,'' print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - elseif (setting == 2) then + elseif (is_working_n(3)) then print*,'The parallelization works on 4 levels with:' print*,'call omp_set_nested(.True.)' print*,'' print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' - elseif (setting == 3) then + elseif (is_working_n(4)) then print*,'The parallelization works on 4 levels with:' print*,'call omp_set_nested(.True.)' print*,'+' @@ -221,7 +214,7 @@ program check_omp print*,'Try an other compiler and good luck...' endif - if (is_working) then + if (is_working_n(1)) then print*,'' print*,'==========================================================' print*,'Your actual set up works for parallelization with 4 levels' From 0a4aec9f5ebd1c7246a0747037a7b5c715007c8f Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 22:39:59 +0100 Subject: [PATCH 08/14] script test, omp flag --- config/ifort.cfg | 2 +- config/ifort_avx.cfg | 2 +- config/ifort_mpi.cfg | 2 +- config/ifort_rome.cfg | 2 +- config/ifort_xHost.cfg | 2 +- scripts/verif_omp/check_actual_setup.sh | 10 + scripts/verif_omp/check_omp_v2.f90 | 175 +++++++++++++ scripts/verif_omp/check_required_setup.sh | 19 ++ scripts/verif_omp/study_omp.sh | 24 ++ src/fci/check_omp.irp.f | 235 ------------------ src/fci/check_omp_actual_setup.irp.f | 174 +++++++++++++ ...mp.irp.f => set_multiple_levels_omp.irp.f} | 2 +- 12 files changed, 408 insertions(+), 241 deletions(-) create mode 100755 scripts/verif_omp/check_actual_setup.sh create mode 100644 scripts/verif_omp/check_omp_v2.f90 create mode 100755 scripts/verif_omp/check_required_setup.sh create mode 100755 scripts/verif_omp/study_omp.sh delete mode 100644 src/fci/check_omp.irp.f create mode 100644 src/fci/check_omp_actual_setup.irp.f rename src/utils/{test_set_multiple_levels_omp.irp.f => set_multiple_levels_omp.irp.f} (87%) diff --git a/config/ifort.cfg b/config/ifort.cfg index 714c4b10..f8685bc0 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index a2cb4c8a..b14369d3 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index e0d489a0..16be2ed2 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -59,6 +59,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index 5ed01227..9bd41096 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg index ddb4aa2d..aa5bb966 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_xHost.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp +FC : -qopenmp -w IRPF90_FLAGS : --openmp diff --git a/scripts/verif_omp/check_actual_setup.sh b/scripts/verif_omp/check_actual_setup.sh new file mode 100755 index 00000000..f275394f --- /dev/null +++ b/scripts/verif_omp/check_actual_setup.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +echo "" +echo "Please wait..." +echo "" +cd ../../src/fci +ninja || echo "Please recompile from the root" +echo "" +./check_omp_actual_setup +cd ../../scripts/verif_omp diff --git a/scripts/verif_omp/check_omp_v2.f90 b/scripts/verif_omp/check_omp_v2.f90 new file mode 100644 index 00000000..ca6af8bd --- /dev/null +++ b/scripts/verif_omp/check_omp_v2.f90 @@ -0,0 +1,175 @@ +program check_omp_v2 + + use omp_lib + + implicit none + + integer :: accu, accu2 + integer :: s, n_setting + logical :: verbose, test_versions + logical, allocatable :: is_working(:) + + verbose = .False. + test_versions = .True. + n_setting = 4 + + allocate(is_working(n_setting)) + + is_working = .False. + + ! set the number of threads + call omp_set_num_threads(2) + + do s = 1, n_setting + + accu = 0 + accu2 = 0 + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (s==1) then + !call set_multiple_levels_omp() + cycle + elseif (s==2) then + call omp_set_max_active_levels(5) + elseif (s==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + ! Level 1 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 1:',omp_get_num_threads() + endif + + ! Level 2 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 2:',omp_get_num_threads() + endif + + ! Level 3 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 3:',omp_get_num_threads() + endif + + call check_omp_in_subroutine(accu2) + + ! Level 4 + !$OMP PARALLEL + + if (verbose) then + print*,'Num threads level 4:',omp_get_num_threads() + endif + + !$OMP ATOMIC + accu = accu + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + if (verbose) then + print*,'Setting:',s,'accu=',accu + print*,'Setting:',s,'accu2=',accu2 + endif + + if (accu == 16 .and. accu2 == 16) then + is_working(s) = .True. + endif + + enddo + + if (verbose) then + if (is_working(2)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (is_working(3)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (is_working(4)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Try an other compiler and good luck...' + endif + + ! if (is_working(1)) then + ! print*,'' + ! print*,'==========================================================' + ! print*,'Your actual set up works for parallelization with 4 levels' + ! print*,'==========================================================' + ! print*,'' + ! else + ! print*,'' + ! print*,'===================================================================' + ! print*,'Your actual set up does not work for parallelization with 4 levels' + ! print*,'Please look at the previous messages to understand the requirements' + ! print*,'===================================================================' + ! print*,'' + ! endif + endif + + ! List of working flags + if (test_versions) then + print*,'Tests:',is_working(2:4) + endif + + ! IRPF90_FLAGS + if (is_working(2)) then + print*,'-DSET_MAX_ACT' + elseif (is_working(3)) then + print*,'-DSET_NESTED' + elseif (is_working(4)) then + print*,'-DSET_MAX_ACT -DSET_NESTED' + else + print*,'ERROR' + endif + +end + +subroutine check_omp_in_subroutine(accu2) + + implicit none + + integer, intent(inout) :: accu2 + + !$OMP PARALLEL + + !$OMP ATOMIC + accu2 = accu2 + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + +end diff --git a/scripts/verif_omp/check_required_setup.sh b/scripts/verif_omp/check_required_setup.sh new file mode 100755 index 00000000..facb6cbb --- /dev/null +++ b/scripts/verif_omp/check_required_setup.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +# take one argument which is the compiler used +# return the required IRPF90_FLAGS for the $1 compiler + +if [ -z "$1" ] +then + echo "Give the compiler in argument" +else + +$1 --version > /dev/null \ +&& $1 -O0 -fopenmp check_omp_v2.f90 \ +&& ./a.out | tail -n 1 + + +# if there is an error or if the compiler is not found +$1 --version > /dev/null || echo 'compiler not found' + +fi diff --git a/scripts/verif_omp/study_omp.sh b/scripts/verif_omp/study_omp.sh new file mode 100755 index 00000000..1fdd7b26 --- /dev/null +++ b/scripts/verif_omp/study_omp.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +list_comp="ifort gfortran-7 gfortran-8 gfortran-9" + +FILE=results.dat + +touch $FILE +rm $FILE + +echo "1: omp_set_max_active_levels(5)" >> $FILE +echo "2: omp_set_nested(.True.)" >> $FILE +echo "3: 1 + 2" >> $FILE +echo "" >> $FILE +echo "1 2 3" >> $FILE +for comp in $list_comp +do + $comp --version > /dev/null \ + && $comp -O0 -fopenmp check_omp_v2.f90 \ + && echo $(./a.out | grep "Tests:" | cut -d ":" -f2- ) $(echo " : ") $($comp --version | head -n 1) >> $FILE + +done + +cat $FILE + diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f deleted file mode 100644 index bd5c204f..00000000 --- a/src/fci/check_omp.irp.f +++ /dev/null @@ -1,235 +0,0 @@ -program check_omp - - use omp_lib - - implicit none - - integer :: i,j,k,l,m,n,x,z,setting,nb_setting - double precision :: w1,w2,c1,c2 - double precision, allocatable :: accu(:,:,:,:) - logical :: must_exit, verbose, is_working - logical, allocatable :: is_working_n(:) - - x = 4 - nb_setting = 4 - - allocate(accu(x,x,x,x)) - allocate(is_working_n(nb_setting)) - - verbose = .True. - - accu = 0d0 - must_exit = .False. - - !$OMP PARALLEL - if (OMP_GET_NUM_THREADS() == 1) then - print*,'' - print*,'1 thread, no parallelization possible' - print*,'' - must_exit=.True. - endif - !$OMP END PARALLEL - if (must_exit) then - call abort - endif - - ! reset the number of max active levels - !call omp_set_max_active_levels(1) - - !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - !call intel_check_omp() - !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - - ! set the number of threads - call omp_set_num_threads(2) - - is_working_n = .True. - - do z = 1, nb_setting - - call omp_set_max_active_levels(1) - call omp_set_nested(.False.) - - if (z==1) then - call test_set_multiple_levels_omp() - !call test_set_multiple_levels_omp - elseif (z==2) then - call omp_set_max_active_levels(5) - elseif (z==3) then - call omp_set_nested(.True.) - else - call omp_set_nested(.True.) - call omp_set_max_active_levels(5) - endif - - setting = z-1 - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 1:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1) then - print*,'Setting',setting,"error at level 1" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l) + 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 2:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 2" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 3:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 3" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - if (verbose) then - print*,'Setting:',setting,'Nb threads level 4:', omp_get_num_threads() - endif - - !$OMP MASTER - if (omp_get_num_threads()==1 .and. is_working_n(z)) then - print*,'Setting',setting,"error at level 4" - is_working_n(z) = .False. - endif - !$OMP END MASTER - - ! !$OMP DO - ! do l = 1, x - ! do k = 1, x - ! do j = 1, x - ! do i = 1, x - ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - enddo - - print*,'' - - if (is_working_n(2)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - elseif (is_working_n(3)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'' - print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' - elseif (is_working_n(4)) then - print*,'The parallelization works on 4 levels with:' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' - else - print*,'The parallelization on multiple levels does not work with:' - print*,'call omp_set_max_active_levels(5)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'or' - print*,'call omp_set_nested(.True.)' - print*,'+' - print*,'call omp_set_max_active_levels(5)' - print*,'' - print*,'Try an other compiler and good luck...' - endif - - if (is_working_n(1)) then - print*,'' - print*,'==========================================================' - print*,'Your actual set up works for parallelization with 4 levels' - print*,'==========================================================' - print*,'' - else - print*,'' - print*,'===================================================================' - print*,'Your actual set up works for parallelization with 4 levels' - print*,'Please look at the previous messages to understand the requirements' - print*,'If it does not work even with the right irpf90 flags, clean and' - print*,'recompile your code at ${QP_ROOT}' - print*,'===================================================================' - print*,'' - endif - -end - diff --git a/src/fci/check_omp_actual_setup.irp.f b/src/fci/check_omp_actual_setup.irp.f new file mode 100644 index 00000000..70514bd3 --- /dev/null +++ b/src/fci/check_omp_actual_setup.irp.f @@ -0,0 +1,174 @@ +program check_omp_actual_setup + + use omp_lib + + implicit none + + integer :: accu, accu2 + integer :: s, n_setting + logical :: verbose, test_versions + logical, allocatable :: is_working(:) + + verbose = .True. + test_versions = .False. + n_setting = 4 + + allocate(is_working(n_setting)) + + is_working = .False. + + ! set the number of threads + call omp_set_num_threads(2) + + do s = 1, n_setting + + accu = 0 + accu2 = 0 + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (s==1) then + call set_multiple_levels_omp() + elseif (s==2) then + call omp_set_max_active_levels(5) + elseif (s==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + ! Level 1 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 1:',omp_get_num_threads() + endif + + ! Level 2 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 2:',omp_get_num_threads() + endif + + ! Level 3 + !$OMP PARALLEL + if (verbose) then + print*,'Num threads level 3:',omp_get_num_threads() + endif + + call check_omp_in_subroutine(accu2) + + ! Level 4 + !$OMP PARALLEL + + if (verbose) then + print*,'Num threads level 4:',omp_get_num_threads() + endif + + !$OMP ATOMIC + accu = accu + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + + !$OMP END PARALLEL + + if (verbose) then + print*,'Setting:',s,'accu=',accu + print*,'Setting:',s,'accu2=',accu2 + endif + + if (accu == 16 .and. accu2 == 16) then + is_working(s) = .True. + endif + + enddo + + if (verbose) then + if (is_working(2)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (is_working(3)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (is_working(4)) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Try an other compiler and good luck...' + endif + + if (is_working(1)) then + print*,'' + print*,'==========================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'==========================================================' + print*,'' + else + print*,'' + print*,'===================================================================' + print*,'Your actual set up does not work for parallelization with 4 levels' + print*,'Please look at the previous messages to understand the requirements' + print*,'===================================================================' + print*,'' + endif + endif + + ! List of working flags + if (test_versions) then + print*,is_working(2:4) + endif + + ! IRPF90_FLAGS + if (is_working(2)) then + print*,'-DSET_MAX_ACT' + elseif (is_working(3)) then + print*,'-DSET_NESTED' + elseif (is_working(4)) then + print*,'-DSET_MAX_ACT -DSET_NESTED' + else + print*,'ERROR' + endif + +end + +subroutine check_omp_in_subroutine(accu2) + + implicit none + + integer, intent(inout) :: accu2 + + !$OMP PARALLEL + + !$OMP ATOMIC + accu2 = accu2 + 1 + !$OMP END ATOMIC + + !$OMP END PARALLEL + +end diff --git a/src/utils/test_set_multiple_levels_omp.irp.f b/src/utils/set_multiple_levels_omp.irp.f similarity index 87% rename from src/utils/test_set_multiple_levels_omp.irp.f rename to src/utils/set_multiple_levels_omp.irp.f index c4f721a1..a09f615a 100644 --- a/src/utils/test_set_multiple_levels_omp.irp.f +++ b/src/utils/set_multiple_levels_omp.irp.f @@ -1,4 +1,4 @@ -subroutine test_set_multiple_levels_omp() +subroutine set_multiple_levels_omp() ! Doc : idk From 991c198220d75cf87301479d2fff618dcfe2b8c4 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 22:53:05 +0100 Subject: [PATCH 09/14] davidson with IRPF90 flags for multiple levels omp --- src/davidson/davidson_parallel.irp.f | 3 ++- src/davidson/davidson_parallel_csf.irp.f | 4 +++- src/davidson/davidson_parallel_nos2.irp.f | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index 8fd023da..fcee16bc 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -508,7 +508,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) endif - call omp_set_max_active_levels(5) + !call omp_set_max_active_levels(5) + call set_multiple_levels_omp() !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() diff --git a/src/davidson/davidson_parallel_csf.irp.f b/src/davidson/davidson_parallel_csf.irp.f index fe651b1d..90e4303e 100644 --- a/src/davidson/davidson_parallel_csf.irp.f +++ b/src/davidson/davidson_parallel_csf.irp.f @@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) print *, irp_here, ': Failed in zmq_set_running' endif - call omp_set_max_active_levels(4) + !call omp_set_max_active_levels(4) + call set_multiple_levels_omp() + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() if (ithread == 0 ) then diff --git a/src/davidson/davidson_parallel_nos2.irp.f b/src/davidson/davidson_parallel_nos2.irp.f index 84cbe3af..091b8666 100644 --- a/src/davidson/davidson_parallel_nos2.irp.f +++ b/src/davidson/davidson_parallel_nos2.irp.f @@ -464,7 +464,9 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) print *, irp_here, ': Failed in zmq_set_running' endif - call omp_set_max_active_levels(4) + !call omp_set_max_active_levels(4) + call set_multiple_levels_omp() + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() if (ithread == 0 ) then From c93938e44307021f8e7a9ae52bfc3d0c06d9323f Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 19 Nov 2021 23:53:20 +0100 Subject: [PATCH 10/14] remove test omp compilation flag --- config/ifort.cfg | 2 +- config/ifort_avx.cfg | 2 +- config/ifort_mpi.cfg | 2 +- config/ifort_rome.cfg | 2 +- config/ifort_xHost.cfg | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/config/ifort.cfg b/config/ifort.cfg index f8685bc0..714c4b10 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp -w +FC : -qopenmp IRPF90_FLAGS : --openmp diff --git a/config/ifort_avx.cfg b/config/ifort_avx.cfg index b14369d3..a2cb4c8a 100644 --- a/config/ifort_avx.cfg +++ b/config/ifort_avx.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp -w +FC : -qopenmp IRPF90_FLAGS : --openmp diff --git a/config/ifort_mpi.cfg b/config/ifort_mpi.cfg index 16be2ed2..e0d489a0 100644 --- a/config/ifort_mpi.cfg +++ b/config/ifort_mpi.cfg @@ -59,6 +59,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp -w +FC : -qopenmp IRPF90_FLAGS : --openmp diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index 9bd41096..5ed01227 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp -w +FC : -qopenmp IRPF90_FLAGS : --openmp diff --git a/config/ifort_xHost.cfg b/config/ifort_xHost.cfg index aa5bb966..ddb4aa2d 100644 --- a/config/ifort_xHost.cfg +++ b/config/ifort_xHost.cfg @@ -58,6 +58,6 @@ FCFLAGS : -xSSE2 -C -fpe0 -implicitnone ################# # [OPENMP] -FC : -qopenmp -w +FC : -qopenmp IRPF90_FLAGS : --openmp From 082b32b24f5817ea2f5e217835701a4507677ef7 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 23 Nov 2021 10:33:37 +0100 Subject: [PATCH 11/14] remove comments --- src/utils/set_multiple_levels_omp.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/set_multiple_levels_omp.irp.f b/src/utils/set_multiple_levels_omp.irp.f index a09f615a..4be3af5b 100644 --- a/src/utils/set_multiple_levels_omp.irp.f +++ b/src/utils/set_multiple_levels_omp.irp.f @@ -5,11 +5,11 @@ subroutine set_multiple_levels_omp() implicit none IRP_IF SET_MAX_ACT - print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)' + !print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)' call omp_set_max_active_levels(5) IRP_ENDIF IRP_IF SET_NESTED - print*,'SET_NESTED: True, call omp_set_nested(.True.)' + !print*,'SET_NESTED: True, call omp_set_nested(.True.)' call omp_set_nested(.True.) IRP_ENDIF From adc94fcb2958e711fb0884a747d8f81ebc3761f9 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 23 Nov 2021 14:55:37 +0100 Subject: [PATCH 12/14] script to automatically add the omp irpf90 flags --- scripts/verif_omp/update_comp.sh | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 scripts/verif_omp/update_comp.sh diff --git a/scripts/verif_omp/update_comp.sh b/scripts/verif_omp/update_comp.sh new file mode 100755 index 00000000..2199c3f6 --- /dev/null +++ b/scripts/verif_omp/update_comp.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Compiler +COMP=$1 + +# Path to file.cfg +config_PATH="../../config/" +END="*.cfg" +CONFIG="/config/" + +#LIST=${config_PATH}${COMP}${END} # without ${QP_ROOT} +LIST=${QP_ROOT}${CONFIG}${COMP}${END} + +if [ -z "$1" ] +then + echo "Give the compiler in argument" +else + + # List of the config files for the compiler + #list_files=$(ls ../../config/$comp*.cfg) #does not give the right list + list_files=${LIST} + echo "Files that will be modified:" + echo $list_files + + # Add the flags + for file in $list_files + do + echo $file + ACTUAL=$(grep "IRPF90_FLAGS : --openmp" $file) + FLAGS=$(./check_required_setup.sh $COMP) + SPACE=" " + BASE="IRPF90_FLAGS : --openmp" + NEW=${BASE}${SPACE}${FLAGS} + + sed "s/${ACTUAL}/${NEW}/" $file + # -i # to change the files + done + +fi From 7fcd03b911424770fcd833549f4e573569f83604 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 23 Nov 2021 15:00:54 +0100 Subject: [PATCH 13/14] comments --- scripts/verif_omp/check_actual_setup.sh | 2 ++ scripts/verif_omp/study_omp.sh | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/scripts/verif_omp/check_actual_setup.sh b/scripts/verif_omp/check_actual_setup.sh index f275394f..6eaa4517 100755 --- a/scripts/verif_omp/check_actual_setup.sh +++ b/scripts/verif_omp/check_actual_setup.sh @@ -1,5 +1,7 @@ #!/bin/sh +# go in qp2/src/fci to run check_omp_actual_setup +# to see if we can run in parallel an omp section in another one echo "" echo "Please wait..." echo "" diff --git a/scripts/verif_omp/study_omp.sh b/scripts/verif_omp/study_omp.sh index 1fdd7b26..00668d59 100755 --- a/scripts/verif_omp/study_omp.sh +++ b/scripts/verif_omp/study_omp.sh @@ -1,17 +1,22 @@ #!/bin/sh +# list of compilers list_comp="ifort gfortran-7 gfortran-8 gfortran-9" +# file to store the results FILE=results.dat touch $FILE rm $FILE +# Comments echo "1: omp_set_max_active_levels(5)" >> $FILE echo "2: omp_set_nested(.True.)" >> $FILE echo "3: 1 + 2" >> $FILE echo "" >> $FILE echo "1 2 3" >> $FILE + +# loop on the comp for comp in $list_comp do $comp --version > /dev/null \ @@ -20,5 +25,6 @@ do done +# Display cat $FILE From b16edd29e7959901b4676914fbcd319343f9a013 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 25 Nov 2021 10:28:44 +0100 Subject: [PATCH 14/14] fixed error --- config/ifort.cfg | 2 +- config/ifort_debug.cfg | 2 +- config/ifort_rome.cfg | 2 +- scripts/verif_omp/update_comp.sh | 22 ++++++++++++++++------ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/config/ifort.cfg b/config/ifort.cfg index 714c4b10..0382360a 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/config/ifort_debug.cfg b/config/ifort_debug.cfg index 9b718380..d70b1465 100644 --- a/config/ifort_debug.cfg +++ b/config/ifort_debug.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL # Global options ################ diff --git a/config/ifort_rome.cfg b/config/ifort_rome.cfg index 5ed01227..1ac78717 100644 --- a/config/ifort_rome.cfg +++ b/config/ifort_rome.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/scripts/verif_omp/update_comp.sh b/scripts/verif_omp/update_comp.sh index 2199c3f6..14b644de 100755 --- a/scripts/verif_omp/update_comp.sh +++ b/scripts/verif_omp/update_comp.sh @@ -21,17 +21,27 @@ else list_files=${LIST} echo "Files that will be modified:" echo $list_files - + + # Flags that must be added + FLAGS=$(./check_required_setup.sh $COMP) + # Add the flags for file in $list_files do echo $file - ACTUAL=$(grep "IRPF90_FLAGS : --openmp" $file) - FLAGS=$(./check_required_setup.sh $COMP) - SPACE=" " - BASE="IRPF90_FLAGS : --openmp" - NEW=${BASE}${SPACE}${FLAGS} + BASE="IRPF90_FLAGS : --ninja" + ACTUAL=$(grep "$BASE" $file) + # To have only one time each flag + grep " -DSET_MAX_ACT" $file && ${ACTUAL/" -DSET_MAX"/""} + grep " -DSET_NESTED" $file && ${ACTUAL/" -DSET_NESTED"/""} + SPACE=" " + + NEW=${ACTUAL}${SPACE}${FLAGS} + + # Debug + #echo ${NEW} + sed "s/${ACTUAL}/${NEW}/" $file # -i # to change the files done