diff --git a/config/ifort.cfg b/config/ifort.cfg index 63c4a5d3..714c4b10 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -9,7 +9,7 @@ FC : ifort -fpic LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps IRPF90 : irpf90 -IRPF90_FLAGS : --ninja --align=32 -DINTEL +IRPF90_FLAGS : --ninja --align=32 -DINTEL # Global options ################ diff --git a/src/fci/check_omp.irp.f b/src/fci/check_omp.irp.f new file mode 100644 index 00000000..ffc113d6 --- /dev/null +++ b/src/fci/check_omp.irp.f @@ -0,0 +1,242 @@ +program check_omp + + use omp_lib + + implicit none + + integer :: i,j,k,l,m,n,x,z,setting + double precision :: w1,w2,c1,c2 + double precision, allocatable :: accu(:,:,:,:) + logical :: must_exit, verbose, is_working + + x = 4 + allocate(accu(x,x,x,x)) + + verbose = .False. + + accu = 0d0 + must_exit = .False. + + !$OMP PARALLEL + if (OMP_GET_NUM_THREADS() == 1) then + print*,'' + print*,'1 thread, no parallelization possible' + print*,'' + must_exit=.True. + endif + !$OMP END PARALLEL + if (must_exit) then + call abort + endif + + ! reset the number of max active levels + !call omp_set_max_active_levels(1) + + !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + !call intel_check_omp() + !print*,'omp_get_max_active_levels:',omp_get_max_active_levels() + + ! set the number of threads + call omp_set_num_threads(2) + + do z = 1, 4 + + if (must_exit) then + exit + endif + + call omp_set_max_active_levels(1) + call omp_set_nested(.False.) + + if (z==1) then + call test_set_multiple_levels_omp() + !call test_set_multiple_levels_omp + elseif (z==2) then + call omp_set_max_active_levels(5) + elseif (z==3) then + call omp_set_nested(.True.) + else + call omp_set_nested(.True.) + call omp_set_max_active_levels(5) + endif + + setting = z-1 + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 1:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1) then + print*,'Setting',setting,"error at level 1" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l) + 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 2:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 2" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 3:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 3" + setting = -1 + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,k,l,m,n) & + !$OMP SHARED(accu) + + if (verbose) then + print*,'Nb threads level 4:', omp_get_num_threads() + endif + + !$OMP MASTER + if (omp_get_num_threads()==1 .and. setting >= 0) then + print*,'Setting',setting,"error at level 4" + elseif(omp_get_num_threads()==1 .or. setting == 0) then + else + must_exit = .True. + endif + + if ( z == 1 .and. setting == 0) then + is_working = .True. + elseif (z == 1 .and. setting == -1) then + is_working = .False. + else + endif + !$OMP END MASTER + + ! !$OMP DO + ! do l = 1, x + ! do k = 1, x + ! do j = 1, x + ! do i = 1, x + ! accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + !$OMP END PARALLEL + + enddo + + print*,'' + + if (setting == 1) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + elseif (setting == 2) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'' + print*,'Please use the irpf90 flag -DSET_NESTED in qp2/config/${compiler_name}.cfg' + elseif (setting == 3) then + print*,'The parallelization works on 4 levels with:' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Please use the irpf90 flags -DSET_NESTED -DSET_MAX_ACT in qp2/config/${compiler_name}.cfg' + else + print*,'The parallelization on multiple levels does not work with:' + print*,'call omp_set_max_active_levels(5)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'or' + print*,'call omp_set_nested(.True.)' + print*,'+' + print*,'call omp_set_max_active_levels(5)' + print*,'' + print*,'Good luck...' + endif + + if (is_working) then + print*,'' + print*,'==========================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'==========================================================' + print*,'' + else + print*,'' + print*,'===================================================================' + print*,'Your actual set up works for parallelization with 4 levels' + print*,'Please look at the previous messages to understand the requirements' + print*,'If it does not work even with the right irpf90 flags, clean and' + print*,'recompile your code at ${QP_ROOT}' + print*,'===================================================================' + print*,'' + endif + +end + diff --git a/src/fci/test_intel_check_omp.irp.f b/src/fci/test_intel_check_omp.irp.f deleted file mode 100644 index 8a4711b3..00000000 --- a/src/fci/test_intel_check_omp.irp.f +++ /dev/null @@ -1,115 +0,0 @@ -program test_intel_check_omp - - use omp_lib - - implicit none - - integer :: i,j,k,l,m,n,x - double precision :: w1,w2,c1,c2 - double precision, allocatable :: accu(:,:,:,:) - - x = 4 - allocate(accu(x,x,x,x)) - - accu = 0d0 - - !$OMP PARALLEL - print*, 'Hello1 from:', OMP_GET_THREAD_NUM() - !$OMP END PARALLEL - - print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - call intel_check_omp() - print*,'omp_get_max_active_levels:',omp_get_max_active_levels() - - !call omp_set_max_active_levels(20000) - - !$OMP PARALLEL - print*, 'Hello2 from:', OMP_GET_THREAD_NUM() - !$OMP END PARALLEL - - call wall_time(w1) - call cpu_time(c1) - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 1',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l) + 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 2',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 3',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP PARALLEL & - !$OMP PRIVATE(i,j,k,l,m,n) & - !$OMP SHARED(accu) - - print*,'level 4',omp_get_num_threads() - !$OMP DO - do l = 1, x - do k = 1, x - do j = 1, x - do i = 1, x - accu(i,j,k,l) = accu(i,j,k,l)+ 1d0 - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - !$OMP END PARALLEL - - call wall_time(w2) - call cpu_time(c2) - - print*,accu(1,1,1,1) - print*,'wall time:', w2-w1 - print*,'cpu time:', c2-c1 - print*,'ration',(c2-c1)/(w2-w1) -end diff --git a/src/utils/intel_check_omp.irp.f b/src/utils/intel_check_omp.irp.f deleted file mode 100644 index af86b131..00000000 --- a/src/utils/intel_check_omp.irp.f +++ /dev/null @@ -1,20 +0,0 @@ -subroutine intel_check_omp() - -! Doc : idk - - implicit none - - IRP_IF INTEL2021_CHECK_OMP - call omp_set_max_active_levels(5) - print*,'INTEL2021_CHECK_OMP: true' - IRP_ENDIF - IRP_IF INTEL2019_CHECK_OMP - call omp_set_nested(.True.) - print*,'INTEL2019_CHECK_OMP: true' - IRP_ENDIF - IRP_IF GNU_CHECK_OMP - call omp_set_nested(.True.) - print*,'GNU_CHECK_OMP: true' - IRP_ENDIF - -end diff --git a/src/utils/test_set_multiple_levels_omp.irp.f b/src/utils/test_set_multiple_levels_omp.irp.f new file mode 100644 index 00000000..c4f721a1 --- /dev/null +++ b/src/utils/test_set_multiple_levels_omp.irp.f @@ -0,0 +1,16 @@ +subroutine test_set_multiple_levels_omp() + +! Doc : idk + + implicit none + + IRP_IF SET_MAX_ACT + print*,'SET_MAX_ACT: True, call omp_set_max_active_levels(5)' + call omp_set_max_active_levels(5) + IRP_ENDIF + IRP_IF SET_NESTED + print*,'SET_NESTED: True, call omp_set_nested(.True.)' + call omp_set_nested(.True.) + IRP_ENDIF + +end