From 2a54a2e4493bd0dd58a5f21343cabf68125aac17 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 5 Oct 2018 10:15:53 +0200 Subject: [PATCH] Improved sort --- src/Davidson/davidson_parallel.irp.f | 14 +++++------ src/Determinants/determinants.irp.f | 2 ++ src/Determinants/spindeterminants.irp.f | 31 ++++++++++++++++++++++--- src/Utils/sort.irp.f | 12 +++++++++- 4 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 5d150bb3..5ac05af5 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -323,7 +323,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) integer :: istep, imin, imax, ishift, ipos integer, external :: add_task_to_taskserver - integer, parameter :: tasksize=10000 + integer, parameter :: tasksize=20000 character*(100000) :: task istep=1 ishift=0 @@ -331,7 +331,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) ipos=1 - do imin=1,N_det,10000 + do imin=1,N_det,tasksize imax = min(N_det,imin-1+tasksize) do ishift=0,istep-1 write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|' @@ -352,12 +352,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) ipos=1 endif - integer, external :: zmq_set_running - if (zmq_set_running(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Failed in zmq_set_running' - endif - - allocate(u_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) @@ -396,6 +390,10 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) deallocate(u_t) + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif v_0 = 0.d0 s_0 = 0.d0 diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index 8d53d873..3d1d9879 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -336,8 +336,10 @@ end subroutine ! function. END_DOC +print *, '==> ', irp_here call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, & psi_det_sorted_bit, psi_coef_sorted_bit, N_states) +print *, ' <== ', irp_here END_PROVIDER subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out, N_st) diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 29d0eb30..90fa24bc 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -36,6 +36,7 @@ BEGIN_PROVIDER [ integer(bit_kind), psi_det_alpha, (N_int,psi_det_size) ] psi_det_alpha(k,i) = psi_det(k,1,i) enddo enddo + END_PROVIDER BEGIN_PROVIDER [ integer(bit_kind), psi_det_beta, (N_int,psi_det_size) ] @@ -70,13 +71,20 @@ BEGIN_TEMPLATE logical,allocatable :: duplicate(:) allocate ( iorder(N_det), bit_tmp(N_det), duplicate(N_det) ) - + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) do i=1,N_det iorder(i) = i bit_tmp(i) = spin_det_search_key(psi_det_$alpha(1,i),N_int) enddo + !$OMP END PARALLEL DO + +double precision :: w0,w1 +call wall_time(w0) call i8sort(bit_tmp,iorder,N_det) +call wall_time(w1) +print *, '==> ', irp_here, w1-w0 N_det_$alpha_unique = 0 last_key = 0_8 @@ -126,6 +134,7 @@ BEGIN_TEMPLATE N_det_$alpha_unique = j deallocate (iorder, bit_tmp, duplicate) + END_PROVIDER SUBST [ alpha ] @@ -430,11 +439,19 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) enddo !$OMP END PARALLEL DO call i8sort(to_sort, psi_bilinear_matrix_order, N_det) + !$OMP PARALLEL + !$OMP SINGLE call iset_order(psi_bilinear_matrix_rows,psi_bilinear_matrix_order,N_det) + !$OMP END SINGLE + !$OMP SINGLE call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det) + !$OMP END SINGLE + !$OMP DO do l=1,N_states call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det) enddo + !$OMP END DO + !$OMP END PARALLEL deallocate(to_sort) ASSERT (minval(psi_bilinear_matrix_rows) == 1) ASSERT (minval(psi_bilinear_matrix_columns) == 1) @@ -442,6 +459,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) ASSERT (maxval(psi_bilinear_matrix_rows) == N_det_alpha_unique) ASSERT (maxval(psi_bilinear_matrix_columns) == N_det_beta_unique) ASSERT (maxval(psi_bilinear_matrix_order) == N_det) + END_PROVIDER @@ -477,6 +495,7 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1) l = psi_bilinear_matrix_columns(1) psi_bilinear_matrix_columns_loc(l) = 1 + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k,l) do k=2,N_det if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then cycle @@ -488,9 +507,11 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1) stop '(psi_bilinear_matrix_columns(k) < 1)' endif enddo + !$OMP END PARALLEL DO psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1 ASSERT (minval(psi_bilinear_matrix_columns_loc) == 1) ASSERT (maxval(psi_bilinear_matrix_columns_loc) == N_det+1) + END_PROVIDER BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ] @@ -508,7 +529,6 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ END_DOC integer :: i,j,k,l - PROVIDE psi_coef_sorted_bit integer*8, allocatable :: to_sort(:) @@ -542,9 +562,11 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1) call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det) call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l) do l=1,N_states call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det) enddo + !$OMP END PARALLEL DO deallocate(to_sort) ASSERT (minval(psi_bilinear_matrix_transp_columns) == 1) ASSERT (minval(psi_bilinear_matrix_transp_rows) == 1) @@ -552,6 +574,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ ASSERT (maxval(psi_bilinear_matrix_transp_columns) == N_det_beta_unique) ASSERT (maxval(psi_bilinear_matrix_transp_rows) == N_det_alpha_unique) ASSERT (maxval(psi_bilinear_matrix_transp_order) == N_det) + END_PROVIDER BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_unique+1) ] @@ -564,6 +587,7 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_uniq l = psi_bilinear_matrix_transp_rows(1) psi_bilinear_matrix_transp_rows_loc(l) = 1 + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k,l) do k=2,N_det if (psi_bilinear_matrix_transp_rows(k) == psi_bilinear_matrix_transp_rows(k-1)) then cycle @@ -572,9 +596,11 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_uniq psi_bilinear_matrix_transp_rows_loc(l) = k endif enddo + !$OMP END PARALLEL DO psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1 ASSERT (minval(psi_bilinear_matrix_transp_rows_loc) == 1) ASSERT (maxval(psi_bilinear_matrix_transp_rows_loc) == N_det+1) + END_PROVIDER BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ] @@ -584,7 +610,6 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ] ! Order which allows to go from psi_bilinear_matrix_order_transp to psi_bilinear_matrix END_DOC integer :: k - psi_bilinear_matrix_order_transp_reverse = -1 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k) do k=1,N_det diff --git a/src/Utils/sort.irp.f b/src/Utils/sort.irp.f index 085d3d35..bf34d3bd 100644 --- a/src/Utils/sort.irp.f +++ b/src/Utils/sort.irp.f @@ -37,7 +37,11 @@ BEGIN_TEMPLATE integer,intent(in) :: isize $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) + !$OMP PARALLEL + !$OMP SINGLE call rec_$X_quicksort(x,iorder,isize,1,isize) + !$OMP END SINGLE + !$OMP END PARALLEL end recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last) @@ -70,11 +74,16 @@ BEGIN_TEMPLATE j=j-1 enddo if (first < i-1) then + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,first,i) if (i-first > 100000) call rec_$X_quicksort(x, iorder, isize, first, i-1) + !$OMP END TASK endif if (j+1 < last) then + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,last,j) if (last-j > 100000) call rec_$X_quicksort(x, iorder, isize, j+1, last) + !$OMP END TASK endif + !$OMP TASKWAIT end subroutine heap_$Xsort(x,iorder,isize) @@ -281,7 +290,8 @@ BEGIN_TEMPLATE $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) integer :: n - call $Xradix_sort(x,iorder,isize,-1) +! call $Xradix_sort(x,iorder,isize,-1) + call quick_$Xsort(x,iorder,isize) end subroutine $Xsort SUBST [ X, type ]