From 9afc82c878b67a44f8875b51ba1e86aa2dd1fbf1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Feb 2017 22:43:59 +0100 Subject: [PATCH] Less pressure on qp_run when ading tasks --- src/Davidson/davidson_parallel.irp.f | 23 +++++-------------- src/Davidson/u0Hu0.irp.f | 30 ++++++++++++++++++++----- src/Determinants/H_apply_zmq.template.f | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 5387ff5b..4ff3af03 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -28,8 +28,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) ii=0 sh = blockb do sh2=1,shortcut_(0,1) - exa = 0 - do ni=1,N_int + exa = popcnt(xor(version_(1,sh,1), version_(1,sh2,1))) + do ni=2,N_int exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1))) end do if(exa > 2) cycle @@ -44,8 +44,9 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1 if(i == j) cycle - ext = exa - do ni=1,N_int + ext = exa + popcnt(xor(sorted_i(1), sorted_(1,j,1))) + if(ext > 4) cycle + do ni=2,N_int ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) if(ext > 4) exit end do @@ -176,20 +177,6 @@ end subroutine -subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep) - use f77_zmq - implicit none - - integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket - integer ,intent(in) :: blockb, blockb2, istep - character*(512) :: task - - write(task,'(3(I9,X))') blockb, blockb2, istep - call add_task_to_taskserver(zmq_to_qp_run_socket, task) -end subroutine - - - subroutine davidson_slave_inproc(i) implicit none integer, intent(in) :: i diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 42e61b3a..2d1095cd 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -326,10 +326,20 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ PROVIDE nproc + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, & - !$OMP workload,istep,blockb2) + !$OMP workload,istep,blockb2,task,ipos,iposmax,send) ithread = omp_get_thread_num() if (ithread == 0 ) then + character(len=:), allocatable :: task + character(32) :: tmp_task + integer :: ipos, iposmax + logical :: send + iposmax = shortcut_(0,1)+32 + send = .False. + allocate(character(len=iposmax) :: task) + task = '' + ipos = 1 do sh=1,shortcut_(0,1),1 workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2 do i=sh, shortcut_(0,2), shortcut_(0,1) @@ -339,13 +349,23 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ end do istep = 1+ int(workload*target_workload_inv) do blockb2=0, istep-1 - call davidson_add_task(handler, sh, blockb2, istep) + write(tmp_task,'(3(I9,X),''|'',X)') sh, blockb2, istep + task = task//tmp_task + ipos += 32 + if (ipos+32 < iposmax) then + send = .True. + else + call add_task_to_taskserver(handler, trim(task)) + ipos=1 + task = '' + send = .False. + endif enddo - if (sh == shortcut_(0,1)/10 + 1) then - !$OMP BARRIER - endif enddo + if (send) call add_task_to_taskserver(handler, trim(task)) + deallocate(task) call zmq_set_running(handler) + !$OMP BARRIER call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then !$OMP BARRIER diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index 59544b79..ddedc5a2 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -38,7 +38,7 @@ subroutine $subroutine($params_main) do i_generator=1,N_det_generators $skip write(task,*) i_generator - call add_task_to_taskserver(zmq_to_qp_run_socket,task) + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) enddo allocate ( pt2_generators(N_states,N_det_generators), &