From f7a2710f5c4f515699b0cb9a338f615b7a251301 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Oct 2016 09:46:59 +0200 Subject: [PATCH] Corrected bug in transposed collector --- src/Davidson/davidson_parallel.irp.f | 20 +++----- src/Davidson/u0Hu0.irp.f | 72 ++++++++++++++-------------- 2 files changed, 43 insertions(+), 49 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 90f2ec8f..6935256e 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -327,11 +327,6 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer , allocatable :: idx(:) double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:) double precision , allocatable :: st(:,:) - integer :: deleted - logical, allocatable :: done(:) - allocate(done(shortcut_(0,1))) - deleted = 0 - done = .false. allocate(idx(dav_size)) allocate(vt(N_states_diag, dav_size)) @@ -339,6 +334,9 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) allocate(v0t(N_states_diag, dav_size)) allocate(s0t(N_states_diag, dav_size)) + v0t = 00.d0 + s0t = 00.d0 + more = 1 do while (more == 1) @@ -346,9 +344,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) !DIR$ FORCEINLINE call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) - deleted += 1 end do - deallocate(idx,vt,st,done) + deallocate(idx,vt,st) call dtranspose(v0t,size(v0t,1), v0, size(v0,1), N_states_diag, dav_size) call dtranspose(s0t,size(s0t,1), s0, size(s0,1), N_states_diag, dav_size) @@ -391,7 +388,7 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) else if(i==1) then call davidson_miniserver_run() else - call davidson_slave_inproc(i-1) + call davidson_slave_inproc(i) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'davidson') @@ -442,10 +439,8 @@ subroutine davidson_miniserver_end() requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) - rc = f77_zmq_send(requester, "end", 3, ZMQ_NOBLOCK) - if (rc > 0) then - rc = f77_zmq_recv(requester, buf, 3, 0) - endif + rc = f77_zmq_send(requester, "end", 3, 0) + rc = f77_zmq_recv(requester, buf, 3, 0) rc = f77_zmq_close(requester) end subroutine @@ -471,7 +466,6 @@ subroutine davidson_miniserver_get() rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) TOUCH dav_det dav_ut - rc = f77_zmq_close(requester) end subroutine diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 8c2b373c..6b3f2782 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -262,47 +262,47 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) - allocate(vt(N_st_8,n),st(N_st_8,n)) - Vt = 0.d0 - St = 0.d0 - - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0,2) - do i=shortcut(sh,2),shortcut(sh+1,2)-1 - org_i = sort_idx(i,2) - do j=shortcut(sh,2),i-1 - org_j = sort_idx(j,2) - ext = 0 - do ni=1,Nint - ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) - end do - if(ext == 4) then - call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) - call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) - do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) - enddo - end if + allocate(vt(N_st_8,n),st(N_st_8,n)) + Vt = 0.d0 + St = 0.d0 + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0,2) + do i=shortcut(sh,2),shortcut(sh+1,2)-1 + org_i = sort_idx(i,2) + do j=shortcut(sh,2),i-1 + org_j = sort_idx(j,2) + ext = 0 + do ni=1,Nint + ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2))) end do + if(ext == 4) then + call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) + call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) + do istate=1,n_st + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) + vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) + st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + enddo + end if end do + end do + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do istate=1,N_st + do i=n,1,-1 + v_0(i,istate) = v_0(i,istate) + vt(istate,i) + s_0(i,istate) = s_0(i,istate) + st(istate,i) enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do istate=1,N_st - do i=n,1,-1 - v_0(i,istate) = v_0(i,istate) + vt(istate,i) - s_0(i,istate) = s_0(i,istate) + st(istate,i) - enddo - enddo - !$OMP END CRITICAL + enddo + !$OMP END CRITICAL - deallocate(vt,st) + deallocate(vt,st) !$OMP END PARALLEL do istate=1,N_st