diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 0b4a84d8..9b5130bc 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -11,18 +11,19 @@ subroutine davidson_process(block, N, idx, vt, st) integer , intent(in) :: block integer , intent(inout) :: N - integer , intent(inout) :: idx(N_det) - double precision , intent(inout) :: vt(N_states, N_det) - double precision , intent(inout) :: st(N_states, N_det) + integer , intent(inout) :: idx(dav_size) + double precision , intent(inout) :: vt(N_states, dav_size) + double precision , intent(inout) :: st(N_states, dav_size) integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi integer(bit_kind) :: sorted_i(N_int) double precision :: s2, hij +! print *, "processing block", block, "/", shortcut_(0,1) vt = 0d0 st = 0d0 - N = dav_size ! N_det + N = dav_size do i=1,N idx(i) = i end do @@ -68,6 +69,19 @@ subroutine davidson_process(block, N, idx, vt, st) enddo enddo enddo + + N = 0 + do i=1, dav_size + if(vt(1, i) /= 0d0 .or. st(1, i) /= 0d0) then + N = N+1 + do istate=1,N_states + vt (istate,N) = vt (istate,i) + st (istate,N) = st (istate,i) + idx(N) = i + enddo + end if + end do +! print *, "done processing block", block, "/", shortcut_(0,1) end subroutine @@ -82,8 +96,8 @@ subroutine davidson_collect(block, N, idx, vt, st , v0, s0) integer , intent(in) :: idx(N) double precision , intent(in) :: vt(N_states, N) double precision , intent(in) :: st(N_states, N) - double precision , intent(inout) :: v0(N_det, N_states) - double precision , intent(inout) :: s0(N_det, N_states) + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) integer :: i @@ -101,7 +115,7 @@ subroutine davidson_init(zmq_to_qp_run_socket) integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket touch dav_size dav_det dav_ut - call new_parallel_job(zmq_to_qp_run_socket,'davidson') + call new_parallel_job(zmq_to_qp_run_socket,"davidson") end subroutine @@ -166,7 +180,8 @@ subroutine davidson_run_slave(thread,iproc) end if call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) - +! print *, "done slavin'" + !call sleep(1) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) @@ -192,11 +207,10 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) double precision , allocatable :: st(:,:) - allocate(idx(N_det)) - allocate(vt(N_states, N_det)) - allocate(st(N_states, N_det)) + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) - !call hobo_get() do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) @@ -205,6 +219,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id) call davidson_process(block,N, idx, vt, st) + ! reverse ? call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id) end do @@ -255,9 +270,9 @@ subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id integer ,intent(out) :: task_id integer ,intent(out) :: block integer ,intent(out) :: N - integer ,intent(out) :: idx(N_det) - double precision ,intent(out) :: vt(N_states, N_det) - double precision ,intent(out) :: st(N_states, N_det) + integer ,intent(out) :: idx(dav_size) + double precision ,intent(out) :: vt(N_states, dav_size) + double precision ,intent(out) :: st(N_states, dav_size) integer :: rc @@ -266,7 +281,7 @@ subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "davidson_push_results failed to pull N" - + rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0) if(rc /= 4*N) stop "davidson_push_results failed to pull idx" @@ -289,8 +304,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision ,intent(inout) :: v0(N_det, N_states) - double precision ,intent(inout) :: s0(N_det, N_states) + double precision ,intent(inout) :: v0(dav_size, N_states) + double precision ,intent(inout) :: s0(dav_size, N_states) integer :: more, task_id @@ -300,20 +315,27 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0) integer , allocatable :: idx(:) double precision , allocatable :: vt(:,:) double precision , allocatable :: st(:,:) + integer :: deleted + logical, allocatable :: done(:) + allocate(done(shortcut_(0,1))) + deleted = 0 + done = .false. - - allocate(idx(N_det)) - allocate(vt(N_states, N_det)) - allocate(st(N_states, N_det)) + allocate(idx(dav_size)) + allocate(vt(N_states, dav_size)) + allocate(st(N_states, dav_size)) more = 1 do while (more == 1) call davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id) call davidson_collect(block, N, idx, vt, st , v0, s0) - +! done(block) = .true. call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) + deleted += 1 +! print *, "DELETED", deleted, done end do +! print *, "done collector" end subroutine @@ -330,8 +352,8 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) integer :: i integer, external :: omp_get_thread_num - double precision , intent(inout) :: v0(N_det, N_states) - double precision , intent(inout) :: s0(N_det, N_states) + double precision , intent(inout) :: v0(dav_size, N_states) + double precision , intent(inout) :: s0(dav_size, N_states) call zmq_set_running(zmq_to_qp_run_socket) @@ -342,7 +364,7 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0) PROVIDE nproc - !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1) + !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(3) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! NPROC !!!!! i = omp_get_thread_num() if (i==0) then call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0) @@ -380,8 +402,8 @@ subroutine davidson_miniserver_run() rc = f77_zmq_recv(responder, buffer, 5, 0) if (buffer(1:rc) /= 'end') then rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_det, 16*N_int*N_det, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_ut, 8*N_det*N_states, 0) + rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states, 0) else rc = f77_zmq_send (responder, "end", 3, 0) exit @@ -412,6 +434,7 @@ subroutine davidson_miniserver_end() rc = f77_zmq_recv(requester, buf, 3, 0) rc = f77_zmq_close(requester) rc = f77_zmq_ctx_destroy(context) +! print *, "closed miniserver" end subroutine @@ -439,6 +462,8 @@ subroutine davidson_miniserver_get() TOUCH dav_size rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states, 0) + TOUCH dav_det dav_ut + rc = f77_zmq_close(requester) rc = f77_zmq_ctx_destroy(context) @@ -455,10 +480,6 @@ BEGIN_PROVIDER [ double precision, dav_ut, (N_states, dav_size) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ] -END_PROVIDER - - BEGIN_PROVIDER [ integer, dav_size ] END_PROVIDER diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index b4819067..418ada51 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -44,7 +44,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij double precision, allocatable :: vt(:,:) -! double precision, allocatable :: ut(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -197,7 +197,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision :: hij,s2 double precision, allocatable :: vt(:,:), st(:,:) - !double precision, allocatable :: ut(:,:) + double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii integer :: i0, j0 @@ -222,7 +222,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) PROVIDE ref_bitmask_energy allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) - !allocate(ut(N_st_8,n)) + allocate(ut(N_st_8,n)) v_0 = 0.d0 s_0 = 0.d0 @@ -240,9 +240,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) dav_size = n dav_det = psi_det dav_ut = ut - + call davidson_init(handler) - do sh=shortcut(0,1),1,-1 call davidson_add_task(handler, sh) enddo