10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-11-04 05:03:54 +01:00

davidson locally reduces task results

This commit is contained in:
Yann Garniron 2016-10-08 19:21:53 +02:00
parent e6b528fe03
commit c3dd90e199
2 changed files with 62 additions and 62 deletions

View File

@ -4,25 +4,20 @@
use bitmasks use bitmasks
use f77_zmq use f77_zmq
subroutine davidson_process(blockb, blocke, N, idx, vt, st) subroutine davidson_process(blockb, blocke, vt, st)
use f77_zmq use f77_zmq
implicit none implicit none
integer , intent(in) :: blockb, blocke integer , intent(in) :: blockb, blocke
integer , intent(inout) :: N
integer , intent(inout) :: idx(dav_size)
double precision , intent(inout) :: vt(N_states_diag, dav_size) double precision , intent(inout) :: vt(N_states_diag, dav_size)
double precision , intent(inout) :: st(N_states_diag, dav_size) double precision , intent(inout) :: st(N_states_diag, dav_size)
integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi
integer(bit_kind) :: sorted_i(N_int) integer(bit_kind) :: sorted_i(N_int)
double precision :: s2, hij double precision :: s2, hij
logical, allocatable :: wrotten(:)
integer, external :: omp_get_thread_num integer, external :: omp_get_thread_num
allocate(wrotten(dav_size))
wrotten = .false.
provide dav_det dav_ut shortcut_ provide dav_det dav_ut shortcut_
!useless calls not to provide in the parallel section !useless calls not to provide in the parallel section
call i_h_j (dav_det(1,1,1),dav_det(1,1,dav_size),n_int,hij) call i_h_j (dav_det(1,1,1),dav_det(1,1,dav_size),n_int,hij)
@ -31,7 +26,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st)
do sh = blockb, blocke do sh = blockb, blocke
!$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP PARALLEL DO default(none) schedule(dynamic) &
!$OMP shared(vt, st, wrotten, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) & !$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) &
!$OMP private(exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2) !$OMP private(exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2)
do sh2=1,sh do sh2=1,sh
exa = 0 exa = 0
@ -63,16 +58,6 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st)
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
!$OMP CRITICAL !$OMP CRITICAL
if(.not. wrotten(org_i)) then
wrotten(org_i) = .true.
vt (:,org_i) = 0d0
st (:,org_i) = 0d0
end if
if(.not. wrotten(org_j)) then
wrotten(org_j) = .true.
vt (:,org_j) = 0d0
st (:,org_j) = 0d0
end if
do istate=1,N_states_diag do istate=1,N_states_diag
vt (istate,org_i) += hij*dav_ut(istate,org_j) vt (istate,org_i) += hij*dav_ut(istate,org_j)
st (istate,org_i) += s2*dav_ut(istate,org_j) st (istate,org_i) += s2*dav_ut(istate,org_j)
@ -89,7 +74,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st)
do sh=blockb,min(blocke, shortcut_(0,2)) do sh=blockb,min(blocke, shortcut_(0,2))
!$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP PARALLEL DO default(none) schedule(dynamic) &
!$OMP shared(vt, st, wrotten, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) & !$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) &
!$OMP private(exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2) !$OMP private(exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2)
do sh2=sh, shortcut_(0,2), shortcut_(0,1) do sh2=sh, shortcut_(0,2), shortcut_(0,1)
do i=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 do i=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
@ -104,16 +89,6 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st)
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
!$OMP CRITICAL !$OMP CRITICAL
if(.not. wrotten(org_i)) then
wrotten(org_i) = .true.
vt (:,org_i) = 0d0
st (:,org_i) = 0d0
end if
if(.not. wrotten(org_j)) then
wrotten(org_j) = .true.
vt (:,org_j) = 0d0
st (:,org_j) = 0d0
end if
do istate=1,N_states_diag do istate=1,N_states_diag
vt (istate,org_i) = vt (istate,org_i) + hij*dav_ut(istate,org_j) vt (istate,org_i) = vt (istate,org_i) + hij*dav_ut(istate,org_j)
vt (istate,org_j) = vt (istate,org_j) + hij*dav_ut(istate,org_i) vt (istate,org_j) = vt (istate,org_j) + hij*dav_ut(istate,org_i)
@ -127,18 +102,6 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st)
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
enddo enddo
N = 0
do i=1, dav_size
if(wrotten(i)) then
N = N+1
do istate=1,N_states_diag
vt (istate,N) = vt (istate,i)
st (istate,N) = st (istate,i)
idx(N) = i
enddo
end if
end do
end subroutine end subroutine
@ -262,7 +225,8 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),intent(in) :: zmq_socket_push integer(ZMQ_PTR),intent(in) :: zmq_socket_push
integer,intent(in) :: worker_id integer,intent(in) :: worker_id
integer :: task_id integer :: i, taskn, myTask, istate
integer, allocatable :: task_id(:)
character*(512) :: task character*(512) :: task
@ -272,32 +236,59 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
double precision , allocatable :: vt(:,:) double precision , allocatable :: vt(:,:)
double precision , allocatable :: st(:,:) double precision , allocatable :: st(:,:)
allocate(task_id(100))
allocate(idx(dav_size)) allocate(idx(dav_size))
allocate(vt(N_states_diag, dav_size)) allocate(vt(N_states_diag, dav_size))
allocate(st(N_states_diag, dav_size)) allocate(st(N_states_diag, dav_size))
vt = 0d0
st = 0d0
taskn = 0
do do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, myTask, task)
if(task_id == 0) exit if(myTask /= 0) then
read (task,*) blockb, blocke read (task,*) blockb, blocke
call davidson_process(blockb, blocke, vt, st)
taskn += 1
task_id(taskn) = myTask
end if
call davidson_process(blockb, blocke, N, idx, vt, st)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) if(myTask == 0 .or. taskn == size(task_id)) then
call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id) N = 0
do i=1, dav_size
if(vt(1,i) /= 0d0 .or. st(1,i) /= 0d0) then
N = N+1
do istate=1,N_states_diag
vt (istate,N) = vt (istate,i)
st (istate,N) = st (istate,i)
idx(N) = i
enddo
end if
end do
do i = 1, taskn
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i))
end do
if(taskn /= 0) call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, taskn, task_id)
if(myTask == 0) exit
vt = 0d0
st = 0d0
taskn = 0
end if
end do end do
end subroutine end subroutine
subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id) subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, taskn, task_id)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
integer ,intent(in) :: task_id integer ,intent(in) :: task_id(100), taskn
integer ,intent(in) :: blockb, blocke integer ,intent(in) :: blockb, blocke
integer ,intent(in) :: N integer ,intent(in) :: N
@ -324,18 +315,21 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st
rc = f77_zmq_send( zmq_socket_push, st, 8*N_states_diag* N, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, st, 8*N_states_diag* N, ZMQ_SNDMORE)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push st" if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push st"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) rc = f77_zmq_send( zmq_socket_push, taskn, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push task_id" if(rc /= 4) stop "davidson_push_results failed to push taskn"
rc = f77_zmq_send( zmq_socket_push, task_id, 4*taskn, 0)
if(rc /= 4*taskn) stop "davidson_push_results failed to push task_id"
end subroutine end subroutine
subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id) subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, taskn, task_id)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
integer ,intent(out) :: task_id integer ,intent(out) :: task_id(100), taskn
integer ,intent(out) :: blockb, blocke integer ,intent(out) :: blockb, blocke
integer ,intent(out) :: N integer ,intent(out) :: N
integer ,intent(out) :: idx(dav_size) integer ,intent(out) :: idx(dav_size)
@ -362,8 +356,11 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st
rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states_diag* N, 0) rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states_diag* N, 0)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull st" if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull st"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, taskn, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull taskn"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*taskn, 0)
if(rc /= 4*taskn) stop "davidson_pull_results failed to pull task_id"
end subroutine end subroutine
@ -378,7 +375,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
double precision ,intent(inout) :: v0(dav_size, N_states_diag) double precision ,intent(inout) :: v0(dav_size, N_states_diag)
double precision ,intent(inout) :: s0(dav_size, N_states_diag) double precision ,intent(inout) :: s0(dav_size, N_states_diag)
integer :: more, task_id integer :: more, task_id(100), taskn
integer :: blockb, blocke integer :: blockb, blocke
@ -387,6 +384,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:) double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:)
double precision , allocatable :: st(:,:) double precision , allocatable :: st(:,:)
integer :: i
allocate(idx(dav_size)) allocate(idx(dav_size))
allocate(vt(N_states_diag, dav_size)) allocate(vt(N_states_diag, dav_size))
allocate(st(N_states_diag, dav_size)) allocate(st(N_states_diag, dav_size))
@ -399,10 +398,13 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
more = 1 more = 1
do while (more == 1) do while (more == 1)
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id) call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, taskn, task_id)
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) do i=1,taskn
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
end do
end do end do
deallocate(idx,vt,st) deallocate(idx,vt,st)

View File

@ -240,21 +240,19 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
touch dav_size touch dav_size
dav_det = psi_det dav_det = psi_det
dav_ut = ut dav_ut = ut
workload = 0 workload = 0
blockb = shortcut(0,1) blockb = shortcut(0,1)
blocke = blockb blocke = blockb
call davidson_init(handler) call davidson_init(handler)
do sh=shortcut(0,1),1,-1 do sh=shortcut(0,1),1,-1
workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
if(workload > 100000) then if(workload > 1000) then
blocke = sh blocke = sh
call davidson_add_task(handler, blocke, blockb) call davidson_add_task(handler, blocke, blockb)
blockb = sh-1 blockb = sh-1
workload = 0 workload = 0
end if end if
enddo enddo
if(blockb > 0) call davidson_add_task(handler, 1, blockb) if(blockb > 0) call davidson_add_task(handler, 1, blockb)
call davidson_run(handler, v_0, s_0) call davidson_run(handler, v_0, s_0)