10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-07-03 18:05:59 +02:00

per checkpoint dressing communication - buggy

This commit is contained in:
Yann Garniron 2018-05-01 13:16:10 +02:00
parent f61661a832
commit c14fe5b99f
3 changed files with 277 additions and 363 deletions

View File

@ -54,12 +54,12 @@ subroutine run_wf
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order
!$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num()
call dress_slave_tcp(i+1, energy)
!$OMP END PARALLEL
!!$OMP PARALLEL PRIVATE(i)
!i = omp_get_thread_num()
! call dress_slave_tcp(i+1, energy)
call dress_slave_tcp(0, energy)
!!$OMP END PARALLEL
print *, 'dress done'
endif
end do

View File

@ -66,7 +66,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error)
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer :: ipos, sz
integer :: block(8), block_i, cur_tooth_reduce, ntas
integer :: block(1), block_i, cur_tooth_reduce, ntas
logical :: flushme
block = 0
block_i = 0
@ -176,8 +176,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
double precision, intent(out) :: delta(N_states, N_det)
double precision, intent(out) :: delta_s2(N_states, N_det)
double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:)
real, allocatable :: delta_loc4(:,:,:)
double precision, allocatable :: delta_loc(:,:,:)
double precision, allocatable :: dress_detail(:,:)
double precision :: dress_mwen(N_states)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
@ -189,62 +188,42 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
integer :: i, j, k, i_state, N
integer :: task_id, ind
double precision, save :: time0 = -1.d0
double precision :: time, timeLast, old_tooth
double precision :: time
double precision, external :: omp_get_wtime
integer :: cur_cp, old_cur_cp
integer, allocatable :: parts_to_get(:)
logical, allocatable :: actually_computed(:)
integer :: total_computed
integer :: cur_cp
integer :: delta_loc_cur, is, N_buf(3)
double precision :: fac , wei
integer, allocatable :: int_buf(:)
integer, allocatable :: int_buf(:), agreg_for_cp(:)
double precision, allocatable :: double_buf(:)
integer(bit_kind), allocatable :: det_buf(:,:,:)
integer, external :: zmq_delete_tasks
allocate(agreg_for_cp(N_cp))
agreg_for_cp = 0
allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer))
delta_loc_cur = 1
delta = 0d0
delta_s2 = 0d0
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det))
allocate(delta_loc(N_states, N_det, 2))
allocate(delta_loc4(N_states, N_det, 2))
dress_detail = 0d0
delta_det = 0d0
dress_detail = -1000d0
cp = 0d0
total_computed = 0
character*(512) :: task
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators))
parts_to_get(:) = 1
if(fragment_first > 0) then
do i=1,fragment_first
parts_to_get(i) = fragment_count
enddo
endif
actually_computed = .false.
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
more = 1
if (time0 < 0.d0) then
call wall_time(time0)
endif
timeLast = time0
cur_cp = 0
old_cur_cp = 0
logical :: loop, last, floop
integer, allocatable :: sparse(:)
allocate(sparse(0:N_det))
logical :: loop, floop
integer :: finalcp
finalcp = N_cp*2
floop = .true.
loop = .true.
pullLoop : do while (loop)
call pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen)
call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf)
call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen)
if(floop) then
call wall_time(time)
print *, "FIRST PULL", time-time0
@ -252,100 +231,34 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
end if
integer, external :: zmq_delete_tasks
if(last) then
if(cur_cp == -1) then
call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf)
if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then
stop 'Unable to delete tasks'
stop 'Unable to delete tasks'
endif
if(more == 0) loop = .false.
end if
!if(more == 0) stop 'loop = .false.' !!!!!!!!!!!!!!!!
dress_detail(:, ind) = dress_mwen(:)
else if(cur_cp > 0) then
!dress_mwen = 0d0
if(ind == 0) cycle
!do i_state=1,N_states
! do i=1,sparse(0)
! dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(sparse(i), i_state)
! end do
!end do
dress_detail(:, ind) += dress_mwen(:)
wei = dress_weight_inv(ind)
do j=1,N_cp !! optimizable
fac = 0d0
!fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
fac = cps(ind, j) * wei * comb_step
if(fac /= 0) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
do i=1,sparse(0)
do is=1,N_states
cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac
end do
end do
!$OMP END PARALLEL DO
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
do i=1,sparse(0)
do is=1,N_states
cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac
end do
end do
!$OMP END PARALLEL DO
end if
end do
! do i=1,delta_loc_cur
logical :: fracted
integer :: toothMwen
toothMwen = tooth_of_det(ind)
fracted = (toothMwen /= 0)
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
if(fracted .and. .false.) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
do i=1,sparse(0)
delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen))
end do
!$OMP END PARALLEL DO
else if(.false.) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
do i=1,sparse(0)
delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1)
delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2)
end do
!$OMP END PARALLEL DO
end if
parts_to_get(ind) -= 1
if(parts_to_get(ind) == 0) then
actually_computed(ind) = .true.
total_computed += 1
end if
!end do
time = omp_get_wtime()
if((time - timeLast > 5d0) .or. (.not. loop)) then
timeLast = time
cur_cp = N_cp
do i=1,N_det_generators
if(.not. actually_computed(dress_jobs(i))) then
if(i /= 1) then
cur_cp = done_cp_at(i-1)
else
cur_cp = 0
end if
exit
end if
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
do i=1,N_det
cp(:,i,cur_cp,1) += delta_loc(:,i,1)
end do
if(cur_cp == 0 .or. (cur_cp == old_cur_cp .and. total_computed /= N_det_generators)) cycle pullLoop
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
do i=1,N_det
cp(:,i,cur_cp,2) += delta_loc(:,i,2)
end do
agreg_for_cp(cur_cp) += ind
if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then
stop "too much results..."
end if
if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle
print *, "FINISHED CP", cur_cp
double precision :: su, su2, eqt, avg, E0, val
integer, external :: zmq_abort
@ -359,6 +272,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
su += val
su2 += val*val
end do
avg = su / cps_N(cur_cp)
eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) )
E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1))
@ -366,47 +280,29 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp)))
end if
call wall_time(time)
if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then
! Termination
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
print *, "TERMINATE"
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
call sleep(1)
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Error in sending abort signal (2)'
endif
endif
else
if (cur_cp > old_cur_cp) then
old_cur_cp = cur_cp
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
endif
!exit pullLoop
endif
end if
end do pullLoop
print *, "exited"
delta (1:N_states,1:N_det) = 0d0
delta_s2(1:N_states,1:N_det) = 0d0
if(total_computed == N_det_generators) then
do i=comb_teeth+1,0,-1
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
end do
else
do i=1,cur_cp
delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2)
end do
delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp)
do i=cp_first_tooth(cur_cp)-1,0,-1
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
end do
delta(:,:) = cp(:,:,cur_cp,1)
delta_s2(:,:) = cp(:,:,cur_cp,2)
end if
dress(istate) = E(istate)+E0
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
end subroutine
@ -458,6 +354,8 @@ END_PROVIDER
&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ]
&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ]
&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ]
&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ]
&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ]
&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ]
&BEGIN_PROVIDER [ integer, N_dress_jobs ]
&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ]
@ -486,6 +384,8 @@ END_PROVIDER
cps = 0d0
cur_cp = 1
done_cp_at = 0
done_cp_at_det = 0
needed_by_cp = 0
comp_filler = .false.
computed = .false.
cps_N = 1d0
@ -506,6 +406,7 @@ END_PROVIDER
end do
l=first_det_of_comb
call random_seed(put=(/321,654,65,321,65/))
call RANDOM_NUMBER(comb)
lfiller = 1
nfiller = 1
@ -574,6 +475,8 @@ END_PROVIDER
do i=1,N_dress_jobs
if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i)
done_cp_at(i) = cur_cp
done_cp_at_det(dress_jobs(i)) = cur_cp
needed_by_cp(cur_cp) += 1
end do
@ -625,7 +528,7 @@ END_PROVIDER
end do
do i=1,N_cp-1
call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i))
call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1)
end do
do i=1,N_det_generators

View File

@ -9,13 +9,13 @@ BEGIN_PROVIDER [ integer, fragment_count ]
END_PROVIDER
subroutine run_dress_slave(thread,iproc,energy)
subroutine run_dress_slave(thread,iproce,energy)
use f77_zmq
implicit none
double precision, intent(in) :: energy(N_states_diag)
integer, intent(in) :: thread, iproc
integer :: rc, i, subset, i_generator(60)
integer, intent(in) :: thread, iproce
integer :: rc, i, subset, i_generator
integer :: worker_id, task_id, ctask, ltask
character*(5120) :: task
@ -41,13 +41,24 @@ subroutine run_dress_slave(thread,iproc,energy)
integer(bit_kind), allocatable :: det_buf(:,:,:)
integer :: N_buf(3)
logical :: last
integer, external :: omp_get_thread_num
double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:)
integer :: toothMwen
logical :: fracted
double precision :: fac
if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2))
delta_det = 0d9
cp = 0d0
task(:) = CHAR(0)
allocate(int_buf(N_dress_int_buffer))
allocate(double_buf(N_dress_double_buffer))
allocate(det_buf(N_int, 2, N_dress_det_buffer))
allocate(delta_ij_loc(N_states,N_det,2))
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
@ -61,48 +72,139 @@ subroutine run_dress_slave(thread,iproc,energy)
do i=1,N_states
div(i) = psi_coef(dressed_column_idx(i), i)
end do
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
if(task_id /= 0) then
task = trim(task)//' 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
i_generator = 0
read (task,*) subset, i_generator
if(i_generator(size(i_generator)) /= 0) stop "i_generator buffer too small"
delta_ij_loc = 0d0
i=1
do while(i_generator(i) /= 0)
call generator_start(i_generator(i), iproc)
call alpha_callback(delta_ij_loc, i_generator(i), subset, iproc)
call generator_done(i_generator(i), int_buf, double_buf, det_buf, N_buf, iproc)
last = (i_generator(i+1) == 0)
call push_dress_results(zmq_socket_push, i_generator(i), last, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
i += 1
integer :: iproc, cur_cp, done_for(0:N_cp)
integer, allocatable :: tasks(:)
logical :: loop, donedone
integer :: res_task(Nproc), res_gen(Nproc), res_sub(Nproc)
res_gen = 0
donedone = .false.
allocate(tasks(0:N_det))
done_for = 0
do cur_cp=0, N_cp
if(donedone) exit
print *, "DOING CP", cur_cp
tasks(0) = 0
!$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) &
!$OMP PRIVATE(toothMwen, fracted, fac) &
!$OMP PRIVATE(loop, i_generator, subset, iproc, N_buf)
iproc = omp_get_thread_num()+1
loop = .true.
allocate(int_buf(N_dress_int_buffer))
allocate(double_buf(N_dress_double_buffer))
allocate(det_buf(N_int, 2, N_dress_det_buffer))
allocate(delta_ij_loc(N_states,N_det,2))
do while(loop)
if(res_gen(iproc) == 0) then
!$OMP CRITICAL
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
!$OMP END CRITICAL
task = task//" 0"
if(task_id == 0) then
donedone = .true.
print *, "DONEDONE"
exit !! LAST MESSAGE ???
end if
read (task,*) subset, i_generator
else
subset = res_sub(iproc)
i_generator = res_gen(iproc)
task_id = res_task(iproc)
res_gen(iproc) = 0
end if
!if(done_cp_at_det(i_generator) > cur_cp) loop = .false.
if(done_cp_at_det(i_generator) > cur_cp) then
res_gen(iproc) = i_generator
res_task(iproc) = task_id
res_sub(iproc) = subset
exit
end if
!$OMP ATOMIC
done_for(done_cp_at_det(i_generator)) += 1
delta_ij_loc(:,:,:) = 0d0
call generator_start(i_generator, iproc)
call alpha_callback(delta_ij_loc, i_generator, subset, iproc)
call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc)
!if(.false.) then
!$OMP CRITICAL
do i=1,N_cp
fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step
if(fac == 0d0) cycle
cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac)
cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac)
end do
toothMwen = tooth_of_det(i_generator)
fracted = (toothMwen /= 0)
if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen))
if(fracted) then
delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen))
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen))
else
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1)
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2)
end if
!$OMP END CRITICAL
!end if
!$OMP CRITICAL
call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
else
exit
!$OMP END CRITICAL
tasks(0) += 1
tasks(tasks(0)) = task_id
end do
print *, "SLAVE", iproc, "waits"
deallocate(int_buf,double_buf,det_buf,delta_ij_loc)
!$OMP END PARALLEL
allocate(delta_ij_loc(N_states,N_det,2))
allocate(int_buf(1), double_buf(1), det_buf(1,1,1))
N_buf = (/0,1,0/)
delta_ij_loc = 0d0
if(cur_cp > 0) then
do i=1,cur_cp
delta_ij_loc(:,:,:) += cp(:,:,i,:)
!delta_s2(:,:) += cp(:,:,i,2)
end do
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp)
do i=cp_first_tooth(cur_cp)-1,0,-1
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:)
end do
end if
call sleep(1)
call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1)
!do i=1,tasks(0)
! call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,tasks(i))
!end do
deallocate(delta_ij_loc, int_buf, double_buf, det_buf)
end do
call sleep(10)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
end subroutine
! BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ]
!&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ]
!&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ]
! implicit none
!
! dress_int_buffer = 0
! dress_double_buffer = 0d0
! dress_det_buffer = 0_bit_kind
!END_PROVIDER
!subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem)
subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
use f77_zmq
implicit none
@ -110,136 +212,69 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
real(kind=4), allocatable :: delta_loc4(:,:,:)
double precision, intent(in) :: double_buf(*)
logical, intent(in) :: last
integer, intent(in) :: int_buf(*)
integer(bit_kind), intent(in) :: det_buf(N_int, 2, *)
integer, intent(in) :: N_bufi(3)
integer :: N_buf(3)
integer, intent(in) :: ind, task_id
integer, intent(in) :: ind, cur_cp, task_id
integer :: rc, i, j, k, l
double precision :: tmp(N_states,2)
integer, allocatable :: sparse(:)
integer :: sparsei
double precision :: contrib(N_states)
contrib = 0d0
allocate(sparse(N_det))
allocate(delta_loc4(N_states, N_det, 2))
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, last, 1, ZMQ_SNDMORE)
if(rc /= 1) stop "push"
rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(last) then
sparsei = 0
if(cur_cp /= -1) then
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 8*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 8*N_states*N_det) stop "push"
else
contrib = 0d0
do i=1,N_det
do j=1,N_states
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
sparsei += 1
sparse(sparsei) = i
do k=1,2
do l=1,N_states
delta_loc4(l,sparsei,k) = real(delta_loc(l,i,k), kind=4)
end do
end do
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
end if
end do
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
end do
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
if(sparsei /= 0) then
if(sparsei < N_det / 2) then
rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
N_buf = N_bufi
N_buf = (/0,1,0/)
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5"
rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
if(rc /= 4*sparsei) stop "push"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
else
rc = f77_zmq_send( zmq_socket_push, -1, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
do i=1,N_det
sparse(i) = i
do k=1,2
do l=1,N_states
delta_loc4(l,i,k) = real(delta_loc(l,i,k), kind=4)
end do
end do
end do
!rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
!if(rc /= 4*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 4*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 4*N_states*N_det) stop "push"
end if
else
rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(N_buf(1) > 0) then
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
if(rc /= 4*N_buf(1)) stop "push6"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
if(rc /= 8*N_buf(2)) stop "push8"
end if
else
rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(N_buf(3) > 0) then
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
end if
!rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
!if(rc /= 8*N_states) stop "push"
!rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
!if(rc /= 8*N_states) stop "push"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push11"
end if
N_buf = N_bufi
!N_buf = (/0, 1, 0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
if(rc /= 4*N_buf(1)) stop "push6"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
if(rc /= 8*N_buf(2)) stop "push8"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
end if
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push11"
! Activate is zmq_socket_push is a REQ
IRP_IF ZMQ_PUSH
IRP_ELSE
@ -250,90 +285,66 @@ IRP_ENDIF
end subroutine
subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib)
subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib)
use f77_zmq
implicit none
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
logical, intent(out) :: last
integer, intent(out) :: cur_cp
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
double precision, intent(out) :: double_buf(*), contrib(N_states)
integer, intent(out) :: int_buf(*)
integer(bit_kind), intent(out) :: det_buf(N_int, 2, *)
integer, intent(out) :: sparse(0:N_det)
integer, intent(out) :: ind
integer, intent(out) :: task_id
integer :: rc, i, j, k, sparsen
integer :: rc, i, j, k
integer, intent(out) :: N_buf(3)
real(kind=4), intent(out) :: delta_loc4(N_states, N_det, 2)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0)
if(rc /= 1) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0)
if(rc /= 4) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, sparse(0), 4, 0)
if(rc /= 4) stop "pullb"
if(sparse(0) /= 0) then
if(cur_cp /= -1) then
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0)
if(rc /= 8*N_states*N_det) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0)
if(rc /= 8*N_states*N_det) stop "pulld"
else
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
if(rc /= 8*N_states) stop "pullc"
if(sparse(0) == -1) then
do i=1,N_det
sparse(i) = i
end do
sparse(0) = N_det
else
rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0)
if(rc /= 4*sparse(0)) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
if(rc /= 4*3) stop "pull"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
if(rc /= 4*N_buf(1)) stop "pull1"
end if
rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0)
if(rc /= 4*N_states*sparse(0)) stop "pullc"
if(N_buf(2) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
if(rc /= 8*N_buf(2)) stop "pull2"
end if
rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,2), N_states*4*sparse(0), 0)
if(rc /= 4*N_states*sparse(0)) stop "pulld"
if(N_buf(3) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
end if
do j=1,2
do i=1,sparse(0)
do k=1,N_states
delta_loc(k,i,j) = real(delta_loc4(k,i,j), kind=8)
end do
end do
end do
else
contrib = 0d0
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull4"
end if
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
if(rc /= 4*3) stop "pull"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
if(rc /= 4*N_buf(1)) stop "pull1"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
if(rc /= 8*N_buf(2)) stop "pull2"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
end if
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull4"
! Activate is zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE