mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-12 05:58:24 +01:00
per checkpoint dressing communication - buggy
This commit is contained in:
parent
f61661a832
commit
c14fe5b99f
@ -54,12 +54,12 @@ subroutine run_wf
|
||||
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
|
||||
PROVIDE psi_bilinear_matrix_transp_order
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
call dress_slave_tcp(i+1, energy)
|
||||
!$OMP END PARALLEL
|
||||
!!$OMP PARALLEL PRIVATE(i)
|
||||
!i = omp_get_thread_num()
|
||||
! call dress_slave_tcp(i+1, energy)
|
||||
call dress_slave_tcp(0, energy)
|
||||
!!$OMP END PARALLEL
|
||||
print *, 'dress done'
|
||||
|
||||
endif
|
||||
|
||||
end do
|
||||
|
@ -66,7 +66,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error)
|
||||
|
||||
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
|
||||
integer :: ipos, sz
|
||||
integer :: block(8), block_i, cur_tooth_reduce, ntas
|
||||
integer :: block(1), block_i, cur_tooth_reduce, ntas
|
||||
logical :: flushme
|
||||
block = 0
|
||||
block_i = 0
|
||||
@ -176,8 +176,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
|
||||
|
||||
double precision, intent(out) :: delta(N_states, N_det)
|
||||
double precision, intent(out) :: delta_s2(N_states, N_det)
|
||||
double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:)
|
||||
real, allocatable :: delta_loc4(:,:,:)
|
||||
double precision, allocatable :: delta_loc(:,:,:)
|
||||
double precision, allocatable :: dress_detail(:,:)
|
||||
double precision :: dress_mwen(N_states)
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
@ -189,62 +188,42 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
|
||||
integer :: i, j, k, i_state, N
|
||||
integer :: task_id, ind
|
||||
double precision, save :: time0 = -1.d0
|
||||
double precision :: time, timeLast, old_tooth
|
||||
double precision :: time
|
||||
double precision, external :: omp_get_wtime
|
||||
integer :: cur_cp, old_cur_cp
|
||||
integer, allocatable :: parts_to_get(:)
|
||||
logical, allocatable :: actually_computed(:)
|
||||
integer :: total_computed
|
||||
integer :: cur_cp
|
||||
integer :: delta_loc_cur, is, N_buf(3)
|
||||
double precision :: fac , wei
|
||||
integer, allocatable :: int_buf(:)
|
||||
integer, allocatable :: int_buf(:), agreg_for_cp(:)
|
||||
double precision, allocatable :: double_buf(:)
|
||||
integer(bit_kind), allocatable :: det_buf(:,:,:)
|
||||
integer, external :: zmq_delete_tasks
|
||||
|
||||
allocate(agreg_for_cp(N_cp))
|
||||
agreg_for_cp = 0
|
||||
allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer))
|
||||
delta_loc_cur = 1
|
||||
|
||||
delta = 0d0
|
||||
delta_s2 = 0d0
|
||||
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
|
||||
allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det))
|
||||
allocate(delta_loc(N_states, N_det, 2))
|
||||
allocate(delta_loc4(N_states, N_det, 2))
|
||||
dress_detail = 0d0
|
||||
delta_det = 0d0
|
||||
dress_detail = -1000d0
|
||||
cp = 0d0
|
||||
total_computed = 0
|
||||
character*(512) :: task
|
||||
|
||||
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators))
|
||||
|
||||
|
||||
parts_to_get(:) = 1
|
||||
if(fragment_first > 0) then
|
||||
do i=1,fragment_first
|
||||
parts_to_get(i) = fragment_count
|
||||
enddo
|
||||
endif
|
||||
|
||||
actually_computed = .false.
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
more = 1
|
||||
if (time0 < 0.d0) then
|
||||
call wall_time(time0)
|
||||
endif
|
||||
timeLast = time0
|
||||
cur_cp = 0
|
||||
old_cur_cp = 0
|
||||
logical :: loop, last, floop
|
||||
integer, allocatable :: sparse(:)
|
||||
allocate(sparse(0:N_det))
|
||||
logical :: loop, floop
|
||||
integer :: finalcp
|
||||
finalcp = N_cp*2
|
||||
|
||||
floop = .true.
|
||||
loop = .true.
|
||||
|
||||
pullLoop : do while (loop)
|
||||
call pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen)
|
||||
call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf)
|
||||
call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen)
|
||||
if(floop) then
|
||||
call wall_time(time)
|
||||
print *, "FIRST PULL", time-time0
|
||||
@ -252,100 +231,34 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
|
||||
end if
|
||||
|
||||
|
||||
integer, external :: zmq_delete_tasks
|
||||
|
||||
if(last) then
|
||||
if(cur_cp == -1) then
|
||||
call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf)
|
||||
if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then
|
||||
stop 'Unable to delete tasks'
|
||||
stop 'Unable to delete tasks'
|
||||
endif
|
||||
if(more == 0) loop = .false.
|
||||
end if
|
||||
!if(more == 0) stop 'loop = .false.' !!!!!!!!!!!!!!!!
|
||||
dress_detail(:, ind) = dress_mwen(:)
|
||||
else if(cur_cp > 0) then
|
||||
|
||||
!dress_mwen = 0d0
|
||||
if(ind == 0) cycle
|
||||
|
||||
!do i_state=1,N_states
|
||||
! do i=1,sparse(0)
|
||||
! dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(sparse(i), i_state)
|
||||
! end do
|
||||
!end do
|
||||
|
||||
dress_detail(:, ind) += dress_mwen(:)
|
||||
wei = dress_weight_inv(ind)
|
||||
|
||||
do j=1,N_cp !! optimizable
|
||||
fac = 0d0
|
||||
!fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
|
||||
fac = cps(ind, j) * wei * comb_step
|
||||
|
||||
if(fac /= 0) then
|
||||
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
|
||||
do i=1,sparse(0)
|
||||
do is=1,N_states
|
||||
cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac
|
||||
end do
|
||||
end do
|
||||
!$OMP END PARALLEL DO
|
||||
|
||||
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
|
||||
do i=1,sparse(0)
|
||||
do is=1,N_states
|
||||
cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac
|
||||
end do
|
||||
end do
|
||||
!$OMP END PARALLEL DO
|
||||
end if
|
||||
end do
|
||||
|
||||
! do i=1,delta_loc_cur
|
||||
logical :: fracted
|
||||
integer :: toothMwen
|
||||
|
||||
toothMwen = tooth_of_det(ind)
|
||||
fracted = (toothMwen /= 0)
|
||||
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
|
||||
|
||||
if(fracted .and. .false.) then
|
||||
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,sparse(0)
|
||||
delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen))
|
||||
delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen))
|
||||
delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen))
|
||||
delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen))
|
||||
end do
|
||||
!$OMP END PARALLEL DO
|
||||
else if(.false.) then
|
||||
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,sparse(0)
|
||||
delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1)
|
||||
delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2)
|
||||
end do
|
||||
!$OMP END PARALLEL DO
|
||||
end if
|
||||
|
||||
parts_to_get(ind) -= 1
|
||||
if(parts_to_get(ind) == 0) then
|
||||
actually_computed(ind) = .true.
|
||||
total_computed += 1
|
||||
end if
|
||||
!end do
|
||||
|
||||
time = omp_get_wtime()
|
||||
|
||||
if((time - timeLast > 5d0) .or. (.not. loop)) then
|
||||
timeLast = time
|
||||
cur_cp = N_cp
|
||||
|
||||
do i=1,N_det_generators
|
||||
if(.not. actually_computed(dress_jobs(i))) then
|
||||
if(i /= 1) then
|
||||
cur_cp = done_cp_at(i-1)
|
||||
else
|
||||
cur_cp = 0
|
||||
end if
|
||||
exit
|
||||
end if
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,N_det
|
||||
cp(:,i,cur_cp,1) += delta_loc(:,i,1)
|
||||
end do
|
||||
if(cur_cp == 0 .or. (cur_cp == old_cur_cp .and. total_computed /= N_det_generators)) cycle pullLoop
|
||||
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
|
||||
do i=1,N_det
|
||||
cp(:,i,cur_cp,2) += delta_loc(:,i,2)
|
||||
end do
|
||||
|
||||
agreg_for_cp(cur_cp) += ind
|
||||
if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then
|
||||
stop "too much results..."
|
||||
end if
|
||||
if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle
|
||||
|
||||
print *, "FINISHED CP", cur_cp
|
||||
|
||||
double precision :: su, su2, eqt, avg, E0, val
|
||||
integer, external :: zmq_abort
|
||||
@ -359,6 +272,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
|
||||
su += val
|
||||
su2 += val*val
|
||||
end do
|
||||
|
||||
avg = su / cps_N(cur_cp)
|
||||
eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) )
|
||||
E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1))
|
||||
@ -366,47 +280,29 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
|
||||
E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp)))
|
||||
end if
|
||||
|
||||
|
||||
call wall_time(time)
|
||||
if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then
|
||||
|
||||
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
|
||||
if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then
|
||||
! Termination
|
||||
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
|
||||
print *, "TERMINATE"
|
||||
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
|
||||
call sleep(1)
|
||||
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
|
||||
print *, irp_here, ': Error in sending abort signal (2)'
|
||||
endif
|
||||
endif
|
||||
else
|
||||
if (cur_cp > old_cur_cp) then
|
||||
old_cur_cp = cur_cp
|
||||
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
|
||||
endif
|
||||
!exit pullLoop
|
||||
endif
|
||||
end if
|
||||
end do pullLoop
|
||||
print *, "exited"
|
||||
|
||||
delta (1:N_states,1:N_det) = 0d0
|
||||
delta_s2(1:N_states,1:N_det) = 0d0
|
||||
|
||||
if(total_computed == N_det_generators) then
|
||||
do i=comb_teeth+1,0,-1
|
||||
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
|
||||
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
|
||||
end do
|
||||
else
|
||||
do i=1,cur_cp
|
||||
delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1)
|
||||
delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2)
|
||||
end do
|
||||
delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp)
|
||||
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp)
|
||||
do i=cp_first_tooth(cur_cp)-1,0,-1
|
||||
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
|
||||
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
|
||||
end do
|
||||
delta(:,:) = cp(:,:,cur_cp,1)
|
||||
delta_s2(:,:) = cp(:,:,cur_cp,2)
|
||||
|
||||
|
||||
end if
|
||||
dress(istate) = E(istate)+E0
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
end subroutine
|
||||
@ -458,6 +354,8 @@ END_PROVIDER
|
||||
&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ]
|
||||
&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ]
|
||||
&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ]
|
||||
&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ]
|
||||
&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ]
|
||||
&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ]
|
||||
&BEGIN_PROVIDER [ integer, N_dress_jobs ]
|
||||
&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ]
|
||||
@ -486,6 +384,8 @@ END_PROVIDER
|
||||
cps = 0d0
|
||||
cur_cp = 1
|
||||
done_cp_at = 0
|
||||
done_cp_at_det = 0
|
||||
needed_by_cp = 0
|
||||
comp_filler = .false.
|
||||
computed = .false.
|
||||
cps_N = 1d0
|
||||
@ -506,6 +406,7 @@ END_PROVIDER
|
||||
end do
|
||||
|
||||
l=first_det_of_comb
|
||||
call random_seed(put=(/321,654,65,321,65/))
|
||||
call RANDOM_NUMBER(comb)
|
||||
lfiller = 1
|
||||
nfiller = 1
|
||||
@ -574,6 +475,8 @@ END_PROVIDER
|
||||
do i=1,N_dress_jobs
|
||||
if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i)
|
||||
done_cp_at(i) = cur_cp
|
||||
done_cp_at_det(dress_jobs(i)) = cur_cp
|
||||
needed_by_cp(cur_cp) += 1
|
||||
end do
|
||||
|
||||
|
||||
@ -625,7 +528,7 @@ END_PROVIDER
|
||||
end do
|
||||
|
||||
do i=1,N_cp-1
|
||||
call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i))
|
||||
call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1)
|
||||
end do
|
||||
|
||||
do i=1,N_det_generators
|
||||
|
@ -9,13 +9,13 @@ BEGIN_PROVIDER [ integer, fragment_count ]
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
subroutine run_dress_slave(thread,iproc,energy)
|
||||
subroutine run_dress_slave(thread,iproce,energy)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
double precision, intent(in) :: energy(N_states_diag)
|
||||
integer, intent(in) :: thread, iproc
|
||||
integer :: rc, i, subset, i_generator(60)
|
||||
integer, intent(in) :: thread, iproce
|
||||
integer :: rc, i, subset, i_generator
|
||||
|
||||
integer :: worker_id, task_id, ctask, ltask
|
||||
character*(5120) :: task
|
||||
@ -41,13 +41,24 @@ subroutine run_dress_slave(thread,iproc,energy)
|
||||
integer(bit_kind), allocatable :: det_buf(:,:,:)
|
||||
integer :: N_buf(3)
|
||||
logical :: last
|
||||
integer, external :: omp_get_thread_num
|
||||
double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:)
|
||||
integer :: toothMwen
|
||||
logical :: fracted
|
||||
double precision :: fac
|
||||
|
||||
|
||||
|
||||
if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
|
||||
|
||||
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
|
||||
allocate(cp(N_states, N_det, N_cp, 2))
|
||||
delta_det = 0d9
|
||||
cp = 0d0
|
||||
|
||||
|
||||
task(:) = CHAR(0)
|
||||
|
||||
allocate(int_buf(N_dress_int_buffer))
|
||||
allocate(double_buf(N_dress_double_buffer))
|
||||
allocate(det_buf(N_int, 2, N_dress_det_buffer))
|
||||
allocate(delta_ij_loc(N_states,N_det,2))
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_push = new_zmq_push_socket(thread)
|
||||
@ -61,48 +72,139 @@ subroutine run_dress_slave(thread,iproc,energy)
|
||||
do i=1,N_states
|
||||
div(i) = psi_coef(dressed_column_idx(i), i)
|
||||
end do
|
||||
do
|
||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
|
||||
if(task_id /= 0) then
|
||||
task = trim(task)//' 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
|
||||
|
||||
i_generator = 0
|
||||
read (task,*) subset, i_generator
|
||||
if(i_generator(size(i_generator)) /= 0) stop "i_generator buffer too small"
|
||||
delta_ij_loc = 0d0
|
||||
i=1
|
||||
do while(i_generator(i) /= 0)
|
||||
call generator_start(i_generator(i), iproc)
|
||||
call alpha_callback(delta_ij_loc, i_generator(i), subset, iproc)
|
||||
call generator_done(i_generator(i), int_buf, double_buf, det_buf, N_buf, iproc)
|
||||
last = (i_generator(i+1) == 0)
|
||||
call push_dress_results(zmq_socket_push, i_generator(i), last, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
|
||||
i += 1
|
||||
integer :: iproc, cur_cp, done_for(0:N_cp)
|
||||
integer, allocatable :: tasks(:)
|
||||
logical :: loop, donedone
|
||||
integer :: res_task(Nproc), res_gen(Nproc), res_sub(Nproc)
|
||||
res_gen = 0
|
||||
|
||||
donedone = .false.
|
||||
allocate(tasks(0:N_det))
|
||||
done_for = 0
|
||||
|
||||
do cur_cp=0, N_cp
|
||||
if(donedone) exit
|
||||
print *, "DOING CP", cur_cp
|
||||
tasks(0) = 0
|
||||
!$OMP PARALLEL DEFAULT(SHARED) &
|
||||
!$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) &
|
||||
!$OMP PRIVATE(toothMwen, fracted, fac) &
|
||||
!$OMP PRIVATE(loop, i_generator, subset, iproc, N_buf)
|
||||
iproc = omp_get_thread_num()+1
|
||||
loop = .true.
|
||||
allocate(int_buf(N_dress_int_buffer))
|
||||
allocate(double_buf(N_dress_double_buffer))
|
||||
allocate(det_buf(N_int, 2, N_dress_det_buffer))
|
||||
allocate(delta_ij_loc(N_states,N_det,2))
|
||||
do while(loop)
|
||||
if(res_gen(iproc) == 0) then
|
||||
!$OMP CRITICAL
|
||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
|
||||
!$OMP END CRITICAL
|
||||
task = task//" 0"
|
||||
if(task_id == 0) then
|
||||
donedone = .true.
|
||||
print *, "DONEDONE"
|
||||
exit !! LAST MESSAGE ???
|
||||
end if
|
||||
read (task,*) subset, i_generator
|
||||
else
|
||||
subset = res_sub(iproc)
|
||||
i_generator = res_gen(iproc)
|
||||
task_id = res_task(iproc)
|
||||
res_gen(iproc) = 0
|
||||
end if
|
||||
|
||||
!if(done_cp_at_det(i_generator) > cur_cp) loop = .false.
|
||||
if(done_cp_at_det(i_generator) > cur_cp) then
|
||||
res_gen(iproc) = i_generator
|
||||
res_task(iproc) = task_id
|
||||
res_sub(iproc) = subset
|
||||
exit
|
||||
end if
|
||||
|
||||
!$OMP ATOMIC
|
||||
done_for(done_cp_at_det(i_generator)) += 1
|
||||
|
||||
delta_ij_loc(:,:,:) = 0d0
|
||||
call generator_start(i_generator, iproc)
|
||||
call alpha_callback(delta_ij_loc, i_generator, subset, iproc)
|
||||
call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc)
|
||||
|
||||
!if(.false.) then
|
||||
!$OMP CRITICAL
|
||||
do i=1,N_cp
|
||||
fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step
|
||||
if(fac == 0d0) cycle
|
||||
cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac)
|
||||
cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac)
|
||||
end do
|
||||
|
||||
|
||||
toothMwen = tooth_of_det(i_generator)
|
||||
fracted = (toothMwen /= 0)
|
||||
if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen))
|
||||
if(fracted) then
|
||||
delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen))
|
||||
delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen))
|
||||
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen))
|
||||
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen))
|
||||
else
|
||||
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1)
|
||||
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2)
|
||||
end if
|
||||
|
||||
|
||||
!$OMP END CRITICAL
|
||||
!end if
|
||||
|
||||
!$OMP CRITICAL
|
||||
call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
|
||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||
else
|
||||
exit
|
||||
!$OMP END CRITICAL
|
||||
tasks(0) += 1
|
||||
tasks(tasks(0)) = task_id
|
||||
|
||||
end do
|
||||
print *, "SLAVE", iproc, "waits"
|
||||
deallocate(int_buf,double_buf,det_buf,delta_ij_loc)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
allocate(delta_ij_loc(N_states,N_det,2))
|
||||
allocate(int_buf(1), double_buf(1), det_buf(1,1,1))
|
||||
N_buf = (/0,1,0/)
|
||||
|
||||
delta_ij_loc = 0d0
|
||||
|
||||
if(cur_cp > 0) then
|
||||
do i=1,cur_cp
|
||||
delta_ij_loc(:,:,:) += cp(:,:,i,:)
|
||||
!delta_s2(:,:) += cp(:,:,i,2)
|
||||
end do
|
||||
|
||||
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp)
|
||||
do i=cp_first_tooth(cur_cp)-1,0,-1
|
||||
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) +delta_det(:,:,i,:)
|
||||
end do
|
||||
end if
|
||||
call sleep(1)
|
||||
call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1)
|
||||
!do i=1,tasks(0)
|
||||
! call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,tasks(i))
|
||||
!end do
|
||||
deallocate(delta_ij_loc, int_buf, double_buf, det_buf)
|
||||
end do
|
||||
|
||||
call sleep(10)
|
||||
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
call end_zmq_push_socket(zmq_socket_push,thread)
|
||||
end subroutine
|
||||
|
||||
|
||||
! BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ]
|
||||
!&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ]
|
||||
!&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ]
|
||||
! implicit none
|
||||
!
|
||||
! dress_int_buffer = 0
|
||||
! dress_double_buffer = 0d0
|
||||
! dress_det_buffer = 0_bit_kind
|
||||
!END_PROVIDER
|
||||
|
||||
|
||||
!subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem)
|
||||
subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
|
||||
subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
@ -110,136 +212,69 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
|
||||
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
|
||||
real(kind=4), allocatable :: delta_loc4(:,:,:)
|
||||
double precision, intent(in) :: double_buf(*)
|
||||
logical, intent(in) :: last
|
||||
integer, intent(in) :: int_buf(*)
|
||||
integer(bit_kind), intent(in) :: det_buf(N_int, 2, *)
|
||||
integer, intent(in) :: N_bufi(3)
|
||||
integer :: N_buf(3)
|
||||
integer, intent(in) :: ind, task_id
|
||||
integer, intent(in) :: ind, cur_cp, task_id
|
||||
integer :: rc, i, j, k, l
|
||||
double precision :: tmp(N_states,2)
|
||||
integer, allocatable :: sparse(:)
|
||||
integer :: sparsei
|
||||
double precision :: contrib(N_states)
|
||||
|
||||
contrib = 0d0
|
||||
allocate(sparse(N_det))
|
||||
allocate(delta_loc4(N_states, N_det, 2))
|
||||
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, last, 1, ZMQ_SNDMORE)
|
||||
if(rc /= 1) stop "push"
|
||||
rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
|
||||
if(last) then
|
||||
|
||||
sparsei = 0
|
||||
if(cur_cp /= -1) then
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states*N_det) stop "push"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states*N_det) stop "push"
|
||||
else
|
||||
contrib = 0d0
|
||||
|
||||
do i=1,N_det
|
||||
do j=1,N_states
|
||||
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
|
||||
sparsei += 1
|
||||
sparse(sparsei) = i
|
||||
do k=1,2
|
||||
do l=1,N_states
|
||||
delta_loc4(l,sparsei,k) = real(delta_loc(l,i,k), kind=4)
|
||||
end do
|
||||
end do
|
||||
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
|
||||
end if
|
||||
end do
|
||||
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
|
||||
end do
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states) stop "push"
|
||||
|
||||
if(sparsei /= 0) then
|
||||
if(sparsei < N_det / 2) then
|
||||
rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
N_buf = N_bufi
|
||||
N_buf = (/0,1,0/)
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states) stop "push"
|
||||
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
|
||||
if(rc /= 4*3) stop "push5"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
|
||||
if(rc /= 4*sparsei) stop "push"
|
||||
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_states*sparsei) stop "push"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_states*sparsei) stop "push"
|
||||
else
|
||||
rc = f77_zmq_send( zmq_socket_push, -1, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states) stop "push"
|
||||
|
||||
do i=1,N_det
|
||||
sparse(i) = i
|
||||
do k=1,2
|
||||
do l=1,N_states
|
||||
delta_loc4(l,i,k) = real(delta_loc(l,i,k), kind=4)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
!rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
|
||||
!if(rc /= 4*sparsei) stop "push"
|
||||
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_states*N_det) stop "push"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_states*N_det) stop "push"
|
||||
end if
|
||||
else
|
||||
rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
if(N_buf(1) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_buf(1)) stop "push6"
|
||||
end if
|
||||
|
||||
if(N_buf(2) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_buf(2)) stop "push8"
|
||||
end if
|
||||
|
||||
else
|
||||
rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "push"
|
||||
if(N_buf(3) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
|
||||
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
|
||||
end if
|
||||
|
||||
!rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
|
||||
!if(rc /= 8*N_states) stop "push"
|
||||
|
||||
!rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
|
||||
!if(rc /= 8*N_states) stop "push"
|
||||
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
|
||||
if(rc /= 4) stop "push11"
|
||||
end if
|
||||
|
||||
N_buf = N_bufi
|
||||
!N_buf = (/0, 1, 0/)
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
|
||||
if(rc /= 4*3) stop "push5"
|
||||
|
||||
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
|
||||
|
||||
if(N_buf(1) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
|
||||
if(rc /= 4*N_buf(1)) stop "push6"
|
||||
end if
|
||||
|
||||
if(N_buf(2) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_buf(2)) stop "push8"
|
||||
end if
|
||||
|
||||
if(N_buf(3) > 0) then
|
||||
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
|
||||
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
|
||||
end if
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
|
||||
if(rc /= 4) stop "push11"
|
||||
|
||||
! Activate is zmq_socket_push is a REQ
|
||||
IRP_IF ZMQ_PUSH
|
||||
IRP_ELSE
|
||||
@ -250,90 +285,66 @@ IRP_ENDIF
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib)
|
||||
subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
|
||||
logical, intent(out) :: last
|
||||
integer, intent(out) :: cur_cp
|
||||
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
|
||||
double precision, intent(out) :: double_buf(*), contrib(N_states)
|
||||
integer, intent(out) :: int_buf(*)
|
||||
integer(bit_kind), intent(out) :: det_buf(N_int, 2, *)
|
||||
integer, intent(out) :: sparse(0:N_det)
|
||||
integer, intent(out) :: ind
|
||||
integer, intent(out) :: task_id
|
||||
integer :: rc, i, j, k, sparsen
|
||||
integer :: rc, i, j, k
|
||||
integer, intent(out) :: N_buf(3)
|
||||
real(kind=4), intent(out) :: delta_loc4(N_states, N_det, 2)
|
||||
|
||||
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
|
||||
if(rc /= 4) stop "pulla"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0)
|
||||
if(rc /= 1) stop "pulla"
|
||||
rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0)
|
||||
if(rc /= 4) stop "pulla"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, sparse(0), 4, 0)
|
||||
if(rc /= 4) stop "pullb"
|
||||
|
||||
if(sparse(0) /= 0) then
|
||||
|
||||
|
||||
if(cur_cp /= -1) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0)
|
||||
if(rc /= 8*N_states*N_det) stop "pullc"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0)
|
||||
if(rc /= 8*N_states*N_det) stop "pulld"
|
||||
else
|
||||
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
|
||||
if(rc /= 8*N_states) stop "pullc"
|
||||
|
||||
if(sparse(0) == -1) then
|
||||
do i=1,N_det
|
||||
sparse(i) = i
|
||||
end do
|
||||
sparse(0) = N_det
|
||||
else
|
||||
rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0)
|
||||
if(rc /= 4*sparse(0)) stop "pullc"
|
||||
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
|
||||
if(rc /= 4*3) stop "pull"
|
||||
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
|
||||
|
||||
if(N_buf(1) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
|
||||
if(rc /= 4*N_buf(1)) stop "pull1"
|
||||
end if
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0)
|
||||
if(rc /= 4*N_states*sparse(0)) stop "pullc"
|
||||
if(N_buf(2) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
|
||||
if(rc /= 8*N_buf(2)) stop "pull2"
|
||||
end if
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,2), N_states*4*sparse(0), 0)
|
||||
if(rc /= 4*N_states*sparse(0)) stop "pulld"
|
||||
if(N_buf(3) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
|
||||
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
|
||||
end if
|
||||
|
||||
do j=1,2
|
||||
do i=1,sparse(0)
|
||||
do k=1,N_states
|
||||
delta_loc(k,i,j) = real(delta_loc4(k,i,j), kind=8)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
else
|
||||
contrib = 0d0
|
||||
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
||||
if(rc /= 4) stop "pull4"
|
||||
end if
|
||||
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
|
||||
if(rc /= 4*3) stop "pull"
|
||||
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
|
||||
|
||||
|
||||
if(N_buf(1) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
|
||||
if(rc /= 4*N_buf(1)) stop "pull1"
|
||||
end if
|
||||
|
||||
if(N_buf(2) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
|
||||
if(rc /= 8*N_buf(2)) stop "pull2"
|
||||
end if
|
||||
|
||||
if(N_buf(3) > 0) then
|
||||
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
|
||||
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
|
||||
end if
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
||||
if(rc /= 4) stop "pull4"
|
||||
|
||||
! Activate is zmq_socket_pull is a REP
|
||||
IRP_IF ZMQ_PUSH
|
||||
IRP_ELSE
|
||||
|
Loading…
Reference in New Issue
Block a user