10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-12 14:08:30 +01:00
quantum_package/plugins/dress_zmq/run_dress_slave.irp.f
2018-05-22 19:11:10 +02:00

382 lines
12 KiB
Fortran

use bitmasks
BEGIN_PROVIDER [ integer, fragment_count ]
implicit none
BEGIN_DOC
! Number of fragments for the deterministic part
END_DOC
fragment_count = 1
END_PROVIDER
subroutine run_dress_slave(thread,iproce,energy)
use f77_zmq
use omp_lib
implicit none
double precision, intent(in) :: energy(N_states_diag)
integer, intent(in) :: thread, iproce
integer :: rc, i, subset, i_generator
integer :: worker_id, task_id, ctask, ltask
character*(5120) :: task
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
logical :: done
double precision,allocatable :: dress_detail(:)
integer :: ind
double precision,allocatable :: delta_ij_loc(:,:,:)
integer :: h,p,n,i_state
logical :: ok
integer, allocatable :: int_buf(:)
double precision, allocatable :: double_buf(:)
integer(bit_kind), allocatable :: det_buf(:,:,:)
integer :: N_buf(3)
logical :: last
!integer, external :: omp_get_thread_num
double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:)
integer :: toothMwen
logical :: fracted
double precision :: fac
! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2))
delta_det = 0d9
cp = 0d0
task(:) = CHAR(0)
integer :: iproc, cur_cp, done_for(0:N_cp)
integer, allocatable :: tasks(:)
integer :: lastCp(Nproc)
integer :: lastSent, lastSendable
logical :: send
integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1)
integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1)
do i=0,N_cp+1
call omp_init_lock(lck_sto(i))
end do
do i=0,comb_teeth+1
call omp_init_lock(lck_det(i))
end do
lastCp = 0
lastSent = 0
send = .false.
done_for = 0
double precision :: hij, sij
!call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij)
hij = dress_E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL
!$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) &
!$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) &
!$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) &
!$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
if(worker_id == -1) then
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
stop "WORKER -1"
end if
iproc = omp_get_thread_num()+1
allocate(int_buf(N_dress_int_buffer))
allocate(double_buf(N_dress_double_buffer))
allocate(det_buf(N_int, 2, N_dress_det_buffer))
allocate(delta_ij_loc(N_states,N_det,2))
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
task = task//" 0"
if(task_id == 0) exit
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
if(task_id /= 0) then
read (task,*) subset, i_generator
!$OMP ATOMIC
done_for(done_cp_at_det(i_generator)) += 1
! print *, "IGEN", i_generator, done_cp_at_det(i_generator)
delta_ij_loc(:,:,:) = 0d0
call generator_start(i_generator, iproc)
call alpha_callback(delta_ij_loc, i_generator, subset, iproc)
call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc)
do i=1,N_cp
fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step
if(fac == 0d0) cycle
call omp_set_lock(lck_sto(i))
cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac)
cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac)
call omp_unset_lock(lck_sto(i))
end do
toothMwen = tooth_of_det(i_generator)
fracted = (toothMwen /= 0)
if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen))
if(fracted) then
call omp_set_lock(lck_det(toothMwen))
call omp_set_lock(lck_det(toothMwen-1))
delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen))
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen))
call omp_unset_lock(lck_det(toothMwen))
call omp_unset_lock(lck_det(toothMwen-1))
else
call omp_set_lock(lck_det(toothMwen))
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1)
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2)
call omp_unset_lock(lck_det(toothMwen))
end if
call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
lastCp(iproc) = done_cp_at_det(i_generator)
end if
!$OMP CRITICAL
send = .false.
lastSendable = N_cp*2
do i=1,Nproc
lastSendable = min(lastCp(i), lastSendable)
end do
lastSendable -= 1
if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then
lastSent = lastSendable
cur_cp = lastSent
send = .true.
end if
!$OMP END CRITICAL
if(send) then
N_buf = (/0,1,0/)
delta_ij_loc = 0d0
if(cur_cp < 1) stop "cur_cp < 1"
do i=1,cur_cp
delta_ij_loc(:,:,1) += cp(:,:,i,1)
delta_ij_loc(:,:,2) += cp(:,:,i,2)
end do
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp)
do i=cp_first_tooth(cur_cp)-1,0,-1
delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1)
delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2)
end do
call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1)
end if
if(task_id == 0) exit
end do
call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
!$OMP END PARALLEL
do i=0,N_cp+1
call omp_destroy_lock(lck_sto(i))
end do
do i=0,comb_teeth+1
call omp_destroy_lock(lck_det(i))
end do
end subroutine
subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
use f77_zmq
implicit none
integer, parameter :: sendt = 4
integer(ZMQ_PTR), intent(in) :: zmq_socket_push
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
real(kind=4), allocatable :: delta_loc4(:,:,:)
double precision, intent(in) :: double_buf(*)
integer, intent(in) :: int_buf(*)
integer(bit_kind), intent(in) :: det_buf(N_int, 2, *)
integer, intent(in) :: N_bufi(3)
integer :: N_buf(3)
integer, intent(in) :: ind, cur_cp, task_id
integer :: rc, i, j, k, l
double precision :: contrib(N_states)
real(sendt), allocatable :: r4buf(:,:,:)
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(cur_cp /= -1) then
allocate(r4buf(N_states, N_det, 2))
do i=1,2
do j=1,N_det
do k=1,N_states
r4buf(k,j,i) = real(delta_loc(k,j,i), sendt)
end do
end do
end do
rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE)
if(rc /= sendt*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE)
if(rc /= sendt*N_states*N_det) stop "push"
else
contrib = 0d0
do i=1,N_det
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
end do
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
N_buf = N_bufi
!N_buf = (/0,1,0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
if(rc /= 4*N_buf(1)) stop "push6"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
if(rc /= 8*N_buf(2)) stop "push8"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
end if
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push11"
end if
! Activate is zmq_socket_push is a REQ
IRP_IF ZMQ_PUSH
IRP_ELSE
character*(2) :: ok
rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
IRP_ENDIF
end subroutine
BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ]
END_PROVIDER
subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib)
use f77_zmq
implicit none
integer, parameter :: sendt = 4
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(out) :: cur_cp
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
double precision, intent(out) :: double_buf(*), contrib(N_states)
integer, intent(out) :: int_buf(*)
integer(bit_kind), intent(out) :: det_buf(N_int, 2, *)
integer, intent(out) :: ind
integer, intent(out) :: task_id
integer :: rc, i, j, k
integer, intent(out) :: N_buf(3)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0)
if(rc /= 4) stop "pulla"
if(cur_cp /= -1) then
rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0)
if(rc /= sendt*N_states*N_det) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0)
if(rc /= sendt*N_states*N_det) stop "pulld"
do i=1,2
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k)
do j=1,N_det
do k=1,N_states
delta_loc(k,j,i) = real(real4buf(k,j,i), 8)
end do
end do
end do
else
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
if(rc /= 8*N_states) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
if(rc /= 4*3) stop "pull"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
if(rc /= 4*N_buf(1)) stop "pull1"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
if(rc /= 8*N_buf(2)) stop "pull2"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
end if
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull4"
end if
! Activate is zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE
rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
IRP_ENDIF
end subroutine