10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-25 05:43:47 +01:00

OMP master

This commit is contained in:
Yann Garniron 2018-04-30 13:25:58 +02:00
parent 9966697ab2
commit f61661a832
2 changed files with 75 additions and 39 deletions

View File

@ -74,7 +74,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error)
ipos=1 ipos=1
ntas = 0 ntas = 0
do i=1,N_dress_jobs+1 do i=1,N_dress_jobs+1
flushme = (i==N_dress_jobs+1 .or. block_i == size(block)) flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce )
if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce)
if(flushme .and. block_i > 0) then if(flushme .and. block_i > 0) then
@ -131,16 +131,16 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error)
print *, irp_here, ': Failed in zmq_set_running' print *, irp_here, ': Failed in zmq_set_running'
endif endif
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) & !!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc) &
!$OMP PRIVATE(i) ! !$OMP PRIVATE(i)
i = omp_get_thread_num() !i = omp_get_thread_num()
if (i==0) then !if (i==0) then
call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,&
dress_stoch_istate) dress_stoch_istate)
else !else
call dress_slave_inproc(i) ! call dress_slave_inproc(i)
endif !endif
!$OMP END PARALLEL !!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress')
print *, '========== ================= ================= =================' print *, '========== ================= ================= ================='
@ -197,7 +197,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
integer :: total_computed integer :: total_computed
integer :: delta_loc_cur, is, N_buf(3) integer :: delta_loc_cur, is, N_buf(3)
double precision :: fac , wei double precision :: fac , wei
logical :: ok
integer, allocatable :: int_buf(:) integer, allocatable :: int_buf(:)
double precision, allocatable :: double_buf(:) double precision, allocatable :: double_buf(:)
integer(bit_kind), allocatable :: det_buf(:,:,:) integer(bit_kind), allocatable :: det_buf(:,:,:)
@ -275,23 +274,25 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
do j=1,N_cp !! optimizable do j=1,N_cp !! optimizable
fac = 0d0 fac = 0d0
ok = .false.
!fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
fac = cps(ind, j) * wei * comb_step fac = cps(ind, j) * wei * comb_step
if(fac /= 0) then if(fac /= 0) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
do i=1,sparse(0) do i=1,sparse(0)
do is=1,N_states do is=1,N_states
cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac
end do end do
end do end do
!$OMP END PARALLEL DO
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i,is)
do i=1,sparse(0) do i=1,sparse(0)
do is=1,N_states do is=1,N_states
cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac
end do end do
end do end do
!$OMP END PARALLEL DO
end if end if
end do end do
@ -304,17 +305,21 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
if(fracted .and. .false.) then if(fracted .and. .false.) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
do i=1,sparse(0) do i=1,sparse(0)
delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen))
delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen))
end do end do
!$OMP END PARALLEL DO
else if(.false.) then else if(.false.) then
!$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(i)
do i=1,sparse(0) do i=1,sparse(0)
delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1) delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1)
delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2) delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2)
end do end do
!$OMP END PARALLEL DO
end if end if
parts_to_get(ind) -= 1 parts_to_get(ind) -= 1
@ -326,7 +331,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
time = omp_get_wtime() time = omp_get_wtime()
if((time - timeLast > 2d0) .or. (.not. loop)) then if((time - timeLast > 5d0) .or. (.not. loop)) then
timeLast = time timeLast = time
cur_cp = N_cp cur_cp = N_cp
@ -347,7 +352,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
su = 0d0 su = 0d0
su2 = 0d0 su2 = 0d0
!$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, &
!$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2)
do i=1, int(cps_N(cur_cp)) do i=1, int(cps_N(cur_cp))
call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) call get_comb_val(comb(i), dress_detail, cur_cp, val, istate)
su += val su += val

View File

@ -150,28 +150,53 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
end do end do
end do end do
rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(sparsei /= 0) then if(sparsei /= 0) then
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) if(sparsei < N_det / 2) then
if(rc /= 8*N_states) stop "push" rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
if(rc /= 4*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
!do i=sparsei,1 rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
! tmp(:,:) = delta_loc(:,i,:) if(rc /= 8*N_states) stop "push"
! delta_loc(:,i,:) = 0d0
! delta_loc(:,sparse(i),:) = tmp(:,:) rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
!end do if(rc /= 4*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 4*N_states*sparsei) stop "push"
else
rc = f77_zmq_send( zmq_socket_push, -1, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
do i=1,N_det
sparse(i) = i
do k=1,2
do l=1,N_states
delta_loc4(l,i,k) = real(delta_loc(l,i,k), kind=4)
end do
end do
end do
!rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
!if(rc /= 4*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 4*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc4(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 4*N_states*N_det) stop "push"
end if
else
rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
end if end if
@ -187,7 +212,7 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
end if end if
N_buf = N_bufi N_buf = N_bufi
N_buf = (/0, 1, 0/) !N_buf = (/0, 1, 0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5" if(rc /= 4*3) stop "push5"
@ -256,10 +281,15 @@ subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, delta_loc4,
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
if(rc /= 8*N_states) stop "pullc" if(rc /= 8*N_states) stop "pullc"
if(sparse(0) == -1) then
rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0) do i=1,N_det
if(rc /= 4*sparse(0)) stop "pullc" sparse(i) = i
end do
sparse(0) = N_det
else
rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0)
if(rc /= 4*sparse(0)) stop "pullc"
end if
rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0) rc = f77_zmq_recv( zmq_socket_pull, delta_loc4(1,1,1), N_states*4*sparse(0), 0)
if(rc /= 4*N_states*sparse(0)) stop "pullc" if(rc /= 4*N_states*sparse(0)) stop "pullc"