mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-03 10:05:57 +01:00
Load balancing in 4idx
This commit is contained in:
parent
31b3b7dd79
commit
af229e7028
@ -2,7 +2,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
i_start, j_start, k_start, l_start, &
|
||||
i_end , j_end , k_end , l_end , &
|
||||
a_start, b_start, c_start, d_start, &
|
||||
a_end , b_end , c_end , d_end, task_id, thread )
|
||||
a_end , b_end , c_end , d_end, task_id, worker_id, thread )
|
||||
implicit none
|
||||
use f77_zmq
|
||||
use map_module
|
||||
@ -19,7 +19,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
integer, intent(in) :: i_end , j_end , k_end , l_end
|
||||
integer, intent(in) :: a_start, b_start, c_start, d_start
|
||||
integer, intent(in) :: a_end , b_end , c_end , d_end
|
||||
integer, intent(in) :: task_id, thread
|
||||
integer, intent(in) :: task_id, thread, worker_id
|
||||
|
||||
double precision, allocatable :: T(:,:), U(:,:,:), V(:,:)
|
||||
double precision, allocatable :: T2d(:,:), V2d(:,:)
|
||||
@ -63,13 +63,13 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
double precision, allocatable :: a_array_value(:)
|
||||
|
||||
integer*8 :: new_size
|
||||
new_size = max(1024_8, 5_8 * map_a % n_elements )
|
||||
new_size = max(2048_8, 5_8 * map_a % n_elements )
|
||||
|
||||
integer*8 :: tempspace
|
||||
integer :: npass, l_block
|
||||
|
||||
tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
|
||||
npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 1024_8),4) ! 1 GiB of scratch space
|
||||
tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
|
||||
npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space
|
||||
l_block = (l_end-l_start+1)/npass
|
||||
|
||||
allocate(a_array_ik(new_size/npass), a_array_j(new_size/npass), a_array_value(new_size/npass))
|
||||
@ -133,17 +133,13 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
!open(unit=10,file='OUTPUT',form='FORMATTED')
|
||||
! END INPUT DATA
|
||||
|
||||
PROVIDE nproc
|
||||
integer :: n_running_threads
|
||||
n_running_threads = 0
|
||||
call omp_set_nested(.true.)
|
||||
!$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
|
||||
!$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
|
||||
!$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
|
||||
!$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
|
||||
!$OMP matrix_B,l_pointer,thread,task_id,n_running_threads,nproc) &
|
||||
!$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, &
|
||||
!$OMP a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)
|
||||
!$OMP matrix_B,l_pointer,thread,task_id,worker_id) &
|
||||
!$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,zmq_to_qp_run_socket, &
|
||||
!$OMP a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)
|
||||
|
||||
integer(ZMQ_PTR) :: zmq_socket_push
|
||||
integer(ZMQ_PTR), external :: new_zmq_push_socket
|
||||
@ -153,9 +149,6 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
|
||||
allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
|
||||
|
||||
!$OMP ATOMIC
|
||||
n_running_threads = n_running_threads+1
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic,1)
|
||||
do d=d_start,d_end
|
||||
U = 0.d0
|
||||
@ -185,13 +178,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
V2d(1,b_start), size(V2d,1) )
|
||||
deallocate(T2d)
|
||||
|
||||
!$OMP FLUSH(n_running_threads)
|
||||
!$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
|
||||
!$OMP a_start,b_start,b_end,c_start,c_end,i_start,k_start,k_end, &
|
||||
!$OMP matrix_B,U,l,d,V2d,i_end,a_end,n_running_threads) &
|
||||
!$OMP PRIVATE(T,V,i,k,ik) NUM_THREADS(nproc-n_running_threads+1)
|
||||
allocate( V(i_start:i_end, k_start:k_end), T(k_start:k_end, a_start:a_end))
|
||||
!$OMP DO SCHEDULE(static,1)
|
||||
do b=b_start,d
|
||||
ik = 0
|
||||
do k=k_start,k_end
|
||||
@ -221,10 +208,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
U(a_start,b+1,b), size(U,1) )
|
||||
endif
|
||||
enddo
|
||||
deallocate(T,V)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(V2d)
|
||||
deallocate(T,V,V2d)
|
||||
enddo
|
||||
|
||||
idx = 0_8
|
||||
@ -253,19 +237,23 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
||||
deallocate(key,value)
|
||||
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
deallocate(U)
|
||||
|
||||
!$OMP ATOMIC
|
||||
n_running_threads = n_running_threads-1
|
||||
|
||||
!$OMP BARRIER
|
||||
!$OMP MASTER
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR), external :: task_done_to_taskserver, new_zmq_to_qp_run_socket
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
|
||||
stop 'Unable to send task done'
|
||||
endif
|
||||
call four_idx_push_results(zmq_socket_push, 0_8, 0.d0, 0, task_id)
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
!$OMP END MASTER
|
||||
call end_zmq_push_socket(zmq_socket_push)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
|
||||
deallocate(l_pointer,a_array_ik,a_array_j,a_array_value)
|
||||
|
||||
end
|
||||
|
@ -62,13 +62,13 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, &
|
||||
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'four_idx')
|
||||
|
||||
integer*8 :: new_size
|
||||
new_size = max(1024_8, 5_8 * map_a % n_elements )
|
||||
new_size = max(2048_8, 5_8 * map_a % n_elements )
|
||||
|
||||
integer :: npass
|
||||
integer*8 :: tempspace
|
||||
|
||||
tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
|
||||
npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 1024_8),4) ! 1 GiB of scratch space
|
||||
tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
|
||||
npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space
|
||||
l_block = (l_end-l_start+1)/npass
|
||||
|
||||
! Create tasks
|
||||
@ -98,9 +98,11 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, &
|
||||
|
||||
PROVIDE nproc
|
||||
|
||||
integer :: ithread, sqnproc
|
||||
sqnproc = int(sqrt(float(nproc))+0.5)
|
||||
call omp_set_nested(.True.)
|
||||
integer :: ithread
|
||||
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
|
||||
call omp_set_dynamic(.True.)
|
||||
!$OMP PARALLEL NUM_THREADS(1+max(1,sqnproc)) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
if (ithread==0) then
|
||||
call four_idx_collector(zmq_socket_pull,map_c)
|
||||
@ -178,19 +180,13 @@ subroutine four_index_transform_slave(thread,worker_id)
|
||||
i_end , j_end , k_end , l_end_block , &
|
||||
a_start, b_start, c_start, d_start, &
|
||||
a_end , b_end , c_end , d_end, &
|
||||
task_id, thread)
|
||||
task_id, worker_id, thread)
|
||||
|
||||
integer, external :: task_done_to_taskserver
|
||||
if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
|
||||
print *, irp_here, ': Unable to send task_done'
|
||||
stop
|
||||
endif
|
||||
|
||||
enddo
|
||||
integer, external :: disconnect_from_taskserver
|
||||
if (disconnect_from_taskserver(zmq_to_qp_run_socket,thread) == -1) then
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
return
|
||||
continue
|
||||
endif
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
|
||||
@ -287,8 +283,6 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
|
||||
rc8 = f77_zmq_recv8( zmq_socket_pull, value, int(integral_kind*sze,8), 0)
|
||||
if(rc8 /= integral_kind*sze) stop 'four_idx_pull_results failed to pull value'
|
||||
|
||||
call map_update(map_c, key, value, sze, mo_integrals_threshold)
|
||||
|
||||
! Activate if zmq_socket_pull is a REP
|
||||
IRP_IF ZMQ_PUSH
|
||||
IRP_ELSE
|
||||
@ -299,6 +293,8 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
|
||||
endif
|
||||
IRP_ENDIF
|
||||
|
||||
call map_update(map_c, key, value, sze, mo_integrals_threshold)
|
||||
|
||||
deallocate(key, value)
|
||||
end
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user