mirror of
https://github.com/LCPQ/quantum_package
synced 2024-09-12 22:28:30 +02:00
Load balancing in 4idx
This commit is contained in:
parent
31b3b7dd79
commit
af229e7028
@ -2,7 +2,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
i_start, j_start, k_start, l_start, &
|
i_start, j_start, k_start, l_start, &
|
||||||
i_end , j_end , k_end , l_end , &
|
i_end , j_end , k_end , l_end , &
|
||||||
a_start, b_start, c_start, d_start, &
|
a_start, b_start, c_start, d_start, &
|
||||||
a_end , b_end , c_end , d_end, task_id, thread )
|
a_end , b_end , c_end , d_end, task_id, worker_id, thread )
|
||||||
implicit none
|
implicit none
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
use map_module
|
use map_module
|
||||||
@ -19,7 +19,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
integer, intent(in) :: i_end , j_end , k_end , l_end
|
integer, intent(in) :: i_end , j_end , k_end , l_end
|
||||||
integer, intent(in) :: a_start, b_start, c_start, d_start
|
integer, intent(in) :: a_start, b_start, c_start, d_start
|
||||||
integer, intent(in) :: a_end , b_end , c_end , d_end
|
integer, intent(in) :: a_end , b_end , c_end , d_end
|
||||||
integer, intent(in) :: task_id, thread
|
integer, intent(in) :: task_id, thread, worker_id
|
||||||
|
|
||||||
double precision, allocatable :: T(:,:), U(:,:,:), V(:,:)
|
double precision, allocatable :: T(:,:), U(:,:,:), V(:,:)
|
||||||
double precision, allocatable :: T2d(:,:), V2d(:,:)
|
double precision, allocatable :: T2d(:,:), V2d(:,:)
|
||||||
@ -63,13 +63,13 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
double precision, allocatable :: a_array_value(:)
|
double precision, allocatable :: a_array_value(:)
|
||||||
|
|
||||||
integer*8 :: new_size
|
integer*8 :: new_size
|
||||||
new_size = max(1024_8, 5_8 * map_a % n_elements )
|
new_size = max(2048_8, 5_8 * map_a % n_elements )
|
||||||
|
|
||||||
integer*8 :: tempspace
|
integer*8 :: tempspace
|
||||||
integer :: npass, l_block
|
integer :: npass, l_block
|
||||||
|
|
||||||
tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
|
tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
|
||||||
npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 1024_8),4) ! 1 GiB of scratch space
|
npass = int(min(1_8+int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space
|
||||||
l_block = (l_end-l_start+1)/npass
|
l_block = (l_end-l_start+1)/npass
|
||||||
|
|
||||||
allocate(a_array_ik(new_size/npass), a_array_j(new_size/npass), a_array_value(new_size/npass))
|
allocate(a_array_ik(new_size/npass), a_array_j(new_size/npass), a_array_value(new_size/npass))
|
||||||
@ -133,16 +133,12 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
!open(unit=10,file='OUTPUT',form='FORMATTED')
|
!open(unit=10,file='OUTPUT',form='FORMATTED')
|
||||||
! END INPUT DATA
|
! END INPUT DATA
|
||||||
|
|
||||||
PROVIDE nproc
|
|
||||||
integer :: n_running_threads
|
|
||||||
n_running_threads = 0
|
|
||||||
call omp_set_nested(.true.)
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
|
!$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
|
||||||
!$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
|
!$OMP a_start,a_end,b_start,b_end,c_start,c_end,d_start,d_end,&
|
||||||
!$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
|
!$OMP i_start,i_end,j_start,j_end,k_start,k_end,l_start,l_end,&
|
||||||
!$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
|
!$OMP i_min,i_max,j_min,j_max,k_min,k_max,l_min,l_max, &
|
||||||
!$OMP matrix_B,l_pointer,thread,task_id,n_running_threads,nproc) &
|
!$OMP matrix_B,l_pointer,thread,task_id,worker_id) &
|
||||||
!$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll, &
|
!$OMP PRIVATE(key,value,T,U,V,i,j,k,l,idx,ik,ll,zmq_to_qp_run_socket, &
|
||||||
!$OMP a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)
|
!$OMP a,b,c,d,p,q,tmp,T2d,V2d,ii,zmq_socket_push)
|
||||||
|
|
||||||
integer(ZMQ_PTR) :: zmq_socket_push
|
integer(ZMQ_PTR) :: zmq_socket_push
|
||||||
@ -153,9 +149,6 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
|
|
||||||
allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
|
allocate( U(a_start:a_end, c_start:c_end, b_start:b_end) )
|
||||||
|
|
||||||
!$OMP ATOMIC
|
|
||||||
n_running_threads = n_running_threads+1
|
|
||||||
|
|
||||||
!$OMP DO SCHEDULE(dynamic,1)
|
!$OMP DO SCHEDULE(dynamic,1)
|
||||||
do d=d_start,d_end
|
do d=d_start,d_end
|
||||||
U = 0.d0
|
U = 0.d0
|
||||||
@ -185,13 +178,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
V2d(1,b_start), size(V2d,1) )
|
V2d(1,b_start), size(V2d,1) )
|
||||||
deallocate(T2d)
|
deallocate(T2d)
|
||||||
|
|
||||||
!$OMP FLUSH(n_running_threads)
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) SHARED(a_array_ik,a_array_j,a_array_value, &
|
|
||||||
!$OMP a_start,b_start,b_end,c_start,c_end,i_start,k_start,k_end, &
|
|
||||||
!$OMP matrix_B,U,l,d,V2d,i_end,a_end,n_running_threads) &
|
|
||||||
!$OMP PRIVATE(T,V,i,k,ik) NUM_THREADS(nproc-n_running_threads+1)
|
|
||||||
allocate( V(i_start:i_end, k_start:k_end), T(k_start:k_end, a_start:a_end))
|
allocate( V(i_start:i_end, k_start:k_end), T(k_start:k_end, a_start:a_end))
|
||||||
!$OMP DO SCHEDULE(static,1)
|
|
||||||
do b=b_start,d
|
do b=b_start,d
|
||||||
ik = 0
|
ik = 0
|
||||||
do k=k_start,k_end
|
do k=k_start,k_end
|
||||||
@ -221,10 +208,7 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
U(a_start,b+1,b), size(U,1) )
|
U(a_start,b+1,b), size(U,1) )
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
deallocate(T,V)
|
deallocate(T,V,V2d)
|
||||||
!$OMP END PARALLEL
|
|
||||||
|
|
||||||
deallocate(V2d)
|
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
idx = 0_8
|
idx = 0_8
|
||||||
@ -253,19 +237,23 @@ subroutine four_index_transform_slave_work(map_a,matrix_B,LDB, &
|
|||||||
deallocate(key,value)
|
deallocate(key,value)
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO NOWAIT
|
|
||||||
deallocate(U)
|
deallocate(U)
|
||||||
|
|
||||||
!$OMP ATOMIC
|
|
||||||
n_running_threads = n_running_threads-1
|
|
||||||
|
|
||||||
!$OMP BARRIER
|
!$OMP BARRIER
|
||||||
!$OMP MASTER
|
!$OMP MASTER
|
||||||
|
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||||
|
integer(ZMQ_PTR), external :: task_done_to_taskserver, new_zmq_to_qp_run_socket
|
||||||
|
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||||
|
if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
|
||||||
|
stop 'Unable to send task done'
|
||||||
|
endif
|
||||||
call four_idx_push_results(zmq_socket_push, 0_8, 0.d0, 0, task_id)
|
call four_idx_push_results(zmq_socket_push, 0_8, 0.d0, 0, task_id)
|
||||||
|
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||||
!$OMP END MASTER
|
!$OMP END MASTER
|
||||||
call end_zmq_push_socket(zmq_socket_push)
|
call end_zmq_push_socket(zmq_socket_push)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
|
||||||
deallocate(l_pointer,a_array_ik,a_array_j,a_array_value)
|
deallocate(l_pointer,a_array_ik,a_array_j,a_array_value)
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -62,13 +62,13 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, &
|
|||||||
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'four_idx')
|
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'four_idx')
|
||||||
|
|
||||||
integer*8 :: new_size
|
integer*8 :: new_size
|
||||||
new_size = max(1024_8, 5_8 * map_a % n_elements )
|
new_size = max(2048_8, 5_8 * map_a % n_elements )
|
||||||
|
|
||||||
integer :: npass
|
integer :: npass
|
||||||
integer*8 :: tempspace
|
integer*8 :: tempspace
|
||||||
|
|
||||||
tempspace = (new_size * 16_8) / (1024_8 * 1024_8)
|
tempspace = (new_size * 16_8) / (2048_8 * 2048_8)
|
||||||
npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 1024_8),4) ! 1 GiB of scratch space
|
npass = int(min(int(l_end-l_start,8),1_8 + tempspace / 2048_8),4) ! 2 GiB of scratch space
|
||||||
l_block = (l_end-l_start+1)/npass
|
l_block = (l_end-l_start+1)/npass
|
||||||
|
|
||||||
! Create tasks
|
! Create tasks
|
||||||
@ -98,9 +98,11 @@ subroutine four_index_transform_zmq(map_a,map_c,matrix_B,LDB, &
|
|||||||
|
|
||||||
PROVIDE nproc
|
PROVIDE nproc
|
||||||
|
|
||||||
|
integer :: ithread, sqnproc
|
||||||
|
sqnproc = int(sqrt(float(nproc))+0.5)
|
||||||
call omp_set_nested(.True.)
|
call omp_set_nested(.True.)
|
||||||
integer :: ithread
|
call omp_set_dynamic(.True.)
|
||||||
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
|
!$OMP PARALLEL NUM_THREADS(1+max(1,sqnproc)) PRIVATE(ithread)
|
||||||
ithread = omp_get_thread_num()
|
ithread = omp_get_thread_num()
|
||||||
if (ithread==0) then
|
if (ithread==0) then
|
||||||
call four_idx_collector(zmq_socket_pull,map_c)
|
call four_idx_collector(zmq_socket_pull,map_c)
|
||||||
@ -178,19 +180,13 @@ subroutine four_index_transform_slave(thread,worker_id)
|
|||||||
i_end , j_end , k_end , l_end_block , &
|
i_end , j_end , k_end , l_end_block , &
|
||||||
a_start, b_start, c_start, d_start, &
|
a_start, b_start, c_start, d_start, &
|
||||||
a_end , b_end , c_end , d_end, &
|
a_end , b_end , c_end , d_end, &
|
||||||
task_id, thread)
|
task_id, worker_id, thread)
|
||||||
|
|
||||||
integer, external :: task_done_to_taskserver
|
|
||||||
if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
|
|
||||||
print *, irp_here, ': Unable to send task_done'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
enddo
|
enddo
|
||||||
integer, external :: disconnect_from_taskserver
|
integer, external :: disconnect_from_taskserver
|
||||||
if (disconnect_from_taskserver(zmq_to_qp_run_socket,thread) == -1) then
|
if (disconnect_from_taskserver(zmq_to_qp_run_socket,thread) == -1) then
|
||||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
continue
|
||||||
return
|
|
||||||
endif
|
endif
|
||||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||||
|
|
||||||
@ -287,8 +283,6 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
|
|||||||
rc8 = f77_zmq_recv8( zmq_socket_pull, value, int(integral_kind*sze,8), 0)
|
rc8 = f77_zmq_recv8( zmq_socket_pull, value, int(integral_kind*sze,8), 0)
|
||||||
if(rc8 /= integral_kind*sze) stop 'four_idx_pull_results failed to pull value'
|
if(rc8 /= integral_kind*sze) stop 'four_idx_pull_results failed to pull value'
|
||||||
|
|
||||||
call map_update(map_c, key, value, sze, mo_integrals_threshold)
|
|
||||||
|
|
||||||
! Activate if zmq_socket_pull is a REP
|
! Activate if zmq_socket_pull is a REP
|
||||||
IRP_IF ZMQ_PUSH
|
IRP_IF ZMQ_PUSH
|
||||||
IRP_ELSE
|
IRP_ELSE
|
||||||
@ -299,6 +293,8 @@ subroutine four_idx_pull_results(zmq_socket_pull, map_c, task_id)
|
|||||||
endif
|
endif
|
||||||
IRP_ENDIF
|
IRP_ENDIF
|
||||||
|
|
||||||
|
call map_update(map_c, key, value, sze, mo_integrals_threshold)
|
||||||
|
|
||||||
deallocate(key, value)
|
deallocate(key, value)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user