Distributed only when Ndet > 100 000

This commit is contained in:
Anthony Scemama 2019-02-04 18:21:21 +01:00
parent ba3c3ab7e1
commit acab743773
5 changed files with 189 additions and 42 deletions

View File

@ -23,7 +23,22 @@ BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
call omp_unset_lock(global_selection_buffer_lock)
END_PROVIDER
subroutine run_pt2_slave(thread,iproc,energy)
use selection_types
use f77_zmq
implicit none
double precision, intent(in) :: energy(N_states_diag)
integer, intent(in) :: thread, iproc
if (N_det > 1000000) then
call run_pt2_slave_large(thread,iproc,energy)
else
call run_pt2_slave_small(thread,iproc,energy)
endif
end
subroutine run_pt2_slave_small(thread,iproc,energy)
use selection_types
use f77_zmq
implicit none
@ -49,15 +64,132 @@ subroutine run_pt2_slave(thread,iproc,energy)
integer :: n_tasks, k, N
integer, allocatable :: i_generator(:), subset(:)
double precision :: rss
double precision, external :: memory_of_double, memory_of_int
integer :: bsize ! Size of selection buffers
! logical :: sending
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
allocate(variance(N_states,pt2_n_tasks_max))
allocate(norm(N_states,pt2_n_tasks_max))
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
integer, external :: connect_to_taskserver
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
return
endif
zmq_socket_push = new_zmq_push_socket(thread)
b%N = 0
buffer_ready = .False.
n_tasks = 1
! sending = .False.
done = .False.
do while (.not.done)
n_tasks = max(1,n_tasks)
n_tasks = min(pt2_n_tasks_max,n_tasks)
integer, external :: get_tasks_from_taskserver
if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
exit
endif
done = task_id(n_tasks) == 0
if (done) then
n_tasks = n_tasks-1
endif
if (n_tasks == 0) exit
do k=1,n_tasks
read (task(k),*) subset(k), i_generator(k), N
enddo
if (b%N == 0) then
! Only first time
bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
call create_selection_buffer(bsize, bsize*2, b)
buffer_ready = .True.
else
ASSERT (N == b%N)
endif
double precision :: time0, time1
call wall_time(time0)
do k=1,n_tasks
pt2(:,k) = 0.d0
variance(:,k) = 0.d0
norm(:,k) = 0.d0
b%cur = 0
!double precision :: time2
!call wall_time(time2)
call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k)))
!call wall_time(time1)
!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
enddo
call wall_time(time1)
!print *, '-->', i_generator(1), time1-time0, n_tasks
integer, external :: tasks_done_to_taskserver
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
done = .true.
endif
call sort_selection_buffer(b)
call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks)
b%cur=0
! ! Try to adjust n_tasks around nproc/2 seconds per job
! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
n_tasks = 1
end do
integer, external :: disconnect_from_taskserver
do i=1,300
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
call sleep(1)
print *, 'Retry disconnect...'
end do
call end_zmq_push_socket(zmq_socket_push,thread)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
if (buffer_ready) then
call delete_selection_buffer(b)
endif
end subroutine
subroutine run_pt2_slave_large(thread,iproc,energy)
use selection_types
use f77_zmq
implicit none
double precision, intent(in) :: energy(N_states_diag)
integer, intent(in) :: thread, iproc
integer :: rc, i
integer :: worker_id, ctask, ltask
character*(512), allocatable :: task(:)
integer, allocatable :: task_id(:)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
type(selection_buffer) :: b
logical :: done, buffer_ready
double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
integer :: n_tasks, k, N
integer, allocatable :: i_generator(:), subset(:)
integer :: bsize ! Size of selection buffers
logical :: sending
PROVIDE global_selection_buffer global_selection_buffer_lock
rss = memory_of_int(pt2_n_tasks_max)*67.d0
rss += memory_of_double(pt2_n_tasks_max)*(N_states*3)
call check_mem(rss,irp_here)
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))

View File

@ -232,6 +232,7 @@ subroutine run_slave_main
endif
IRP_ENDIF
IRP_IF MPI_DEBUG
call mpi_print('Entering OpenMP section')
IRP_ENDIF
@ -251,7 +252,7 @@ subroutine run_slave_main
+ 64.d0*pt2_n_tasks_max & ! task
+ 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm
+ 1.d0*pt2_n_tasks_max & ! i_generator, subset
+ 2.d0*(N_int*2.d0*ii+ ii) & ! selection buffer
+ 3.d0*(N_int*2.d0*ii+ ii) & ! selection buffer
+ 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer
+ 2.0d0*(ii) & ! preinteresting, interesting,
! prefullinteresting, fullinteresting
@ -272,36 +273,36 @@ subroutine run_slave_main
nproc_target = nproc_target - 1
enddo
if (N_det > 100000) then
if (mpi_master) then
print *, 'N_det', N_det
print *, 'N_det_generators', N_det_generators
print *, 'N_det_selectors', N_det_selectors
print *, 'pt2_e0_denominator', pt2_e0_denominator
print *, 'pt2_stoch_istate', pt2_stoch_istate
print *, 'state_average_weight', state_average_weight
print *, 'Number of threads', nproc_target
endif
if (.true.) then
PROVIDE psi_det_hii
if (mpi_master) then
print *, 'N_det', N_det
print *, 'N_det_generators', N_det_generators
print *, 'N_det_selectors', N_det_selectors
print *, 'pt2_e0_denominator', pt2_e0_denominator
print *, 'pt2_stoch_istate', pt2_stoch_istate
print *, 'state_average_weight', state_average_weight
print *, 'Number of threads', nproc_target
endif
if (h0_type == 'SOP') then
PROVIDE psi_occ_pattern_hii det_to_occ_pattern
PROVIDE det_to_occ_pattern
endif
endif
PROVIDE global_selection_buffer
if (mpi_master) then
print *, 'Running PT2'
PROVIDE global_selection_buffer
if (mpi_master) then
print *, 'Running PT2'
endif
!$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
i = omp_get_thread_num()
call run_pt2_slave(0,i,pt2_e0_denominator)
!$OMP END PARALLEL
FREE state_average_weight
print *, mpi_rank, ': PT2 done'
print *, '-------'
endif
!$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
i = omp_get_thread_num()
call run_pt2_slave(0,i,pt2_e0_denominator)
!$OMP END PARALLEL
FREE state_average_weight
print *, mpi_rank, ': PT2 done'
print *, '-------'
endif
IRP_IF MPI

View File

@ -41,13 +41,27 @@ subroutine u_0_H_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze)
double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:)
double precision :: u_dot_u,u_dot_v,diag_H_mat_elem
integer :: i,j
integer :: i,j, istate
if ((n > 100000).and.distributed_davidson) then
allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag))
u_1(:,:) = 0.d0
u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n)
else if (n < n_det_max_full) then
allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st))
v_0(:,:) = 0.d0
u_1(:,:) = 0.d0
s_vec(:,:) = 0.d0
u_1(1:n,1:N_st) = u_0(1:n,1:N_st)
do istate = 1,N_st
do j=1,n
do i=1,n
v_0(i,istate) = h_matrix_all_dets(i,j) * u_0(j,istate)
s_vec(i,istate) = S2_matrix_all_dets(i,j) * u_0(j,istate)
enddo
enddo
enddo
else
allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st))
u_1(:,:) = 0.d0

View File

@ -1775,7 +1775,17 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb)
integer :: other_spin
integer :: k,l,i
ASSERT (iorb > 0)
if (iorb < 1) then
print *, irp_here, 'iorb < 1'
print *, iorb, mo_num
stop -1
endif
if (iorb > mo_num) then
print *, irp_here, 'iorb > mo_num'
print *, iorb, mo_num
stop -1
endif
ASSERT (ispin > 0)
ASSERT (ispin < 3)
ASSERT (Nint > 0)
@ -1793,16 +1803,6 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb)
key(k,ispin) = ibset(key(k,ispin),l)
other_spin = iand(ispin,1)+1
if (iorb < 1) then
print *, irp_here, 'iorb < 1'
print *, iorb, mo_num
stop -1
endif
if (iorb > mo_num) then
print *, irp_here, 'iorb > mo_num'
print *, iorb, mo_num
stop -1
endif
hjj = hjj + mo_one_e_integrals(iorb,iorb)
! Same spin

View File

@ -603,12 +603,12 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,name_in)
do i=3600,1,-1
rc = f77_zmq_send(zmq_to_qp_run_socket, 'end_job '//trim(zmq_state),8+len(trim(zmq_state)),0)
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 512, 0)
call sleep(1)
if (trim(message(1:13)) == 'error waiting') then
cycle
else if (message(1:2) == 'ok') then
exit
endif
call sleep(1)
end do
if (i==0) then
print *, '.. Forcing kill ..'