10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-11 05:28:24 +01:00

Accelerating PT2 again

This commit is contained in:
Anthony Scemama 2022-03-08 23:43:29 +01:00
parent 6b118362df
commit 0c8f5e5f0b

View File

@ -172,8 +172,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
integer :: rc, i integer :: rc, i
integer :: worker_id, ctask, ltask integer :: worker_id, ctask, ltask
character*(512), allocatable :: task(:) character*(512) :: task
integer, allocatable :: task_id(:) integer :: task_id(1)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket
@ -184,16 +184,15 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
type(selection_buffer) :: b type(selection_buffer) :: b
logical :: done, buffer_ready logical :: done, buffer_ready
type(pt2_type), allocatable :: pt2_data(:) type(pt2_type) :: pt2_data
integer :: n_tasks, k, N integer :: n_tasks, k, N
integer, allocatable :: i_generator(:), subset(:) integer :: i_generator, subset
integer :: bsize ! Size of selection buffers integer :: bsize ! Size of selection buffers
logical :: sending logical :: sending
PROVIDE global_selection_buffer global_selection_buffer_lock PROVIDE global_selection_buffer global_selection_buffer_lock
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
integer, external :: connect_to_taskserver integer, external :: connect_to_taskserver
@ -212,9 +211,6 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
done = .False. done = .False.
do while (.not.done) do while (.not.done)
n_tasks = max(1,n_tasks)
n_tasks = min(pt2_n_tasks_max,n_tasks)
integer, external :: get_tasks_from_taskserver integer, external :: get_tasks_from_taskserver
if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
exit exit
@ -225,9 +221,11 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
endif endif
if (n_tasks == 0) exit if (n_tasks == 0) exit
do k=1,n_tasks call sscanf_ddd(task, subset, i_generator, N)
call sscanf_ddd(task(k), subset(k), i_generator(k), N) if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then
enddo print *, irp_here
stop 'bug in selection'
endif
if (b%N == 0) then if (b%N == 0) then
! Only first time ! Only first time
bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
@ -237,14 +235,9 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
ASSERT (b%N == bsize) ASSERT (b%N == bsize)
endif endif
double precision :: time0, time1 call pt2_alloc(pt2_data,N_states)
call wall_time(time0)
do k=1,n_tasks
call pt2_alloc(pt2_data(k),N_states)
b%cur = 0 b%cur = 0
call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
enddo
call wall_time(time1)
integer, external :: tasks_done_to_taskserver integer, external :: tasks_done_to_taskserver
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
@ -256,7 +249,7 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
call merge_selection_buffers(b,global_selection_buffer) call merge_selection_buffers(b,global_selection_buffer)
b%cur=0 b%cur=0
call omp_unset_lock(global_selection_buffer_lock) call omp_unset_lock(global_selection_buffer_lock)
if ( iproc == 1 .or. i_generator(1) < 100 .or. done) then if ( iproc == 1 .or. i_generator < 100 .or. done) then
call omp_set_lock(global_selection_buffer_lock) call omp_set_lock(global_selection_buffer_lock)
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
@ -267,13 +260,7 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending)
endif endif
do k=1,n_tasks call pt2_dealloc(pt2_data)
call pt2_dealloc(pt2_data(k))
enddo
b%cur=0
! ! Try to adjust n_tasks at least 5 seconds per task
n_tasks = min(2*n_tasks,int( dble(5*n_tasks) / (time1 - time0 + 1.d0)))
n_tasks = min(n_tasks, pt2_n_tasks_max)
end do end do
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)