diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index 731e40ac..f9171a42 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -31,11 +31,11 @@ subroutine run_pt2_slave(thread,iproc,energy) double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: thread, iproc -! if (N_det > 100000 ) then -! call run_pt2_slave_large(thread,iproc,energy) -! else + if (N_det > 100000 ) then + call run_pt2_slave_large(thread,iproc,energy) + else call run_pt2_slave_small(thread,iproc,energy) -! endif + endif end subroutine run_pt2_slave_small(thread,iproc,energy) @@ -116,10 +116,10 @@ subroutine run_pt2_slave_small(thread,iproc,energy) do k=1,n_tasks call pt2_alloc(pt2_data(k),N_states) b%cur = 0 - double precision :: time2 - call wall_time(time2) +! double precision :: time2 +! call wall_time(time2) call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) - call wall_time(time1) +! call wall_time(time1) ! print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) enddo call wall_time(time1) @@ -172,8 +172,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy) integer :: rc, i integer :: worker_id, ctask, ltask - character*(512) :: task - integer :: task_id(1) + character*(512), allocatable :: task(:) + integer, allocatable :: task_id(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -184,15 +184,16 @@ subroutine run_pt2_slave_large(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - type(pt2_type) :: pt2_data + type(pt2_type), allocatable :: pt2_data(:) integer :: n_tasks, k, N - integer :: i_generator, subset - + integer, allocatable :: i_generator(:), subset(:) integer :: bsize ! Size of selection buffers logical :: sending PROVIDE global_selection_buffer global_selection_buffer_lock + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() integer, external :: connect_to_taskserver @@ -211,6 +212,9 @@ subroutine run_pt2_slave_large(thread,iproc,energy) done = .False. do while (.not.done) + n_tasks = max(1,n_tasks) + n_tasks = min(pt2_n_tasks_max,n_tasks) + integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then exit @@ -221,11 +225,9 @@ subroutine run_pt2_slave_large(thread,iproc,energy) endif if (n_tasks == 0) exit - call sscanf_ddd(task, subset, i_generator, N) - if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then - print *, irp_here - stop 'bug in selection' - endif + do k=1,n_tasks + call sscanf_ddd(task(k), subset(k), i_generator(k), N) + enddo if (b%N == 0) then ! Only first time bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) @@ -235,9 +237,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy) ASSERT (b%N == bsize) endif - call pt2_alloc(pt2_data,N_states) - b%cur = 0 - call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator)) + double precision :: time0, time1 + call wall_time(time0) + do k=1,n_tasks + call pt2_alloc(pt2_data(k),N_states) + b%cur = 0 + call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) + enddo + call wall_time(time1) integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -249,7 +256,7 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call merge_selection_buffers(b,global_selection_buffer) b%cur=0 call omp_unset_lock(global_selection_buffer_lock) - if ( iproc == 1 .or. i_generator < 100 .or. done) then + if ( iproc == 1 .or. i_generator(1) < 100 .or. done) then call omp_set_lock(global_selection_buffer_lock) call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) @@ -260,7 +267,13 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending) endif - call pt2_dealloc(pt2_data) + do k=1,n_tasks + call pt2_dealloc(pt2_data(k)) + enddo + b%cur=0 +! ! Try to adjust n_tasks at least 5 seconds per task + n_tasks = min(2*n_tasks,int( dble(5*n_tasks) / (time1 - time0 + 1.d0))) + n_tasks = min(n_tasks, pt2_n_tasks_max) end do call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)