From e0ce8d026bb5b06356e333e58052296e7fcea01c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 27 Apr 2021 00:35:18 +0200 Subject: [PATCH] Fixed distributed PT2 --- src/cipsi/pt2_stoch_routines.irp.f | 3 +-- src/cipsi/run_pt2_slave.irp.f | 28 +++++++++++++++------------- src/cipsi/slave_cipsi.irp.f | 13 ++++++++++--- src/fci/fci.irp.f | 2 +- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index a0107513..7554c39e 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -15,7 +15,7 @@ END_PROVIDER pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000) call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max') - pt2_F(:) = int(sqrt(float(pt2_n_tasks_max))) + pt2_F(:) = max(int(sqrt(float(pt2_n_tasks_max))),1) do i=1,pt2_n_0(1+pt2_N_teeth/4) pt2_F(i) = pt2_n_tasks_max*pt2_min_parallel_tasks enddo @@ -26,7 +26,6 @@ END_PROVIDER pt2_F(i) = 1 enddo - END_PROVIDER BEGIN_PROVIDER [ integer, pt2_N_teeth ] diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index ca55871c..5f7d5073 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -183,9 +183,9 @@ subroutine run_pt2_slave_large(thread,iproc,energy) type(selection_buffer) :: b logical :: done, buffer_ready - type(pt2_type) :: pt2_data(1) + type(pt2_type) :: pt2_data integer :: n_tasks, k, N - integer :: i_generator(1), subset + integer :: i_generator, subset integer :: bsize ! Size of selection buffers logical :: sending @@ -220,7 +220,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy) endif if (n_tasks == 0) exit - call sscanf_ddd(task, subset, i_generator(1), N) + call sscanf_ddd(task, subset, i_generator, N) +!$OMP CRITICAL +print *, ':task:', subset, i_generator, N, pt2_F(i_generator) +!$OMP END CRITICAL + if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then + print *, irp_here + stop 'bug in selection' + endif if (b%N == 0) then ! Only first time bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) @@ -232,15 +239,10 @@ subroutine run_pt2_slave_large(thread,iproc,energy) double precision :: time0, time1 call wall_time(time0) - call pt2_alloc(pt2_data(1),N_states) + call pt2_alloc(pt2_data,N_states) b%cur = 0 -!double precision :: time2 -!call wall_time(time2) - call select_connected(i_generator(1),energy,pt2_data(1),b,subset,pt2_F(i_generator(1))) -!call wall_time(time1) -!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) + call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator)) call wall_time(time1) -!print *, '-->', i_generator(1), time1-time0, n_tasks integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -255,14 +257,14 @@ subroutine run_pt2_slave_large(thread,iproc,energy) call omp_unset_lock(global_selection_buffer_lock) if ( iproc == 1 ) then call omp_set_lock(global_selection_buffer_lock) - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, global_selection_buffer, task_id, 1,sending) global_selection_buffer%cur = 0 call omp_unset_lock(global_selection_buffer_lock) else - call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks,sending) + call push_pt2_results_async_send(zmq_socket_push, i_generator, pt2_data, b, task_id, 1,sending) endif - call pt2_dealloc(pt2_data(1)) + call pt2_dealloc(pt2_data) end do call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) diff --git a/src/cipsi/slave_cipsi.irp.f b/src/cipsi/slave_cipsi.irp.f index 2c612986..510c667b 100644 --- a/src/cipsi/slave_cipsi.irp.f +++ b/src/cipsi/slave_cipsi.irp.f @@ -122,7 +122,7 @@ subroutine run_slave_main IRP_ENDIF if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle pt2_e0_denominator(1:N_states) = energy(1:N_states) - TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight + TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight psi_det psi_coef if (mpi_master) then print *, 'N_det', N_det @@ -230,7 +230,8 @@ subroutine run_slave_main IRP_ENDIF if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle pt2_e0_denominator(1:N_states) = energy(1:N_states) - SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight + SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight psi_det psi_coef N_det_generators N_det_selectors + call wall_time(t1) call write_double(6,(t1-t0),'Broadcast time') @@ -300,7 +301,13 @@ subroutine run_slave_main PROVIDE det_to_configuration endif - PROVIDE global_selection_buffer + PROVIDE global_selection_buffer pt2_N_teeth pt2_F N_det_generators + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted + PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks + if (mpi_master) then print *, 'Running PT2' endif diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index 5c747081..9d9c0b7d 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -46,7 +46,7 @@ program fci endif else - PROVIDE mo_two_e_integrals_in_map + PROVIDE mo_two_e_integrals_in_map pt2_min_parallel_tasks call run_slave_cipsi