diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index a32a4172..ff048d95 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -23,7 +23,22 @@ BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ] call omp_unset_lock(global_selection_buffer_lock) END_PROVIDER + subroutine run_pt2_slave(thread,iproc,energy) + use selection_types + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + if (N_det > 1000000) then + call run_pt2_slave_large(thread,iproc,energy) + else + call run_pt2_slave_small(thread,iproc,energy) + endif +end + +subroutine run_pt2_slave_small(thread,iproc,energy) use selection_types use f77_zmq implicit none @@ -49,15 +64,132 @@ subroutine run_pt2_slave(thread,iproc,energy) integer :: n_tasks, k, N integer, allocatable :: i_generator(:), subset(:) - double precision :: rss double precision, external :: memory_of_double, memory_of_int integer :: bsize ! Size of selection buffers +! logical :: sending + + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) + allocate(variance(N_states,pt2_n_tasks_max)) + allocate(norm(N_states,pt2_n_tasks_max)) + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + integer, external :: connect_to_taskserver + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + b%N = 0 + buffer_ready = .False. + n_tasks = 1 + +! sending = .False. + done = .False. + do while (.not.done) + + n_tasks = max(1,n_tasks) + n_tasks = min(pt2_n_tasks_max,n_tasks) + + integer, external :: get_tasks_from_taskserver + if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then + exit + endif + done = task_id(n_tasks) == 0 + if (done) then + n_tasks = n_tasks-1 + endif + if (n_tasks == 0) exit + + do k=1,n_tasks + read (task(k),*) subset(k), i_generator(k), N + enddo + if (b%N == 0) then + ! Only first time + bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2) + call create_selection_buffer(bsize, bsize*2, b) + buffer_ready = .True. + else + ASSERT (N == b%N) + endif + + double precision :: time0, time1 + call wall_time(time0) + do k=1,n_tasks + pt2(:,k) = 0.d0 + variance(:,k) = 0.d0 + norm(:,k) = 0.d0 + b%cur = 0 +!double precision :: time2 +!call wall_time(time2) + call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),b,subset(k),pt2_F(i_generator(k))) +!call wall_time(time1) +!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) + enddo + call wall_time(time1) +!print *, '-->', i_generator(1), time1-time0, n_tasks + + integer, external :: tasks_done_to_taskserver + if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then + done = .true. + endif + call sort_selection_buffer(b) + call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, b, task_id, n_tasks) + b%cur=0 + +! ! Try to adjust n_tasks around nproc/2 seconds per job +! n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0))) + n_tasks = 1 + end do + + integer, external :: disconnect_from_taskserver + do i=1,300 + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit + call sleep(1) + print *, 'Retry disconnect...' + end do + + call end_zmq_push_socket(zmq_socket_push,thread) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + if (buffer_ready) then + call delete_selection_buffer(b) + endif +end subroutine + + +subroutine run_pt2_slave_large(thread,iproc,energy) + use selection_types + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i + + integer :: worker_id, ctask, ltask + character*(512), allocatable :: task(:) + integer, allocatable :: task_id(:) + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + type(selection_buffer) :: b + logical :: done, buffer_ready + + double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:) + integer :: n_tasks, k, N + integer, allocatable :: i_generator(:), subset(:) + + integer :: bsize ! Size of selection buffers logical :: sending PROVIDE global_selection_buffer global_selection_buffer_lock - rss = memory_of_int(pt2_n_tasks_max)*67.d0 - rss += memory_of_double(pt2_n_tasks_max)*(N_states*3) - call check_mem(rss,irp_here) allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) diff --git a/src/cipsi/slave_cipsi.irp.f b/src/cipsi/slave_cipsi.irp.f index 034adc6f..58b53f94 100644 --- a/src/cipsi/slave_cipsi.irp.f +++ b/src/cipsi/slave_cipsi.irp.f @@ -232,6 +232,7 @@ subroutine run_slave_main endif IRP_ENDIF + IRP_IF MPI_DEBUG call mpi_print('Entering OpenMP section') IRP_ENDIF @@ -251,7 +252,7 @@ subroutine run_slave_main + 64.d0*pt2_n_tasks_max & ! task + 3.d0*pt2_n_tasks_max*N_states & ! pt2, variance, norm + 1.d0*pt2_n_tasks_max & ! i_generator, subset - + 2.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + + 3.d0*(N_int*2.d0*ii+ ii) & ! selection buffer + 1.d0*(N_int*2.d0*ii+ ii) & ! sort selection buffer + 2.0d0*(ii) & ! preinteresting, interesting, ! prefullinteresting, fullinteresting @@ -272,36 +273,36 @@ subroutine run_slave_main nproc_target = nproc_target - 1 enddo + + if (N_det > 100000) then - if (mpi_master) then - print *, 'N_det', N_det - print *, 'N_det_generators', N_det_generators - print *, 'N_det_selectors', N_det_selectors - print *, 'pt2_e0_denominator', pt2_e0_denominator - print *, 'pt2_stoch_istate', pt2_stoch_istate - print *, 'state_average_weight', state_average_weight - print *, 'Number of threads', nproc_target - endif - - if (.true.) then - PROVIDE psi_det_hii + if (mpi_master) then + print *, 'N_det', N_det + print *, 'N_det_generators', N_det_generators + print *, 'N_det_selectors', N_det_selectors + print *, 'pt2_e0_denominator', pt2_e0_denominator + print *, 'pt2_stoch_istate', pt2_stoch_istate + print *, 'state_average_weight', state_average_weight + print *, 'Number of threads', nproc_target + endif if (h0_type == 'SOP') then - PROVIDE psi_occ_pattern_hii det_to_occ_pattern + PROVIDE det_to_occ_pattern endif - endif - PROVIDE global_selection_buffer - if (mpi_master) then - print *, 'Running PT2' + PROVIDE global_selection_buffer + if (mpi_master) then + print *, 'Running PT2' + endif + !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1) + i = omp_get_thread_num() + call run_pt2_slave(0,i,pt2_e0_denominator) + !$OMP END PARALLEL + FREE state_average_weight + print *, mpi_rank, ': PT2 done' + print *, '-------' + endif - !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1) - i = omp_get_thread_num() - call run_pt2_slave(0,i,pt2_e0_denominator) - !$OMP END PARALLEL - FREE state_average_weight - print *, mpi_rank, ': PT2 done' - print *, '-------' endif IRP_IF MPI diff --git a/src/davidson/u0_h_u0.irp.f b/src/davidson/u0_h_u0.irp.f index c55cb631..cec05a9d 100644 --- a/src/davidson/u0_h_u0.irp.f +++ b/src/davidson/u0_h_u0.irp.f @@ -41,13 +41,27 @@ subroutine u_0_H_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze) double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:) double precision :: u_dot_u,u_dot_v,diag_H_mat_elem - integer :: i,j + integer :: i,j, istate if ((n > 100000).and.distributed_davidson) then allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag)) u_1(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n) + else if (n < n_det_max_full) then + allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st)) + v_0(:,:) = 0.d0 + u_1(:,:) = 0.d0 + s_vec(:,:) = 0.d0 + u_1(1:n,1:N_st) = u_0(1:n,1:N_st) + do istate = 1,N_st + do j=1,n + do i=1,n + v_0(i,istate) = h_matrix_all_dets(i,j) * u_0(j,istate) + s_vec(i,istate) = S2_matrix_all_dets(i,j) * u_0(j,istate) + enddo + enddo + enddo else allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st)) u_1(:,:) = 0.d0 diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f index 2c273f84..d8581397 100644 --- a/src/determinants/slater_rules.irp.f +++ b/src/determinants/slater_rules.irp.f @@ -1775,7 +1775,17 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb) integer :: other_spin integer :: k,l,i - ASSERT (iorb > 0) + if (iorb < 1) then + print *, irp_here, 'iorb < 1' + print *, iorb, mo_num + stop -1 + endif + if (iorb > mo_num) then + print *, irp_here, 'iorb > mo_num' + print *, iorb, mo_num + stop -1 + endif + ASSERT (ispin > 0) ASSERT (ispin < 3) ASSERT (Nint > 0) @@ -1793,16 +1803,6 @@ subroutine ac_operator(iorb,ispin,key,hjj,Nint,na,nb) key(k,ispin) = ibset(key(k,ispin),l) other_spin = iand(ispin,1)+1 - if (iorb < 1) then - print *, irp_here, 'iorb < 1' - print *, iorb, mo_num - stop -1 - endif - if (iorb > mo_num) then - print *, irp_here, 'iorb > mo_num' - print *, iorb, mo_num - stop -1 - endif hjj = hjj + mo_one_e_integrals(iorb,iorb) ! Same spin diff --git a/src/zmq/utils.irp.f b/src/zmq/utils.irp.f index ae21b899..520be0d7 100644 --- a/src/zmq/utils.irp.f +++ b/src/zmq/utils.irp.f @@ -603,12 +603,12 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,name_in) do i=3600,1,-1 rc = f77_zmq_send(zmq_to_qp_run_socket, 'end_job '//trim(zmq_state),8+len(trim(zmq_state)),0) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 512, 0) - call sleep(1) if (trim(message(1:13)) == 'error waiting') then cycle else if (message(1:2) == 'ok') then exit endif + call sleep(1) end do if (i==0) then print *, '.. Forcing kill ..'