diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 753afcc9..d7c98933 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -342,7 +342,7 @@ subroutine get_last_full_tooth(computed, last_tooth) last_tooth = 0 combLoop : do i=comb_teeth, 1, -1 - missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64 + missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 if(.not.computed(j)) then missing -= 1 @@ -385,7 +385,7 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) if (icount > n) then call get_filling_teeth(computed, tbc) icount = 0 - n = ishft(tbc_save,-1) + n = ishft(tbc_save,-4) endif enddo diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 587618c8..de7c93f8 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -546,6 +546,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d e_pert = 0.5d0 * ( tmp - delta_E) pt2(istate) = pt2(istate) + e_pert max_e_pert = min(e_pert,max_e_pert) +! ci(istate) = e_pert / mat(istate, p1, p2) end do if(dabs(max_e_pert) > buf%mini) then diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index cede52c9..724aac08 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -20,9 +20,10 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) double precision :: s2, hij logical, allocatable :: wrotten(:) + PROVIDE dav_det ref_bitmask_energy + allocate(wrotten(bs)) wrotten = .false. - PROVIDE dav_det ii=0 sh = blockb @@ -43,14 +44,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1 if(i == j) cycle - org_j = sort_idx_(j,1) ext = exa do ni=1,N_int ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) + if(ext > 4) exit end do if(ext <= 4) then - call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + org_j = sort_idx_(j,1) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) if(.not. wrotten(ii)) then wrotten(ii) = .true. idx(ii) = org_i @@ -58,8 +60,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) st (:,ii) = 0d0 end if do istate=1,N_states_diag - vt (istate,ii) += hij*dav_ut(istate,org_j) - st (istate,ii) += s2*dav_ut(istate,org_j) + vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j) + st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j) enddo endif enddo @@ -76,23 +78,25 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep) do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 if(i == j) cycle org_j = sort_idx_(j,2) - ext = 0 - do ni=1,N_int + ext = popcnt(xor(sorted_(1,i,2), sorted_(1,j,2))) + if (ext > 4) cycle + do ni=2,N_int ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2))) + if (ext > 4) exit end do if(ext == 4) then - call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) - call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) - if(.not. wrotten(ii)) then - wrotten(ii) = .true. - idx(ii) = org_i - vt (:,ii) = 0d0 - st (:,ii) = 0d0 - end if - do istate=1,N_states_diag - vt (istate,ii) += hij*dav_ut(istate,org_j) - st (istate,ii) += s2*dav_ut(istate,org_j) - enddo + call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) + call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) + if(.not. wrotten(ii)) then + wrotten(ii) = .true. + idx(ii) = org_i + vt (:,ii) = 0d0 + st (:,ii) = 0d0 + end if + do istate=1,N_states_diag + vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j) + st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j) + enddo end if end do end do @@ -320,6 +324,15 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "davidson_push_results failed to push task_id" + +! Activate is zmq_socket_push is a REQ + integer :: idummy + rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) + if (rc /= 4) then + print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)' + stop 'error' + endif + end subroutine @@ -358,6 +371,14 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" + +! Activate if zmq_socket_pull is a REP + rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) + if (rc /= 4) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop 'error' + endif + end subroutine @@ -434,18 +455,14 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA) double precision , intent(inout) :: v0(LDA, N_states_diag) double precision , intent(inout) :: s0(LDA, N_states_diag) - call zmq_set_running(zmq_to_qp_run_socket) - - zmq_collector = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - i = omp_get_thread_num() - PROVIDE nproc !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i) i = omp_get_thread_num() if (i == 0 ) then + zmq_collector = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA) call end_zmq_to_qp_run_socket(zmq_collector) call end_zmq_pull_socket(zmq_socket_pull) @@ -457,7 +474,6 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA) endif !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'davidson') end subroutine diff --git a/src/Davidson/parameters.irp.f b/src/Davidson/parameters.irp.f index ae8babaa..7d383192 100644 --- a/src/Davidson/parameters.irp.f +++ b/src/Davidson/parameters.irp.f @@ -18,6 +18,11 @@ subroutine davidson_converged(energy,residual,wall,iterations,cpu,N_st,converged double precision :: E(N_st), time double precision, allocatable, save :: energy_old(:) + if (iterations < 2) then + converged = .False. + return + endif + if (.not.allocated(energy_old)) then allocate(energy_old(N_st)) energy_old = 0.d0 diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 9e76bc92..026921d0 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -267,6 +267,7 @@ END_PROVIDER subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) + use omp_lib use bitmasks use f77_zmq implicit none @@ -287,7 +288,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ double precision :: hij,s2 double precision, allocatable :: ut(:,:) integer :: i,j,k,l, jj,ii - integer :: i0, j0 + integer :: i0, j0, ithread integer, allocatable :: shortcut(:,:), sort_idx(:) integer(bit_kind), allocatable :: sorted(:,:), version(:,:) @@ -321,41 +322,55 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ ut(istate,i) = u_0(i,istate) enddo enddo - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) - call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) blockb = shortcut(0,1) call davidson_init(handler,n,N_st_8,ut) - - ave_workload = 0.d0 - do sh=1,shortcut(0,1) - ave_workload += shortcut(0,1) - ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2 - end do - end do - enddo - ave_workload = ave_workload/dble(shortcut(0,1)) - target_workload_inv = 0.001d0/ave_workload - - - do sh=1,shortcut(0,1),1 - workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 - do i=sh, shortcut(0,2), shortcut(0,1) - do j=i, min(i, shortcut(0,2)) - workload += (shortcut(j+1,2) - shortcut(j, 2))**2 - end do - end do - istep = 1+ int(workload*target_workload_inv) - do blockb2=0, istep-1 - call davidson_add_task(handler, sh, blockb2, istep) - enddo - enddo - call davidson_run(handler, v_0, s_0, size(v_0,1)) + PROVIDE nproc + + !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread == 0 ) then + + call zmq_set_running(handler) + ave_workload = 0.d0 + do sh=1,shortcut(0,1) + ave_workload += shortcut(0,1) + ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 + do i=sh, shortcut(0,2), shortcut(0,1) + do j=i, min(i, shortcut(0,2)) + ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2 + end do + end do + enddo + ave_workload = ave_workload/dble(shortcut(0,1)) + target_workload_inv = 0.01d0/ave_workload + + + do sh=1,shortcut(0,1),1 + workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2 + do i=sh, shortcut(0,2), shortcut(0,1) + do j=i, min(i, shortcut(0,2)) + workload += (shortcut(j+1,2) - shortcut(j, 2))**2 + end do + end do + istep = 1+ int(workload*target_workload_inv) + do blockb2=0, istep-1 + call davidson_add_task(handler, sh, blockb2, istep) + enddo + enddo + call davidson_run(handler, v_0, s_0, size(v_0,1)) + else if (ithread == 1 ) then + call davidson_miniserver_run () + else + call davidson_slave_inproc(ithread) + endif + !$OMP END PARALLEL + + call end_parallel_job(handler, 'davidson') do istate=1,N_st do i=1,n @@ -551,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo do j=i+1,shortcut(sh+1,1)-1 - if (i==j) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint