mirror of
https://github.com/LCPQ/quantum_package
synced 2024-12-23 04:43:50 +01:00
Fixed distributed Davidson
This commit is contained in:
parent
20edbbd777
commit
9a7db910d9
@ -342,7 +342,7 @@ subroutine get_last_full_tooth(computed, last_tooth)
|
||||
|
||||
last_tooth = 0
|
||||
combLoop : do i=comb_teeth, 1, -1
|
||||
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64
|
||||
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32
|
||||
do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1
|
||||
if(.not.computed(j)) then
|
||||
missing -= 1
|
||||
@ -385,7 +385,7 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
|
||||
if (icount > n) then
|
||||
call get_filling_teeth(computed, tbc)
|
||||
icount = 0
|
||||
n = ishft(tbc_save,-1)
|
||||
n = ishft(tbc_save,-4)
|
||||
endif
|
||||
enddo
|
||||
|
||||
|
@ -546,6 +546,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
||||
e_pert = 0.5d0 * ( tmp - delta_E)
|
||||
pt2(istate) = pt2(istate) + e_pert
|
||||
max_e_pert = min(e_pert,max_e_pert)
|
||||
! ci(istate) = e_pert / mat(istate, p1, p2)
|
||||
end do
|
||||
|
||||
if(dabs(max_e_pert) > buf%mini) then
|
||||
|
@ -20,9 +20,10 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
double precision :: s2, hij
|
||||
logical, allocatable :: wrotten(:)
|
||||
|
||||
PROVIDE dav_det ref_bitmask_energy
|
||||
|
||||
allocate(wrotten(bs))
|
||||
wrotten = .false.
|
||||
PROVIDE dav_det
|
||||
|
||||
ii=0
|
||||
sh = blockb
|
||||
@ -43,14 +44,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
|
||||
do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,1)
|
||||
ext = exa
|
||||
do ni=1,N_int
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
|
||||
if(ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
org_j = sort_idx_(j,1)
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
@ -58,8 +60,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
@ -76,23 +78,25 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,2)
|
||||
ext = 0
|
||||
do ni=1,N_int
|
||||
ext = popcnt(xor(sorted_(1,i,2), sorted_(1,j,2)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,N_int
|
||||
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
@ -320,6 +324,15 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
|
||||
if(rc /= 4) stop "davidson_push_results failed to push task_id"
|
||||
|
||||
! Activate is zmq_socket_push is a REQ
|
||||
integer :: idummy
|
||||
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
|
||||
if (rc /= 4) then
|
||||
print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
@ -358,6 +371,14 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
||||
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
||||
|
||||
! Activate if zmq_socket_pull is a REP
|
||||
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
|
||||
if (rc /= 4) then
|
||||
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
@ -434,18 +455,14 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
|
||||
double precision , intent(inout) :: v0(LDA, N_states_diag)
|
||||
double precision , intent(inout) :: s0(LDA, N_states_diag)
|
||||
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
|
||||
zmq_collector = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
i = omp_get_thread_num()
|
||||
|
||||
|
||||
PROVIDE nproc
|
||||
|
||||
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
if (i == 0 ) then
|
||||
zmq_collector = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
|
||||
call end_zmq_to_qp_run_socket(zmq_collector)
|
||||
call end_zmq_pull_socket(zmq_socket_pull)
|
||||
@ -457,7 +474,6 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
|
||||
end subroutine
|
||||
|
||||
|
||||
|
@ -18,6 +18,11 @@ subroutine davidson_converged(energy,residual,wall,iterations,cpu,N_st,converged
|
||||
double precision :: E(N_st), time
|
||||
double precision, allocatable, save :: energy_old(:)
|
||||
|
||||
if (iterations < 2) then
|
||||
converged = .False.
|
||||
return
|
||||
endif
|
||||
|
||||
if (.not.allocated(energy_old)) then
|
||||
allocate(energy_old(N_st))
|
||||
energy_old = 0.d0
|
||||
|
@ -267,6 +267,7 @@ END_PROVIDER
|
||||
|
||||
|
||||
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
use omp_lib
|
||||
use bitmasks
|
||||
use f77_zmq
|
||||
implicit none
|
||||
@ -287,7 +288,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
|
||||
double precision :: hij,s2
|
||||
double precision, allocatable :: ut(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
integer :: i0, j0, ithread
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
|
||||
@ -321,41 +322,55 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
|
||||
ut(istate,i) = u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
|
||||
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
|
||||
|
||||
blockb = shortcut(0,1)
|
||||
call davidson_init(handler,n,N_st_8,ut)
|
||||
|
||||
|
||||
ave_workload = 0.d0
|
||||
do sh=1,shortcut(0,1)
|
||||
ave_workload += shortcut(0,1)
|
||||
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
ave_workload = ave_workload/dble(shortcut(0,1))
|
||||
target_workload_inv = 0.001d0/ave_workload
|
||||
PROVIDE nproc
|
||||
|
||||
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
if (ithread == 0 ) then
|
||||
|
||||
do sh=1,shortcut(0,1),1
|
||||
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
call zmq_set_running(handler)
|
||||
ave_workload = 0.d0
|
||||
do sh=1,shortcut(0,1)
|
||||
ave_workload += shortcut(0,1)
|
||||
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
istep = 1+ int(workload*target_workload_inv)
|
||||
do blockb2=0, istep-1
|
||||
call davidson_add_task(handler, sh, blockb2, istep)
|
||||
enddo
|
||||
enddo
|
||||
ave_workload = ave_workload/dble(shortcut(0,1))
|
||||
target_workload_inv = 0.01d0/ave_workload
|
||||
|
||||
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
||||
|
||||
do sh=1,shortcut(0,1),1
|
||||
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
istep = 1+ int(workload*target_workload_inv)
|
||||
do blockb2=0, istep-1
|
||||
call davidson_add_task(handler, sh, blockb2, istep)
|
||||
enddo
|
||||
enddo
|
||||
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
||||
else if (ithread == 1 ) then
|
||||
call davidson_miniserver_run ()
|
||||
else
|
||||
call davidson_slave_inproc(ithread)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call end_parallel_job(handler, 'davidson')
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
@ -551,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
enddo
|
||||
|
||||
do j=i+1,shortcut(sh+1,1)-1
|
||||
if (i==j) cycle
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
|
Loading…
Reference in New Issue
Block a user