10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-22 20:35:19 +01:00

Fixed distributed Davidson

This commit is contained in:
Anthony Scemama 2017-02-08 20:54:15 +01:00
parent 20edbbd777
commit 9a7db910d9
5 changed files with 96 additions and 60 deletions

View File

@ -342,7 +342,7 @@ subroutine get_last_full_tooth(computed, last_tooth)
last_tooth = 0
combLoop : do i=comb_teeth, 1, -1
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32
do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1
if(.not.computed(j)) then
missing -= 1
@ -385,7 +385,7 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
if (icount > n) then
call get_filling_teeth(computed, tbc)
icount = 0
n = ishft(tbc_save,-1)
n = ishft(tbc_save,-4)
endif
enddo

View File

@ -546,6 +546,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
e_pert = 0.5d0 * ( tmp - delta_E)
pt2(istate) = pt2(istate) + e_pert
max_e_pert = min(e_pert,max_e_pert)
! ci(istate) = e_pert / mat(istate, p1, p2)
end do
if(dabs(max_e_pert) > buf%mini) then

View File

@ -20,9 +20,10 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
double precision :: s2, hij
logical, allocatable :: wrotten(:)
PROVIDE dav_det ref_bitmask_energy
allocate(wrotten(bs))
wrotten = .false.
PROVIDE dav_det
ii=0
sh = blockb
@ -43,14 +44,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1
if(i == j) cycle
org_j = sort_idx_(j,1)
ext = exa
do ni=1,N_int
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
if(ext > 4) exit
end do
if(ext <= 4) then
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
org_j = sort_idx_(j,1)
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
@ -58,8 +60,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
enddo
endif
enddo
@ -76,23 +78,25 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
if(i == j) cycle
org_j = sort_idx_(j,2)
ext = 0
do ni=1,N_int
ext = popcnt(xor(sorted_(1,i,2), sorted_(1,j,2)))
if (ext > 4) cycle
do ni=2,N_int
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
if (ext > 4) exit
end do
if(ext == 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
enddo
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
enddo
end if
end do
end do
@ -320,6 +324,15 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to push task_id"
! Activate is zmq_socket_push is a REQ
integer :: idummy
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
if (rc /= 4) then
print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
stop 'error'
endif
end subroutine
@ -358,6 +371,14 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
! Activate if zmq_socket_pull is a REP
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
if (rc /= 4) then
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
stop 'error'
endif
end subroutine
@ -434,18 +455,14 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
double precision , intent(inout) :: v0(LDA, N_states_diag)
double precision , intent(inout) :: s0(LDA, N_states_diag)
call zmq_set_running(zmq_to_qp_run_socket)
zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
i = omp_get_thread_num()
PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
i = omp_get_thread_num()
if (i == 0 ) then
zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
call end_zmq_to_qp_run_socket(zmq_collector)
call end_zmq_pull_socket(zmq_socket_pull)
@ -457,7 +474,6 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
end subroutine

View File

@ -18,6 +18,11 @@ subroutine davidson_converged(energy,residual,wall,iterations,cpu,N_st,converged
double precision :: E(N_st), time
double precision, allocatable, save :: energy_old(:)
if (iterations < 2) then
converged = .False.
return
endif
if (.not.allocated(energy_old)) then
allocate(energy_old(N_st))
energy_old = 0.d0

View File

@ -267,6 +267,7 @@ END_PROVIDER
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
use omp_lib
use bitmasks
use f77_zmq
implicit none
@ -287,7 +288,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
double precision :: hij,s2
double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer :: i0, j0, ithread
integer, allocatable :: shortcut(:,:), sort_idx(:)
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
@ -321,41 +322,55 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
ut(istate,i) = u_0(i,istate)
enddo
enddo
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
blockb = shortcut(0,1)
call davidson_init(handler,n,N_st_8,ut)
ave_workload = 0.d0
do sh=1,shortcut(0,1)
ave_workload += shortcut(0,1)
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
enddo
ave_workload = ave_workload/dble(shortcut(0,1))
target_workload_inv = 0.001d0/ave_workload
do sh=1,shortcut(0,1),1
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
istep = 1+ int(workload*target_workload_inv)
do blockb2=0, istep-1
call davidson_add_task(handler, sh, blockb2, istep)
enddo
enddo
call davidson_run(handler, v_0, s_0, size(v_0,1))
PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
ithread = omp_get_thread_num()
if (ithread == 0 ) then
call zmq_set_running(handler)
ave_workload = 0.d0
do sh=1,shortcut(0,1)
ave_workload += shortcut(0,1)
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
enddo
ave_workload = ave_workload/dble(shortcut(0,1))
target_workload_inv = 0.01d0/ave_workload
do sh=1,shortcut(0,1),1
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
istep = 1+ int(workload*target_workload_inv)
do blockb2=0, istep-1
call davidson_add_task(handler, sh, blockb2, istep)
enddo
enddo
call davidson_run(handler, v_0, s_0, size(v_0,1))
else if (ithread == 1 ) then
call davidson_miniserver_run ()
else
call davidson_slave_inproc(ithread)
endif
!$OMP END PARALLEL
call end_parallel_job(handler, 'davidson')
do istate=1,N_st
do i=1,n
@ -551,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
enddo
do j=i+1,shortcut(sh+1,1)-1
if (i==j) cycle
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint