10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-07-22 10:47:33 +02:00

Fixed distributed Davidson

This commit is contained in:
Anthony Scemama 2017-02-08 20:54:15 +01:00
parent 20edbbd777
commit 9a7db910d9
5 changed files with 96 additions and 60 deletions

View File

@ -342,7 +342,7 @@ subroutine get_last_full_tooth(computed, last_tooth)
last_tooth = 0 last_tooth = 0
combLoop : do i=comb_teeth, 1, -1 combLoop : do i=comb_teeth, 1, -1
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-6) ! /64 missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-5) ! /32
do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1 do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1
if(.not.computed(j)) then if(.not.computed(j)) then
missing -= 1 missing -= 1
@ -385,7 +385,7 @@ subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
if (icount > n) then if (icount > n) then
call get_filling_teeth(computed, tbc) call get_filling_teeth(computed, tbc)
icount = 0 icount = 0
n = ishft(tbc_save,-1) n = ishft(tbc_save,-4)
endif endif
enddo enddo

View File

@ -546,6 +546,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
e_pert = 0.5d0 * ( tmp - delta_E) e_pert = 0.5d0 * ( tmp - delta_E)
pt2(istate) = pt2(istate) + e_pert pt2(istate) = pt2(istate) + e_pert
max_e_pert = min(e_pert,max_e_pert) max_e_pert = min(e_pert,max_e_pert)
! ci(istate) = e_pert / mat(istate, p1, p2)
end do end do
if(dabs(max_e_pert) > buf%mini) then if(dabs(max_e_pert) > buf%mini) then

View File

@ -20,9 +20,10 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
double precision :: s2, hij double precision :: s2, hij
logical, allocatable :: wrotten(:) logical, allocatable :: wrotten(:)
PROVIDE dav_det ref_bitmask_energy
allocate(wrotten(bs)) allocate(wrotten(bs))
wrotten = .false. wrotten = .false.
PROVIDE dav_det
ii=0 ii=0
sh = blockb sh = blockb
@ -43,14 +44,15 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1 do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1
if(i == j) cycle if(i == j) cycle
org_j = sort_idx_(j,1)
ext = exa ext = exa
do ni=1,N_int do ni=1,N_int
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1))) ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
if(ext > 4) exit
end do end do
if(ext <= 4) then if(ext <= 4) then
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) org_j = sort_idx_(j,1)
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then if(.not. wrotten(ii)) then
wrotten(ii) = .true. wrotten(ii) = .true.
idx(ii) = org_i idx(ii) = org_i
@ -58,8 +60,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
st (:,ii) = 0d0 st (:,ii) = 0d0
end if end if
do istate=1,N_states_diag do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j) vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j) st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
enddo enddo
endif endif
enddo enddo
@ -76,9 +78,11 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
if(i == j) cycle if(i == j) cycle
org_j = sort_idx_(j,2) org_j = sort_idx_(j,2)
ext = 0 ext = popcnt(xor(sorted_(1,i,2), sorted_(1,j,2)))
do ni=1,N_int if (ext > 4) cycle
do ni=2,N_int
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2))) ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
if (ext > 4) exit
end do end do
if(ext == 4) then if(ext == 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
@ -90,8 +94,8 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
st (:,ii) = 0d0 st (:,ii) = 0d0
end if end if
do istate=1,N_states_diag do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j) vt (istate,ii) = vt (istate,ii) +hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j) st (istate,ii) = st (istate,ii) +s2*dav_ut(istate,org_j)
enddo enddo
end if end if
end do end do
@ -320,6 +324,15 @@ subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to push task_id" if(rc /= 4) stop "davidson_push_results failed to push task_id"
! Activate is zmq_socket_push is a REQ
integer :: idummy
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
if (rc /= 4) then
print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
stop 'error'
endif
end subroutine end subroutine
@ -358,6 +371,14 @@ subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
! Activate if zmq_socket_pull is a REP
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
if (rc /= 4) then
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
stop 'error'
endif
end subroutine end subroutine
@ -434,18 +455,14 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
double precision , intent(inout) :: v0(LDA, N_states_diag) double precision , intent(inout) :: v0(LDA, N_states_diag)
double precision , intent(inout) :: s0(LDA, N_states_diag) double precision , intent(inout) :: s0(LDA, N_states_diag)
call zmq_set_running(zmq_to_qp_run_socket)
zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
i = omp_get_thread_num()
PROVIDE nproc PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i) !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
i = omp_get_thread_num() i = omp_get_thread_num()
if (i == 0 ) then if (i == 0 ) then
zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA) call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
call end_zmq_to_qp_run_socket(zmq_collector) call end_zmq_to_qp_run_socket(zmq_collector)
call end_zmq_pull_socket(zmq_socket_pull) call end_zmq_pull_socket(zmq_socket_pull)
@ -457,7 +474,6 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
endif endif
!$OMP END PARALLEL !$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
end subroutine end subroutine

View File

@ -18,6 +18,11 @@ subroutine davidson_converged(energy,residual,wall,iterations,cpu,N_st,converged
double precision :: E(N_st), time double precision :: E(N_st), time
double precision, allocatable, save :: energy_old(:) double precision, allocatable, save :: energy_old(:)
if (iterations < 2) then
converged = .False.
return
endif
if (.not.allocated(energy_old)) then if (.not.allocated(energy_old)) then
allocate(energy_old(N_st)) allocate(energy_old(N_st))
energy_old = 0.d0 energy_old = 0.d0

View File

@ -267,6 +267,7 @@ END_PROVIDER
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
use omp_lib
use bitmasks use bitmasks
use f77_zmq use f77_zmq
implicit none implicit none
@ -287,7 +288,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
double precision :: hij,s2 double precision :: hij,s2
double precision, allocatable :: ut(:,:) double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0, ithread
integer, allocatable :: shortcut(:,:), sort_idx(:) integer, allocatable :: shortcut(:,:), sort_idx(:)
integer(bit_kind), allocatable :: sorted(:,:), version(:,:) integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
@ -328,6 +329,13 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
call davidson_init(handler,n,N_st_8,ut) call davidson_init(handler,n,N_st_8,ut)
PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
ithread = omp_get_thread_num()
if (ithread == 0 ) then
call zmq_set_running(handler)
ave_workload = 0.d0 ave_workload = 0.d0
do sh=1,shortcut(0,1) do sh=1,shortcut(0,1)
ave_workload += shortcut(0,1) ave_workload += shortcut(0,1)
@ -339,7 +347,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
end do end do
enddo enddo
ave_workload = ave_workload/dble(shortcut(0,1)) ave_workload = ave_workload/dble(shortcut(0,1))
target_workload_inv = 0.001d0/ave_workload target_workload_inv = 0.01d0/ave_workload
do sh=1,shortcut(0,1),1 do sh=1,shortcut(0,1),1
@ -354,8 +362,15 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
call davidson_add_task(handler, sh, blockb2, istep) call davidson_add_task(handler, sh, blockb2, istep)
enddo enddo
enddo enddo
call davidson_run(handler, v_0, s_0, size(v_0,1)) call davidson_run(handler, v_0, s_0, size(v_0,1))
else if (ithread == 1 ) then
call davidson_miniserver_run ()
else
call davidson_slave_inproc(ithread)
endif
!$OMP END PARALLEL
call end_parallel_job(handler, 'davidson')
do istate=1,N_st do istate=1,N_st
do i=1,n do i=1,n
@ -551,7 +566,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
enddo enddo
do j=i+1,shortcut(sh+1,1)-1 do j=i+1,shortcut(sh+1,1)-1
if (i==j) cycle
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle if (ext > 4) cycle
do ni=2,Nint do ni=2,Nint