mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-10 13:08:23 +01:00
Improved parallel davidson
This commit is contained in:
parent
f3a46c55c1
commit
7fcb4008de
@ -20,11 +20,12 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
double precision :: s2, hij
|
||||
logical, allocatable :: wrotten(:)
|
||||
|
||||
|
||||
allocate(wrotten(bs))
|
||||
wrotten = .false.
|
||||
PROVIDE dav_det
|
||||
|
||||
do sh = blockb, blocke
|
||||
ii=0
|
||||
sh = blockb
|
||||
do sh2=1,shortcut_(0,1)
|
||||
exa = 0
|
||||
do ni=1,N_int
|
||||
@ -32,7 +33,7 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
end do
|
||||
if(exa > 2) cycle
|
||||
|
||||
do i=shortcut_(sh,1),shortcut_(sh+1,1)-1
|
||||
do i=blockb2+shortcut_(sh,1),shortcut_(sh+1,1)-1, istep
|
||||
ii = i - shortcut_(blockb,1) + 1
|
||||
|
||||
org_i = sort_idx_(i,1)
|
||||
@ -60,47 +61,43 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
! call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,org_i),1)
|
||||
! call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,org_i),1)
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
||||
do sh=blockb,min(blocke, shortcut_(0,2))
|
||||
do sh2=sh, shortcut_(0,2), shortcut_(0,1)
|
||||
do i=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
|
||||
ii += 1
|
||||
org_i = sort_idx_(i,2)
|
||||
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,2)
|
||||
ext = 0
|
||||
do ni=1,N_int
|
||||
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
|
||||
if (blockb <= shortcut_(0,2)) then
|
||||
sh=blockb
|
||||
do sh2=sh, shortcut_(0,2), shortcut_(0,1)
|
||||
do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep
|
||||
ii += 1
|
||||
org_i = sort_idx_(i,2)
|
||||
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,2)
|
||||
ext = 0
|
||||
do ni=1,N_int
|
||||
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
! call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,ii),1)
|
||||
! call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,ii),1)
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
N=0
|
||||
do i=1,bs
|
||||
@ -112,16 +109,16 @@ subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
end if
|
||||
end do
|
||||
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
|
||||
subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
|
||||
subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
|
||||
implicit none
|
||||
|
||||
|
||||
integer , intent(in) :: blockb, blocke
|
||||
integer , intent(in) :: N
|
||||
integer , intent(in) :: idx(N)
|
||||
double precision , intent(in) :: vt(N_states_diag, N)
|
||||
@ -175,16 +172,16 @@ end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blocke)
|
||||
subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket
|
||||
integer ,intent(in) :: blockb, blocke
|
||||
integer ,intent(in) :: blockb, blockb2, istep
|
||||
character*(512) :: task
|
||||
|
||||
|
||||
write(task,*) blockb, blocke
|
||||
write(task,*) blockb, blockb2, istep
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket, task)
|
||||
end subroutine
|
||||
|
||||
@ -213,7 +210,7 @@ subroutine davidson_run_slave(thread,iproc)
|
||||
|
||||
integer, intent(in) :: thread, iproc
|
||||
|
||||
integer :: worker_id, task_id, blockb, blocke
|
||||
integer :: worker_id, task_id, blockb
|
||||
character*(512) :: task
|
||||
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
@ -253,7 +250,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
||||
character*(512) :: task
|
||||
|
||||
|
||||
integer :: blockb, blocke
|
||||
integer :: blockb, blockb2, istep
|
||||
integer :: N
|
||||
integer , allocatable :: idx(:)
|
||||
double precision , allocatable :: vt(:,:)
|
||||
@ -266,10 +263,10 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
||||
do
|
||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
|
||||
if(task_id == 0) exit
|
||||
read (task,*) blockb, blocke
|
||||
bs = shortcut_(blocke+1,1) - shortcut_(blockb, 1)
|
||||
read (task,*) blockb, blockb2, istep
|
||||
bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1)
|
||||
do i=blockb, shortcut_(0,2), shortcut_(0,1)
|
||||
do j=i, min(i+blocke-blockb, shortcut_(0,2))
|
||||
do j=i, min(i, shortcut_(0,2))
|
||||
bs += shortcut_(j+1,2) - shortcut_(j, 2)
|
||||
end do
|
||||
end do
|
||||
@ -280,10 +277,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
||||
allocate(st(N_states_diag, bs))
|
||||
end if
|
||||
|
||||
call davidson_process(blockb, blocke, N, idx, vt, st, bs)
|
||||
|
||||
call davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||
call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
|
||||
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
|
||||
end do
|
||||
|
||||
end subroutine
|
||||
@ -387,7 +383,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
|
||||
|
||||
integer :: msize
|
||||
|
||||
msize = (max_workload + max_blocksize)*2
|
||||
msize = (1 + max_blocksize)*2
|
||||
allocate(idx(msize))
|
||||
allocate(vt(N_states_diag, msize))
|
||||
allocate(st(N_states_diag, msize))
|
||||
@ -402,7 +398,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
|
||||
do while (more == 1)
|
||||
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
|
||||
!DIR$ FORCEINLINE
|
||||
call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
|
||||
call davidson_collect(N, idx, vt, st , v0t, s0t)
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
||||
end do
|
||||
deallocate(idx,vt,st)
|
||||
|
@ -208,7 +208,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
integer :: N_st_8
|
||||
|
||||
integer, external :: align_double
|
||||
integer :: workload, blockb, blocke
|
||||
integer :: blockb, blockb2, istep
|
||||
double precision :: ave_workload, workload
|
||||
|
||||
integer(ZMQ_PTR) :: handler
|
||||
|
||||
@ -234,21 +235,38 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
|
||||
|
||||
workload = 0
|
||||
blockb = shortcut(0,1)
|
||||
blocke = blockb
|
||||
call davidson_init(handler,n,N_st_8,ut)
|
||||
|
||||
|
||||
ave_workload = 0.d0
|
||||
do sh=1,shortcut(0,1)
|
||||
ave_workload += shortcut(0,1)
|
||||
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
ave_workload = ave_workload/dble(shortcut(0,1))
|
||||
|
||||
|
||||
print *, 'Ave workload :', ave_workload
|
||||
|
||||
do sh=shortcut(0,1),1,-1
|
||||
workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
if(workload > max_workload) then
|
||||
blocke = sh
|
||||
call davidson_add_task(handler, blocke, blockb)
|
||||
blockb = sh-1
|
||||
workload = 0
|
||||
end if
|
||||
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
istep = 1+ int(0.5d0*workload/ave_workload)
|
||||
do blockb2=0, istep-1
|
||||
call davidson_add_task(handler, sh, blockb2, istep)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
if(blockb > 0) call davidson_add_task(handler, 1, blockb)
|
||||
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
||||
|
||||
do istate=1,N_st
|
||||
@ -262,8 +280,4 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
end
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, max_workload ]
|
||||
max_workload = 1000
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
|
@ -364,7 +364,7 @@ double precision function get_mo_bielec_integral(i,j,k,l,map)
|
||||
integer(key_kind) :: idx
|
||||
type(map_type), intent(inout) :: map
|
||||
real(integral_kind) :: tmp
|
||||
PROVIDE mo_bielec_integrals_in_map
|
||||
PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
|
||||
if ( (i >= mo_integrals_cache_min) .and. &
|
||||
(j >= mo_integrals_cache_min) .and. &
|
||||
(k >= mo_integrals_cache_min) .and. &
|
||||
@ -393,7 +393,7 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map)
|
||||
integer(key_kind) :: idx
|
||||
type(map_type), intent(inout) :: map
|
||||
real(integral_kind) :: tmp
|
||||
PROVIDE mo_bielec_integrals_in_map
|
||||
PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
|
||||
if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then
|
||||
double precision, external :: get_mo_bielec_integral
|
||||
!DIR$ FORCEINLINE
|
||||
@ -411,7 +411,7 @@ double precision function mo_bielec_integral(i,j,k,l)
|
||||
END_DOC
|
||||
integer, intent(in) :: i,j,k,l
|
||||
double precision :: get_mo_bielec_integral
|
||||
PROVIDE mo_bielec_integrals_in_map
|
||||
PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
|
||||
!DIR$ FORCEINLINE
|
||||
mo_bielec_integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user