mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-10 04:58:25 +01:00
Improve parallel davidson
This commit is contained in:
parent
1de02c46e6
commit
693604d338
@ -91,7 +91,7 @@ end subroutine
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0, s0)
|
subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
|
|
||||||
@ -100,14 +100,19 @@ subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0, s0)
|
|||||||
integer , intent(in) :: idx(N)
|
integer , intent(in) :: idx(N)
|
||||||
double precision , intent(in) :: vt(N_states_diag, N)
|
double precision , intent(in) :: vt(N_states_diag, N)
|
||||||
double precision , intent(in) :: st(N_states_diag, N)
|
double precision , intent(in) :: st(N_states_diag, N)
|
||||||
double precision , intent(inout) :: v0(dav_size, N_states_diag)
|
double precision , intent(inout) :: v0t(N_states_diag,dav_size)
|
||||||
double precision , intent(inout) :: s0(dav_size, N_states_diag)
|
double precision , intent(inout) :: s0t(N_states_diag,dav_size)
|
||||||
|
|
||||||
integer :: i
|
integer :: i, j, k
|
||||||
|
|
||||||
|
!DIR$ IVDEP
|
||||||
do i=1,N
|
do i=1,N
|
||||||
v0(idx(i), :) += vt(:, i)
|
k = idx(i)
|
||||||
s0(idx(i), :) += st(:, i)
|
!DIR$ IVDEP
|
||||||
|
do j=1,N_states_diag
|
||||||
|
v0t(j,k) = v0t(j,k) + vt(j,i)
|
||||||
|
s0t(j,k) = s0t(j,k) + st(j,i)
|
||||||
|
enddo
|
||||||
end do
|
end do
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
@ -221,7 +226,6 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
|||||||
|
|
||||||
call davidson_process(blockb, blocke, N, idx, vt, st)
|
call davidson_process(blockb, blocke, N, idx, vt, st)
|
||||||
|
|
||||||
! reverse ?
|
|
||||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||||
call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
|
call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
|
||||||
end do
|
end do
|
||||||
@ -321,7 +325,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
|
|||||||
integer :: blockb, blocke
|
integer :: blockb, blocke
|
||||||
integer :: N
|
integer :: N
|
||||||
integer , allocatable :: idx(:)
|
integer , allocatable :: idx(:)
|
||||||
double precision , allocatable :: vt(:,:)
|
double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:)
|
||||||
double precision , allocatable :: st(:,:)
|
double precision , allocatable :: st(:,:)
|
||||||
integer :: deleted
|
integer :: deleted
|
||||||
logical, allocatable :: done(:)
|
logical, allocatable :: done(:)
|
||||||
@ -332,18 +336,23 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
|
|||||||
allocate(idx(dav_size))
|
allocate(idx(dav_size))
|
||||||
allocate(vt(N_states_diag, dav_size))
|
allocate(vt(N_states_diag, dav_size))
|
||||||
allocate(st(N_states_diag, dav_size))
|
allocate(st(N_states_diag, dav_size))
|
||||||
|
allocate(v0t(N_states_diag, dav_size))
|
||||||
|
allocate(s0t(N_states_diag, dav_size))
|
||||||
|
|
||||||
more = 1
|
more = 1
|
||||||
|
|
||||||
do while (more == 1)
|
do while (more == 1)
|
||||||
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
|
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
|
||||||
call davidson_collect(blockb, blocke, N, idx, vt, st , v0, s0)
|
!DIR$ FORCEINLINE
|
||||||
! done(block) = .true.
|
call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
|
||||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
||||||
deleted += 1
|
deleted += 1
|
||||||
! print *, "DELETED", deleted, done
|
|
||||||
end do
|
end do
|
||||||
! print *, "done collector"
|
deallocate(idx,vt,st,done)
|
||||||
|
|
||||||
|
call dtranspose(v0t,size(v0t,1), v0, size(v0,1), N_states_diag, dav_size)
|
||||||
|
call dtranspose(s0t,size(s0t,1), s0, size(s0,1), N_states_diag, dav_size)
|
||||||
|
deallocate(v0t,s0t)
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
@ -382,7 +391,7 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0)
|
|||||||
else if(i==1) then
|
else if(i==1) then
|
||||||
call davidson_miniserver_run()
|
call davidson_miniserver_run()
|
||||||
else
|
else
|
||||||
call davidson_slave_inproc(i)
|
call davidson_slave_inproc(i-1)
|
||||||
endif
|
endif
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
|
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
|
||||||
@ -393,7 +402,6 @@ end subroutine
|
|||||||
subroutine davidson_miniserver_run()
|
subroutine davidson_miniserver_run()
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
implicit none
|
implicit none
|
||||||
integer(ZMQ_PTR) context
|
|
||||||
integer(ZMQ_PTR) responder
|
integer(ZMQ_PTR) responder
|
||||||
character*(64) address
|
character*(64) address
|
||||||
character(len=:), allocatable :: buffer
|
character(len=:), allocatable :: buffer
|
||||||
@ -402,8 +410,7 @@ subroutine davidson_miniserver_run()
|
|||||||
allocate (character(len=20) :: buffer)
|
allocate (character(len=20) :: buffer)
|
||||||
address = 'tcp://*:11223'
|
address = 'tcp://*:11223'
|
||||||
|
|
||||||
context = f77_zmq_ctx_new()
|
responder = f77_zmq_socket(zmq_context, ZMQ_REP)
|
||||||
responder = f77_zmq_socket(context, ZMQ_REP)
|
|
||||||
rc = f77_zmq_bind(responder,address)
|
rc = f77_zmq_bind(responder,address)
|
||||||
|
|
||||||
do
|
do
|
||||||
@ -419,7 +426,6 @@ subroutine davidson_miniserver_run()
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
rc = f77_zmq_close(responder)
|
rc = f77_zmq_close(responder)
|
||||||
rc = f77_zmq_ctx_destroy(context)
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
@ -427,21 +433,20 @@ subroutine davidson_miniserver_end()
|
|||||||
implicit none
|
implicit none
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
|
|
||||||
integer(ZMQ_PTR) context
|
|
||||||
integer(ZMQ_PTR) requester
|
integer(ZMQ_PTR) requester
|
||||||
character*(64) address
|
character*(64) address
|
||||||
integer rc
|
integer rc
|
||||||
character*(64) buf
|
character*(64) buf
|
||||||
|
|
||||||
address = trim(qp_run_address)//':11223'
|
address = trim(qp_run_address)//':11223'
|
||||||
context = f77_zmq_ctx_new()
|
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||||
requester = f77_zmq_socket(context, ZMQ_REQ)
|
|
||||||
rc = f77_zmq_connect(requester,address)
|
rc = f77_zmq_connect(requester,address)
|
||||||
|
|
||||||
rc = f77_zmq_send(requester, "end", 3, 0)
|
rc = f77_zmq_send(requester, "end", 3, ZMQ_NOBLOCK)
|
||||||
rc = f77_zmq_recv(requester, buf, 3, 0)
|
if (rc > 0) then
|
||||||
|
rc = f77_zmq_recv(requester, buf, 3, 0)
|
||||||
|
endif
|
||||||
rc = f77_zmq_close(requester)
|
rc = f77_zmq_close(requester)
|
||||||
rc = f77_zmq_ctx_destroy(context)
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
@ -449,7 +454,6 @@ subroutine davidson_miniserver_get()
|
|||||||
implicit none
|
implicit none
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
|
|
||||||
integer(ZMQ_PTR) context
|
|
||||||
integer(ZMQ_PTR) requester
|
integer(ZMQ_PTR) requester
|
||||||
character*(64) address
|
character*(64) address
|
||||||
character*(20) buffer
|
character*(20) buffer
|
||||||
@ -457,8 +461,7 @@ subroutine davidson_miniserver_get()
|
|||||||
|
|
||||||
address = trim(qp_run_address)//':11223'
|
address = trim(qp_run_address)//':11223'
|
||||||
|
|
||||||
context = f77_zmq_ctx_new()
|
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||||
requester = f77_zmq_socket(context, ZMQ_REQ)
|
|
||||||
rc = f77_zmq_connect(requester,address)
|
rc = f77_zmq_connect(requester,address)
|
||||||
|
|
||||||
rc = f77_zmq_send(requester, "Hello", 5, 0)
|
rc = f77_zmq_send(requester, "Hello", 5, 0)
|
||||||
@ -469,9 +472,7 @@ subroutine davidson_miniserver_get()
|
|||||||
TOUCH dav_det dav_ut
|
TOUCH dav_det dav_ut
|
||||||
|
|
||||||
rc = f77_zmq_close(requester)
|
rc = f77_zmq_close(requester)
|
||||||
rc = f77_zmq_ctx_destroy(context)
|
|
||||||
|
|
||||||
touch dav_det dav_ut
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
|
@ -249,7 +249,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
|||||||
call davidson_init(handler)
|
call davidson_init(handler)
|
||||||
do sh=shortcut(0,1),1,-1
|
do sh=shortcut(0,1),1,-1
|
||||||
workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||||
if(workload > 1000000) then
|
if(workload > 100000) then
|
||||||
blocke = sh
|
blocke = sh
|
||||||
call davidson_add_task(handler, blocke, blockb)
|
call davidson_add_task(handler, blocke, blockb)
|
||||||
blockb = sh-1
|
blockb = sh-1
|
||||||
@ -258,51 +258,51 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
if(blockb > 0) call davidson_add_task(handler, 1, blockb)
|
if(blockb > 0) call davidson_add_task(handler, 1, blockb)
|
||||||
|
|
||||||
call davidson_run(handler, v_0, s_0)
|
call davidson_run(handler, v_0, s_0)
|
||||||
|
|
||||||
!$OMP PARALLEL DEFAULT(NONE) &
|
!$OMP PARALLEL DEFAULT(NONE) &
|
||||||
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
||||||
!$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
!$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
||||||
allocate(vt(N_st_8,n),st(N_st_8,n))
|
|
||||||
Vt = 0.d0
|
|
||||||
St = 0.d0
|
|
||||||
|
|
||||||
!$OMP DO SCHEDULE(dynamic)
|
allocate(vt(N_st_8,n),st(N_st_8,n))
|
||||||
do sh=1,shortcut(0,2)
|
Vt = 0.d0
|
||||||
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
St = 0.d0
|
||||||
org_i = sort_idx(i,2)
|
|
||||||
do j=shortcut(sh,2),i-1
|
!$OMP DO SCHEDULE(dynamic)
|
||||||
org_j = sort_idx(j,2)
|
do sh=1,shortcut(0,2)
|
||||||
ext = 0
|
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||||
do ni=1,Nint
|
org_i = sort_idx(i,2)
|
||||||
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
do j=shortcut(sh,2),i-1
|
||||||
|
org_j = sort_idx(j,2)
|
||||||
|
ext = 0
|
||||||
|
do ni=1,Nint
|
||||||
|
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
||||||
|
end do
|
||||||
|
if(ext == 4) then
|
||||||
|
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||||
|
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||||
|
do istate=1,n_st
|
||||||
|
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
|
||||||
|
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
|
||||||
|
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
|
||||||
|
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
|
||||||
|
enddo
|
||||||
|
end if
|
||||||
end do
|
end do
|
||||||
if(ext == 4) then
|
|
||||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
|
||||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
|
||||||
do istate=1,n_st
|
|
||||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
|
|
||||||
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
|
|
||||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
|
|
||||||
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
|
|
||||||
enddo
|
|
||||||
end if
|
|
||||||
end do
|
end do
|
||||||
end do
|
|
||||||
enddo
|
|
||||||
!$OMP END DO NOWAIT
|
|
||||||
|
|
||||||
!$OMP CRITICAL
|
|
||||||
do istate=1,N_st
|
|
||||||
do i=n,1,-1
|
|
||||||
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
|
||||||
s_0(i,istate) = s_0(i,istate) + st(istate,i)
|
|
||||||
enddo
|
enddo
|
||||||
enddo
|
!$OMP END DO NOWAIT
|
||||||
!$OMP END CRITICAL
|
|
||||||
|
|
||||||
deallocate(vt,st)
|
!$OMP CRITICAL
|
||||||
|
do istate=1,N_st
|
||||||
|
do i=n,1,-1
|
||||||
|
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
||||||
|
s_0(i,istate) = s_0(i,istate) + st(istate,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END CRITICAL
|
||||||
|
|
||||||
|
deallocate(vt,st)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
do istate=1,N_st
|
do istate=1,N_st
|
||||||
@ -311,6 +311,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
|||||||
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
deallocate (shortcut, sort_idx, sorted, version)
|
deallocate (shortcut, sort_idx, sorted, version)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
78
src/Utils/transpose.irp.f
Normal file
78
src/Utils/transpose.irp.f
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
!DIR$ attributes forceinline :: transpose
|
||||||
|
recursive subroutine transpose(A,LDA,B,LDB,d1,d2)
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Transpose input matrix A into output matrix B
|
||||||
|
END_DOC
|
||||||
|
integer, intent(in) :: d1, d2, LDA, LDB
|
||||||
|
real, intent(in) :: A(LDA,d2)
|
||||||
|
real, intent(out) :: B(LDB,d1)
|
||||||
|
|
||||||
|
integer :: i,j,k, mod_align
|
||||||
|
if ( d2 < 32 ) then
|
||||||
|
do j=1,d1
|
||||||
|
!DIR$ LOOP COUNT (16)
|
||||||
|
do i=1,d2
|
||||||
|
B(i,j ) = A(j ,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
return
|
||||||
|
else if (d1 > d2) then
|
||||||
|
!DIR$ forceinline
|
||||||
|
k=d1/2
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call transpose(A(1,1),LDA,B(1,1),LDB,k,d2)
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call transpose(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2)
|
||||||
|
return
|
||||||
|
else
|
||||||
|
!DIR$ forceinline
|
||||||
|
k=d2/2
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call transpose(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k)
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call transpose(A(1,1),LDA,B(1,1),LDB,d1,k)
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
!DIR$ attributes forceinline :: dtranspose
|
||||||
|
recursive subroutine dtranspose(A,LDA,B,LDB,d1,d2)
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Transpose input matrix A into output matrix B
|
||||||
|
END_DOC
|
||||||
|
integer, intent(in) :: d1, d2, LDA, LDB
|
||||||
|
double precision, intent(in) :: A(LDA,d2)
|
||||||
|
double precision, intent(out) :: B(LDB,d1)
|
||||||
|
|
||||||
|
integer :: i,j,k, mod_align
|
||||||
|
if ( d2 < 32 ) then
|
||||||
|
do j=1,d1
|
||||||
|
!DIR$ LOOP COUNT (16)
|
||||||
|
do i=1,d2
|
||||||
|
B(i,j ) = A(j ,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
return
|
||||||
|
else if (d1 > d2) then
|
||||||
|
!DIR$ forceinline
|
||||||
|
k=d1/2
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call dtranspose(A(1,1),LDA,B(1,1),LDB,k,d2)
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call dtranspose(A(k+1,1),LDA,B(1,k+1),LDB,d1-k,d2)
|
||||||
|
return
|
||||||
|
else
|
||||||
|
!DIR$ forceinline
|
||||||
|
k=d2/2
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call dtranspose(A(1,k+1),LDA,B(k+1,1),LDB,d1,d2-k)
|
||||||
|
!DIR$ forceinline recursive
|
||||||
|
call dtranspose(A(1,1),LDA,B(1,1),LDB,d1,k)
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
end
|
||||||
|
|
Loading…
Reference in New Issue
Block a user