10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-09 12:44:07 +01:00

shared memory davidson

This commit is contained in:
Yann Garniron 2016-09-30 15:29:06 +02:00
parent bca504aebd
commit 02b48aabce
2 changed files with 439 additions and 57 deletions

View File

@ -0,0 +1,366 @@
!brought to you by garniroy inc.
use bitmasks
subroutine davidson_process(block, N, idx, vt, st)
implicit none
integer , intent(in) :: block
integer , intent(inout) :: N
integer , intent(inout) :: idx(N_det)
double precision , intent(inout) :: vt(N_states, N_det)
double precision , intent(inout) :: st(N_states, N_det)
integer :: i, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi
integer(bit_kind) :: sorted_i(N_int)
double precision :: s2, hij
vt = 0d0
st = 0d0
N = N_det
do i=1,N
idx(i) = i
end do
sh = block
do sh2=1,sh
exa = 0
do ni=1,N_int
exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut_(sh,1),shortcut_(sh+1,1)-1
org_i = sort_idx_(i,1)
if(sh==sh2) then
endi = i-1
else
endi = shortcut_(sh2+1,1)-1
end if
do ni=1,N_int
sorted_i(ni) = sorted_(ni,i,1)
enddo
do j=shortcut_(sh2,1),endi
org_j = sort_idx_(j,1)
ext = exa
do ni=1,N_int
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
end do
if(ext <= 4) then
call i_h_j (psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,hij)
call get_s2(psi_det(1,1,org_j),psi_det(1,1,org_i),n_int,s2)
do istate=1,N_states
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
enddo
endif
enddo
enddo
enddo
end subroutine
BEGIN_PROVIDER [ integer, shortcut_, (0:N_det+1, 2) ]
&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, N_det, 2) ]
&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, N_det, 2) ]
&BEGIN_PROVIDER [ integer, sort_idx_, (N_det, 2) ]
implicit none
call sort_dets_ab_v(psi_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), n_det, N_int)
call sort_dets_ba_v(psi_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), n_det, N_int)
END_PROVIDER
subroutine davidson_collect(block, N, idx, vt, st , v0, s0)
implicit none
integer , intent(in) :: block
integer , intent(in) :: N
integer , intent(in) :: idx(N)
double precision , intent(in) :: vt(N_states, N)
double precision , intent(in) :: st(N_states, N)
double precision , intent(inout) :: v0(N_det, N_states)
double precision , intent(inout) :: s0(N_det, N_states)
integer :: i
do i=1,N
v0(idx(i), :) += vt(:, i)
s0(idx(i), :) += st(:, i)
end do
end subroutine
subroutine davidson_init(zmq_to_qp_run_socket)
use f77_zmq
implicit none
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket ! zmq_to_qp_run_socket
call new_parallel_job(zmq_to_qp_run_socket,'davidson')
end subroutine
subroutine davidson_add_task(zmq_to_qp_run_socket, block)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket
integer ,intent(in) :: block
character*(512) :: task
write(task,*) block
call add_task_to_taskserver(zmq_to_qp_run_socket, task)
end subroutine
subroutine davidson_slave_inproc(i)
implicit none
integer, intent(in) :: i
call davidson_run_slave(1,i)
end
subroutine davidson_slave_tcp(i)
implicit none
integer, intent(in) :: i
call davidson_run_slave(0,i)
end
subroutine davidson_run_slave(thread,iproc)
use f77_zmq
implicit none
integer, intent(in) :: thread, iproc
integer :: worker_id, task_id, block
character*(512) :: task
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
if(worker_id == -1) then
print *, "WORKER -1"
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
return
end if
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
end subroutine
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
use f77_zmq
implicit none
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),intent(in) :: zmq_socket_push
integer,intent(in) :: worker_id
integer :: task_id
character*(512) :: task
integer :: block
integer :: N
integer , allocatable :: idx(:)
double precision , allocatable :: vt(:,:)
double precision , allocatable :: st(:,:)
allocate(idx(N_det))
allocate(vt(N_states, N_det))
allocate(st(N_states, N_det))
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
if(task_id == 0) exit
read (task,*) block
call davidson_process(block,N, idx, vt, st)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id)
end do
end subroutine
subroutine davidson_push_results(zmq_socket_push, block, N, idx, vt, st, task_id)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
integer ,intent(in) :: task_id
integer ,intent(in) :: block
integer ,intent(in) :: N
integer ,intent(in) :: idx(N)
double precision ,intent(in) :: vt(N_states, N)
double precision ,intent(in) :: st(N_states, N)
integer :: rc
rc = f77_zmq_send( zmq_socket_push, block, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push block"
rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push N"
rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE)
if(rc /= 4*N) stop "davidson_push_results failed to push idx"
rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states* N, ZMQ_SNDMORE)
if(rc /= 8*N_states* N) stop "davidson_push_results failed to push vt"
rc = f77_zmq_send( zmq_socket_push, st, 8*N_states* N, ZMQ_SNDMORE)
if(rc /= 8*N_states* N) stop "davidson_push_results failed to push st"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to push task_id"
end subroutine
subroutine davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
integer ,intent(out) :: task_id
integer ,intent(out) :: block
integer ,intent(out) :: N
integer ,intent(out) :: idx(N_det)
double precision ,intent(out) :: vt(N_states, N_det)
double precision ,intent(out) :: st(N_states, N_det)
integer :: rc
rc = f77_zmq_recv( zmq_socket_pull, block, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to pull block"
rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to pull N"
rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0)
if(rc /= 4*N) stop "davidson_push_results failed to pull idx"
rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states* N, 0)
if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull vt"
rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states* N, 0)
if(rc /= 8*N_states* N) stop "davidson_push_results failed to pull st"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
end subroutine
subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0)
use f77_zmq
implicit none
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
double precision ,intent(inout) :: v0(N_det, N_states)
double precision ,intent(inout) :: s0(N_det, N_states)
integer :: more, task_id
integer :: block
integer :: N
integer , allocatable :: idx(:)
double precision , allocatable :: vt(:,:)
double precision , allocatable :: st(:,:)
allocate(idx(N_det))
allocate(vt(N_states, N_det))
allocate(st(N_states, N_det))
more = 1
do while (more == 1)
call davidson_pull_results(zmq_socket_pull, block, N, idx, vt, st, task_id)
call davidson_collect(block, N, idx, vt, st , v0, s0)
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
end do
end subroutine
subroutine davidson_run(zmq_to_qp_run_socket , v0, s0)
use f77_zmq
implicit none
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_collector
integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull
integer :: i
integer, external :: omp_get_thread_num
double precision , intent(inout) :: v0(N_det, N_states)
double precision , intent(inout) :: s0(N_det, N_states)
call zmq_set_running(zmq_to_qp_run_socket)
zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
i = omp_get_thread_num()
PROVIDE nproc
!$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1)
i = omp_get_thread_num()
if (i==0) then
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0)
call end_zmq_to_qp_run_socket(zmq_collector)
call end_zmq_pull_socket(zmq_socket_pull)
else
call davidson_slave_inproc(i)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
end subroutine

View File

@ -43,7 +43,8 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
double precision, intent(in) :: H_jj(n) double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij double precision :: hij
double precision, allocatable :: vt(:,:), ut(:,:) double precision, allocatable :: vt(:,:)
! double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0
@ -55,9 +56,10 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
N_st_8 = align_double(N_st) if(N_st /= N_states) stop "H_u_0_nstates N_st /= N_states"
N_st_8 = N_states ! align_double(N_st)
ASSERT (Nint > 0) ASSERT (Nint > 0)
ASSERT (Nint == N_int) ASSERT (Nint == N_int)
@ -163,7 +165,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
v_0(i,istate) += H_jj(i) * u_0(i,istate) v_0(i,istate) += H_jj(i) * u_0(i,istate)
enddo enddo
enddo enddo
deallocate (shortcut, sort_idx, sorted, version, ut) !deallocate (shortcut, sort_idx, sorted, version, ut)
end end
BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
@ -175,11 +177,14 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, ut, (N_states, N_det) ]
ut = 0d0
END_PROVIDER
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
use bitmasks use bitmasks
use f77_zmq
implicit none implicit none
BEGIN_DOC BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
@ -196,7 +201,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
double precision, intent(in) :: H_jj(n), S2_jj(n) double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2 double precision :: hij,s2
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:) double precision, allocatable :: vt(:,:), st(:,:)
!double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0
@ -208,9 +214,12 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut !!!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
N_st_8 = align_double(N_st) integer(ZMQ_PTR) :: handler
if(N_st /= N_states .or. sze_8 /= N_det) stop "SPEP"
N_st_8 = N_st !! align_double(N_st)
ASSERT (Nint > 0) ASSERT (Nint > 0)
ASSERT (Nint == N_int) ASSERT (Nint == N_int)
@ -218,20 +227,27 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
PROVIDE ref_bitmask_energy PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2)) allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate(ut(N_st_8,n)) !allocate(ut(N_st_8,n))
v_0 = 0.d0 v_0 = 0.d0
s_0 = 0.d0 s_0 = 0.d0
provide ut
do i=1,n do i=1,n
do istate=1,N_st do istate=1,N_st
ut(istate,i) = u_0(i,istate) ut(istate,i) = u_0(i,istate)
enddo enddo
enddo enddo
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
call davidson_init(handler)
do sh=shortcut(0,1),1,-1
call davidson_add_task(handler, sh)
enddo
call davidson_run(handler, v_0, s_0)
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
@ -239,49 +255,49 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
Vt = 0.d0 Vt = 0.d0
St = 0.d0 St = 0.d0
!$OMP DO SCHEDULE(dynamic) ! ! !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,1) ! ! do sh=1,shortcut(0,1)
do sh2=sh,shortcut(0,1) ! ! do sh2=sh,shortcut(0,1)
exa = 0 ! ! exa = 0
do ni=1,Nint ! ! do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) ! ! exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do ! ! end do
if(exa > 2) then ! ! if(exa > 2) then
cycle ! ! cycle
end if ! ! end if
! !
do i=shortcut(sh,1),shortcut(sh+1,1)-1 ! ! do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1) ! ! org_i = sort_idx(i,1)
if(sh==sh2) then ! ! if(sh==sh2) then
endi = i-1 ! ! endi = i-1
else ! ! else
endi = shortcut(sh2+1,1)-1 ! ! endi = shortcut(sh2+1,1)-1
end if ! ! end if
do ni=1,Nint ! ! do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1) ! ! sorted_i(ni) = sorted(ni,i,1)
enddo ! ! enddo
! !
do j=shortcut(sh2,1),endi ! ! do j=shortcut(sh2,1),endi
org_j = sort_idx(j,1) ! ! org_j = sort_idx(j,1)
ext = exa ! ! ext = exa
do ni=1,Nint ! ! do ni=1,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1))) ! ! ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
end do ! ! end do
if(ext <= 4) then ! ! if(ext <= 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) ! ! call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) ! ! call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st ! ! do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) ! ! vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) ! ! vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) ! ! st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) ! ! st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
enddo ! ! enddo
endif ! ! endif
enddo ! ! enddo
enddo ! ! enddo
enddo ! ! enddo
enddo ! ! enddo
!$OMP END DO NOWAIT ! ! !$OMP END DO NOWAIT
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,2) do sh=1,shortcut(0,2)
@ -326,6 +342,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo enddo
enddo enddo
deallocate (shortcut, sort_idx, sorted, version, ut) deallocate (shortcut, sort_idx, sorted, version)
end end