mirror of
https://github.com/LCPQ/quantum_package
synced 2024-12-23 04:43:50 +01:00
Dissymmetrized u0Hu0: less memory and communication
This commit is contained in:
parent
ef07f9bc8e
commit
ef5f905afc
@ -64,8 +64,8 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
|
|||||||
integer :: imin, imax, ishift, istep
|
integer :: imin, imax, ishift, istep
|
||||||
|
|
||||||
integer, allocatable :: psi_det_read(:,:,:)
|
integer, allocatable :: psi_det_read(:,:,:)
|
||||||
double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:)
|
double precision, allocatable :: v_t(:,:), s_t(:,:), u_t(:,:)
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0
|
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t, s_t
|
||||||
|
|
||||||
! Get wave function (u_t)
|
! Get wave function (u_t)
|
||||||
! -----------------------
|
! -----------------------
|
||||||
@ -108,7 +108,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
|
|||||||
TOUCH N_det
|
TOUCH N_det
|
||||||
endif
|
endif
|
||||||
|
|
||||||
allocate(v_0(sze,N_st), s_0(sze,N_st),u_t(N_st,N_det_read))
|
allocate(u_t(N_st,N_det_read))
|
||||||
|
|
||||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det_read*bit_kind,0)
|
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det_read*bit_kind,0)
|
||||||
if (rc /= N_int*2*N_det_read*bit_kind) then
|
if (rc /= N_int*2*N_det_read*bit_kind) then
|
||||||
@ -133,41 +133,50 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
|
|||||||
! ---------
|
! ---------
|
||||||
|
|
||||||
|
|
||||||
|
allocate(v_t(N_st,N_det), s_t(N_st,N_det))
|
||||||
do
|
do
|
||||||
v_0 = 0.d0
|
|
||||||
s_0 = 0.d0
|
|
||||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
|
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
|
||||||
if(task_id == 0) exit
|
if(task_id == 0) exit
|
||||||
read (msg,*) imin, imax, ishift, istep
|
read (msg,*) imin, imax, ishift, istep
|
||||||
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,N_det,imin,imax,ishift,istep)
|
v_t = 0.d0
|
||||||
|
s_t = 0.d0
|
||||||
|
call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,N_det,imin,imax,ishift,istep)
|
||||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||||
call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
|
call davidson_push_results(zmq_socket_push, v_t, s_t, imin, imax, task_id)
|
||||||
end do
|
end do
|
||||||
deallocate(v_0, s_0, u_t)
|
deallocate(u_t,v_t, s_t)
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
subroutine davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
|
subroutine davidson_push_results(zmq_socket_push, v_t, s_t, imin, imax, task_id)
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
|
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
|
||||||
integer ,intent(in) :: task_id
|
integer ,intent(in) :: task_id, imin, imax
|
||||||
double precision ,intent(in) :: v_0(N_det,N_states_diag)
|
double precision ,intent(in) :: v_t(N_states_diag,N_det)
|
||||||
double precision ,intent(in) :: s_0(N_det,N_states_diag)
|
double precision ,intent(in) :: s_t(N_states_diag,N_det)
|
||||||
integer :: rc
|
integer :: rc, sz
|
||||||
|
|
||||||
rc = f77_zmq_send( zmq_socket_push, v_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
|
sz = (imax-imin+1)*N_states_diag
|
||||||
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push vt"
|
|
||||||
|
|
||||||
rc = f77_zmq_send( zmq_socket_push, s_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
|
rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
|
||||||
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push st"
|
|
||||||
|
|
||||||
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
|
|
||||||
if(rc /= 4) stop "davidson_push_results failed to push task_id"
|
if(rc /= 4) stop "davidson_push_results failed to push task_id"
|
||||||
|
|
||||||
|
rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
|
||||||
|
if(rc /= 4) stop "davidson_push_results failed to push imin"
|
||||||
|
|
||||||
|
rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
|
||||||
|
if(rc /= 4) stop "davidson_push_results failed to push imax"
|
||||||
|
|
||||||
|
rc = f77_zmq_send( zmq_socket_push, v_t(1,imin), 8*sz, ZMQ_SNDMORE)
|
||||||
|
if(rc /= 8*sz) stop "davidson_push_results failed to push vt"
|
||||||
|
|
||||||
|
rc = f77_zmq_send( zmq_socket_push, s_t(1,imin), 8*sz, 0)
|
||||||
|
if(rc /= 8*sz) stop "davidson_push_results failed to push st"
|
||||||
|
|
||||||
! Activate is zmq_socket_push is a REQ
|
! Activate is zmq_socket_push is a REQ
|
||||||
IRP_IF ZMQ_PUSH
|
IRP_IF ZMQ_PUSH
|
||||||
IRP_ELSE
|
IRP_ELSE
|
||||||
@ -183,26 +192,34 @@ end subroutine
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
|
subroutine davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
|
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
|
||||||
integer ,intent(out) :: task_id
|
integer ,intent(out) :: task_id, imin, imax
|
||||||
double precision ,intent(out) :: v_0(N_det,N_states_diag)
|
double precision ,intent(out) :: v_t(N_states_diag,N_det)
|
||||||
double precision ,intent(out) :: s_0(N_det,N_states_diag)
|
double precision ,intent(out) :: s_t(N_states_diag,N_det)
|
||||||
|
|
||||||
integer :: rc
|
integer :: rc, sz
|
||||||
|
|
||||||
rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0)
|
|
||||||
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0"
|
|
||||||
|
|
||||||
rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0)
|
|
||||||
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0"
|
|
||||||
|
|
||||||
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
||||||
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
||||||
|
|
||||||
|
rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0)
|
||||||
|
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
||||||
|
|
||||||
|
rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0)
|
||||||
|
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
||||||
|
|
||||||
|
sz = (imax-imin+1)*N_states_diag
|
||||||
|
|
||||||
|
rc = f77_zmq_recv( zmq_socket_pull, v_t(1,imin), 8*sz, 0)
|
||||||
|
if(rc /= 8*sz) stop "davidson_pull_results failed to pull v_t"
|
||||||
|
|
||||||
|
rc = f77_zmq_recv( zmq_socket_pull, s_t(1,imin), 8*sz, 0)
|
||||||
|
if(rc /= 8*sz) stop "davidson_pull_results failed to pull s_t"
|
||||||
|
|
||||||
! Activate if zmq_socket_pull is a REP
|
! Activate if zmq_socket_pull is a REP
|
||||||
IRP_IF ZMQ_PUSH
|
IRP_IF ZMQ_PUSH
|
||||||
IRP_ELSE
|
IRP_ELSE
|
||||||
@ -227,29 +244,29 @@ subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze, N_st)
|
|||||||
double precision ,intent(inout) :: v0(sze, N_st)
|
double precision ,intent(inout) :: v0(sze, N_st)
|
||||||
double precision ,intent(inout) :: s0(sze, N_st)
|
double precision ,intent(inout) :: s0(sze, N_st)
|
||||||
|
|
||||||
integer :: more, task_id
|
integer :: more, task_id, imin, imax
|
||||||
|
|
||||||
double precision, allocatable :: v_0(:,:), s_0(:,:)
|
double precision, allocatable :: v_t(:,:), s_t(:,:)
|
||||||
integer :: i,j
|
integer :: i,j
|
||||||
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
||||||
integer(ZMQ_PTR) :: zmq_socket_pull
|
integer(ZMQ_PTR) :: zmq_socket_pull
|
||||||
|
|
||||||
allocate(v_0(N_det,N_st), s_0(N_det,N_st))
|
allocate(v_t(N_st,N_det), s_t(N_st,N_det))
|
||||||
v0 = 0.d0
|
v0 = 0.d0
|
||||||
s0 = 0.d0
|
s0 = 0.d0
|
||||||
more = 1
|
more = 1
|
||||||
zmq_socket_pull = new_zmq_pull_socket()
|
zmq_socket_pull = new_zmq_pull_socket()
|
||||||
do while (more == 1)
|
do while (more == 1)
|
||||||
call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
|
call davidson_pull_results(zmq_socket_pull, v_t, s_t, imin, imax, task_id)
|
||||||
do j=1,N_st
|
do j=1,N_st
|
||||||
do i=1,N_det
|
do i=imin,imax
|
||||||
v0(i,j) = v0(i,j) + v_0(i,j)
|
v0(i,j) = v0(i,j) + v_t(j,i)
|
||||||
s0(i,j) = s0(i,j) + s_0(i,j)
|
s0(i,j) = s0(i,j) + s_t(j,i)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
||||||
end do
|
end do
|
||||||
deallocate(v_0,s_0)
|
deallocate(v_t,s_t)
|
||||||
call end_zmq_pull_socket(zmq_socket_pull)
|
call end_zmq_pull_socket(zmq_socket_pull)
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
@ -349,16 +366,15 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
|
|||||||
|
|
||||||
integer :: istep, imin, imax, ishift
|
integer :: istep, imin, imax, ishift
|
||||||
double precision :: w, max_workload, N_det_inv, di
|
double precision :: w, max_workload, N_det_inv, di
|
||||||
max_workload = 1000000.d0
|
|
||||||
w = 0.d0
|
w = 0.d0
|
||||||
istep=8
|
istep=1
|
||||||
ishift=0
|
ishift=0
|
||||||
imin=1
|
imin=1
|
||||||
N_det_inv = 1.d0/dble(N_det)
|
N_det_inv = 1.d0/dble(N_det)
|
||||||
di = dble(N_det)
|
di = dble(N_det)
|
||||||
|
max_workload = 50000.d0
|
||||||
do imax=1,N_det
|
do imax=1,N_det
|
||||||
di = di-1.d0
|
w = w + 1.d0
|
||||||
w = w + di*N_det_inv
|
|
||||||
if (w > max_workload) then
|
if (w > max_workload) then
|
||||||
do ishift=0,istep-1
|
do ishift=0,istep-1
|
||||||
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
|
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
|
||||||
|
@ -21,14 +21,14 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
|
|||||||
integer, intent(in) :: N_st,sze
|
integer, intent(in) :: N_st,sze
|
||||||
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
|
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
|
||||||
integer :: k
|
integer :: k
|
||||||
double precision, allocatable :: u_t(:,:)
|
double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:)
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
|
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
|
||||||
allocate(u_t(N_st,N_det))
|
allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det))
|
||||||
do k=1,N_st
|
do k=1,N_st
|
||||||
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
|
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
|
||||||
enddo
|
enddo
|
||||||
v_0 = 0.d0
|
v_t = 0.d0
|
||||||
s_0 = 0.d0
|
s_t = 0.d0
|
||||||
call dtranspose( &
|
call dtranspose( &
|
||||||
u_0, &
|
u_0, &
|
||||||
size(u_0, 1), &
|
size(u_0, 1), &
|
||||||
@ -36,9 +36,23 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
|
|||||||
size(u_t, 1), &
|
size(u_t, 1), &
|
||||||
N_det, N_st)
|
N_det, N_st)
|
||||||
|
|
||||||
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,1,N_det,0,1)
|
call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1)
|
||||||
deallocate(u_t)
|
deallocate(u_t)
|
||||||
|
|
||||||
|
call dtranspose( &
|
||||||
|
v_t, &
|
||||||
|
size(v_t, 1), &
|
||||||
|
v_0, &
|
||||||
|
size(v_0, 1), &
|
||||||
|
N_st, N_det)
|
||||||
|
call dtranspose( &
|
||||||
|
s_t, &
|
||||||
|
size(s_t, 1), &
|
||||||
|
s_0, &
|
||||||
|
size(s_0, 1), &
|
||||||
|
N_st, N_det)
|
||||||
|
deallocate(v_t,s_t)
|
||||||
|
|
||||||
do k=1,N_st
|
do k=1,N_st
|
||||||
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||||
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||||
@ -48,47 +62,47 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
use bitmasks
|
use bitmasks
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
! Computes v_t = H|u_t> and s_t = S^2 |u_t>
|
||||||
!
|
!
|
||||||
! Default should be 1,N_det,0,1
|
! Default should be 1,N_det,0,1
|
||||||
END_DOC
|
END_DOC
|
||||||
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
||||||
double precision, intent(in) :: u_t(N_st,N_det)
|
double precision, intent(in) :: u_t(N_st,N_det)
|
||||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
|
||||||
|
|
||||||
|
|
||||||
PROVIDE ref_bitmask_energy N_int
|
PROVIDE ref_bitmask_energy N_int
|
||||||
|
|
||||||
select case (N_int)
|
select case (N_int)
|
||||||
case (1)
|
case (1)
|
||||||
call H_S2_u_0_nstates_openmp_work_1(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
case (2)
|
case (2)
|
||||||
call H_S2_u_0_nstates_openmp_work_2(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
case (3)
|
case (3)
|
||||||
call H_S2_u_0_nstates_openmp_work_3(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
case (4)
|
case (4)
|
||||||
call H_S2_u_0_nstates_openmp_work_4(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
case default
|
case default
|
||||||
call H_S2_u_0_nstates_openmp_work_N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
end select
|
end select
|
||||||
end
|
end
|
||||||
BEGIN_TEMPLATE
|
BEGIN_TEMPLATE
|
||||||
|
|
||||||
subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||||
use bitmasks
|
use bitmasks
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
! Computes v_t = H|u_t> and s_t = S^2 |u_t>
|
||||||
!
|
!
|
||||||
! Default should be 1,N_det,0,1
|
! Default should be 1,N_det,0,1
|
||||||
END_DOC
|
END_DOC
|
||||||
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
||||||
double precision, intent(in) :: u_t(N_st,N_det)
|
double precision, intent(in) :: u_t(N_st,N_det)
|
||||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
|
||||||
|
|
||||||
double precision :: hij, sij
|
double precision :: hij, sij
|
||||||
integer :: i,j,k,l
|
integer :: i,j,k,l
|
||||||
@ -109,8 +123,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
integer, allocatable :: idx(:), idx0(:)
|
integer, allocatable :: idx(:), idx0(:)
|
||||||
integer :: maxab, n_singles_a, n_singles_b, kcol_prev
|
integer :: maxab, n_singles_a, n_singles_b, kcol_prev
|
||||||
integer*8 :: k8
|
integer*8 :: k8
|
||||||
double precision, allocatable :: v_t(:,:), s_t(:,:)
|
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t
|
|
||||||
|
|
||||||
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
||||||
allocate(idx0(maxab))
|
allocate(idx0(maxab))
|
||||||
@ -133,14 +145,15 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
!$OMP psi_bilinear_matrix_transp_order, N_st, &
|
!$OMP psi_bilinear_matrix_transp_order, N_st, &
|
||||||
!$OMP psi_bilinear_matrix_order_transp_reverse, &
|
!$OMP psi_bilinear_matrix_order_transp_reverse, &
|
||||||
!$OMP psi_bilinear_matrix_columns_loc, &
|
!$OMP psi_bilinear_matrix_columns_loc, &
|
||||||
!$OMP istart, iend, istep, irp_here, &
|
!$OMP psi_bilinear_matrix_transp_rows_loc, &
|
||||||
!$OMP ishift, idx0, u_t, maxab, v_0, s_0) &
|
!$OMP istart, iend, istep, irp_here, v_t, s_t, &
|
||||||
|
!$OMP ishift, idx0, u_t, maxab) &
|
||||||
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
|
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
|
||||||
!$OMP lcol, lrow, l_a, l_b, &
|
!$OMP lcol, lrow, l_a, l_b, &
|
||||||
!$OMP buffer, doubles, n_doubles, &
|
!$OMP buffer, doubles, n_doubles, &
|
||||||
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, &
|
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
|
||||||
!$OMP singles_a, n_singles_a, singles_b, &
|
!$OMP singles_a, n_singles_a, singles_b, &
|
||||||
!$OMP n_singles_b, s_t, k8)
|
!$OMP n_singles_b, k8)
|
||||||
|
|
||||||
! Alpha/Beta double excitations
|
! Alpha/Beta double excitations
|
||||||
! =============================
|
! =============================
|
||||||
@ -149,12 +162,9 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
singles_a(maxab), &
|
singles_a(maxab), &
|
||||||
singles_b(maxab), &
|
singles_b(maxab), &
|
||||||
doubles(maxab), &
|
doubles(maxab), &
|
||||||
idx(maxab), &
|
idx(maxab))
|
||||||
v_t(N_st,N_det), s_t(N_st,N_det))
|
|
||||||
kcol_prev=-1
|
|
||||||
|
|
||||||
v_t = 0.d0
|
kcol_prev=-1
|
||||||
s_t = 0.d0
|
|
||||||
|
|
||||||
ASSERT (iend <= N_det)
|
ASSERT (iend <= N_det)
|
||||||
ASSERT (istart > 0)
|
ASSERT (istart > 0)
|
||||||
@ -174,14 +184,14 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
|
|
||||||
if (kcol /= kcol_prev) then
|
if (kcol /= kcol_prev) then
|
||||||
call get_all_spin_singles_$N_int( &
|
call get_all_spin_singles_$N_int( &
|
||||||
psi_det_beta_unique(1,kcol+1), idx0(kcol+1), &
|
psi_det_beta_unique, idx0, &
|
||||||
tmp_det(1,2), N_det_beta_unique-kcol, &
|
tmp_det(1,2), N_det_beta_unique, &
|
||||||
singles_b, n_singles_b)
|
singles_b, n_singles_b)
|
||||||
endif
|
endif
|
||||||
kcol_prev = kcol
|
kcol_prev = kcol
|
||||||
|
|
||||||
! Loop over singly excited beta columns > current column
|
! Loop over singly excited beta columns
|
||||||
! ------------------------------------------------------
|
! -------------------------------------
|
||||||
|
|
||||||
do i=1,n_singles_b
|
do i=1,n_singles_b
|
||||||
lcol = singles_b(i)
|
lcol = singles_b(i)
|
||||||
@ -223,8 +233,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
||||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
|
||||||
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
|
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
@ -262,7 +270,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
spindet(1:$N_int) = tmp_det(1:$N_int,1)
|
spindet(1:$N_int) = tmp_det(1:$N_int,1)
|
||||||
|
|
||||||
! Loop inside the beta column to gather all the connected alphas
|
! Loop inside the beta column to gather all the connected alphas
|
||||||
l_a = k_a+1
|
lcol = psi_bilinear_matrix_columns(k_a)
|
||||||
|
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
||||||
do i=1,N_det_alpha_unique
|
do i=1,N_det_alpha_unique
|
||||||
if (l_a > N_det) exit
|
if (l_a > N_det) exit
|
||||||
lcol = psi_bilinear_matrix_columns(l_a)
|
lcol = psi_bilinear_matrix_columns(l_a)
|
||||||
@ -295,7 +304,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
|
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
|
||||||
|
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
enddo
|
enddo
|
||||||
@ -314,7 +322,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
|
|
||||||
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
enddo
|
enddo
|
||||||
@ -343,7 +350,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
ASSERT (k_b <= N_det)
|
ASSERT (k_b <= N_det)
|
||||||
|
|
||||||
! Loop inside the alpha row to gather all the connected betas
|
! Loop inside the alpha row to gather all the connected betas
|
||||||
l_b = k_b+1
|
lrow = psi_bilinear_matrix_transp_rows(k_b)
|
||||||
|
l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
|
||||||
do i=1,N_det_beta_unique
|
do i=1,N_det_beta_unique
|
||||||
if (l_b > N_det) exit
|
if (l_b > N_det) exit
|
||||||
lrow = psi_bilinear_matrix_transp_rows(l_b)
|
lrow = psi_bilinear_matrix_transp_rows(l_b)
|
||||||
@ -377,7 +385,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
enddo
|
enddo
|
||||||
@ -398,7 +405,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
enddo
|
enddo
|
||||||
@ -431,20 +437,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
end do
|
end do
|
||||||
!$OMP END DO NOWAIT
|
!$OMP END DO
|
||||||
deallocate(buffer, singles_a, singles_b, doubles, idx)
|
deallocate(buffer, singles_a, singles_b, doubles, idx)
|
||||||
|
|
||||||
!$OMP CRITICAL
|
|
||||||
do l=1,N_st
|
|
||||||
do i=1, N_det
|
|
||||||
v_0(i,l) = v_0(i,l) + v_t(l,i)
|
|
||||||
s_0(i,l) = s_0(i,l) + s_t(l,i)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
!$OMP END CRITICAL
|
|
||||||
deallocate(v_t, s_t)
|
|
||||||
|
|
||||||
!$OMP BARRIER
|
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -500,7 +500,7 @@ subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze
|
|||||||
! if (exc(0,1,2) /= 0) cycle
|
! if (exc(0,1,2) /= 0) cycle
|
||||||
! if (exc(0,1,1) == 2) cycle
|
! if (exc(0,1,1) == 2) cycle
|
||||||
! if (exc(0,1,2) == 2) cycle
|
! if (exc(0,1,2) == 2) cycle
|
||||||
if ((degree==1).and.(exc(0,1,1) == 1)) cycle
|
! if ((degree==1).and.(exc(0,1,1) == 1)) cycle
|
||||||
call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
|
call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
!$OMP ATOMIC
|
!$OMP ATOMIC
|
||||||
|
Loading…
Reference in New Issue
Block a user