10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-10 21:18:29 +01:00

reduced synchronization in davidson_process

This commit is contained in:
Yann Garniron 2016-10-09 15:03:56 +02:00
parent 874e6845f0
commit b167dcebef

View File

@ -17,6 +17,7 @@ subroutine davidson_process(blockb, blocke, vt, st)
integer(bit_kind) :: sorted_i(N_int) integer(bit_kind) :: sorted_i(N_int)
double precision :: s2, hij double precision :: s2, hij
integer, external :: omp_get_thread_num integer, external :: omp_get_thread_num
double precision, allocatable :: locals(:,:), localv(:,:)
provide dav_det dav_ut shortcut_ provide dav_det dav_ut shortcut_
!useless calls not to provide in the parallel section !useless calls not to provide in the parallel section
@ -25,9 +26,15 @@ subroutine davidson_process(blockb, blocke, vt, st)
!!!!! !!!!!
do sh = blockb, blocke do sh = blockb, blocke
!$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP PARALLEL &
!$OMP default(none) &
!$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) & !$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) &
!$OMP private(exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2) !$OMP private(i,j,sh2, locals, localv, exa, ni, ext, org_i, org_j, sorted_i, endi, hij, s2)
allocate(locals(N_states_diag, shortcut_(sh+1,1) - shortcut_(sh,1)))
allocate(localv(N_states_diag, shortcut_(sh+1,1) - shortcut_(sh,1)))
locals = 0d0
localv = 0d0
!$OMP DO
do sh2=1,sh do sh2=1,sh
exa = 0 exa = 0
do ni=1,N_int do ni=1,N_int
@ -57,21 +64,41 @@ subroutine davidson_process(blockb, blocke, vt, st)
if(ext <= 4) then if(ext <= 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij) call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2) call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
!$OMP CRITICAL
do istate=1,N_states_diag if(sh == sh2) then
vt (istate,org_i) += hij*dav_ut(istate,org_j) !$OMP CRITICAL
st (istate,org_i) += s2*dav_ut(istate,org_j) do istate=1,N_states_diag
vt (istate,org_j) += hij*dav_ut(istate,org_i) localv (istate,i+1-shortcut_(sh,1)) += hij*dav_ut(istate,org_j)
st (istate,org_j) += s2*dav_ut(istate,org_i) locals (istate,i+1-shortcut_(sh,1)) += s2*dav_ut(istate,org_j)
enddo vt (istate,org_j) += hij*dav_ut(istate,org_i)
!$OMP END CRITICAL st (istate,org_j) += s2*dav_ut(istate,org_i)
enddo
!$OMP END CRITICAL
else
do istate=1,N_states_diag
localv (istate,i+1-shortcut_(sh,1)) += hij*dav_ut(istate,org_j)
locals (istate,i+1-shortcut_(sh,1)) += s2*dav_ut(istate,org_j)
vt (istate,org_j) += hij*dav_ut(istate,org_i)
st (istate,org_j) += s2*dav_ut(istate,org_i)
enddo
end if
endif endif
enddo enddo
enddo enddo
enddo enddo
!$OMP END PARALLEL DO !$OMP ENDDO
!$OMP CRITICAL
do i=1,shortcut_(sh+1,1) - shortcut_(sh,1)
do istate=1,N_states_diag
vt(istate, sort_idx_(shortcut_(sh,1) - 1 + i, 1)) += localv(istate,i)
st(istate, sort_idx_(shortcut_(sh,1) - 1 + i, 1)) += locals(istate,i)
end do
end do
!$OMP END CRITICAL
!$OMP END PARALLEL
enddo enddo
do sh=blockb,min(blocke, shortcut_(0,2)) do sh=blockb,min(blocke, shortcut_(0,2))
!$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP PARALLEL DO default(none) schedule(dynamic) &
!$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) & !$OMP shared(vt, st, blockb, blocke, sh, shortcut_, version_, sorted_, sort_idx_, dav_det, dav_ut, N_int, N_states_diag) &