10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-22 20:35:19 +01:00

corrected parallelism bug

This commit is contained in:
Yann Garniron 2015-11-02 20:56:02 +01:00
parent 67836776e1
commit 4eed037ecf
3 changed files with 123 additions and 93 deletions

View File

@ -495,6 +495,8 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate)
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(Nint,n), version(Nint,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
!
@ -520,61 +522,72 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate)
allocate(idx(0:n), vt(n))
Vt = 0.d0
do pass=1,2
if(pass == 1) then
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
else
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
end if
!$OMP SINGLE
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
if(pass == 2) then
endi = sh
else
endi = 1
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do sh2=1,sh
exa = 0
do ni=1,Nint
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do sh2=endi,sh
exa = 0
do ni=1,Nint
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( (dabs(u_0(org_j)) > 1.d-7).or.((dabs(u_0(org_i)) > 1.d-7)) ) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
endif
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if
end do
end do
enddo
!$OMP END DO
end do
enddo
!$OMP END DO
!$OMP SINGLE
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
do j=shortcut(sh),i-1
ext = 0
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if
end do
end do
enddo
!$OMP END DO
!$OMP DO SCHEDULE(guided)

View File

@ -178,6 +178,7 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
shortcut(0) = 1
shortcut(1) = 1
version(:,1) = key(:,1,1)
do i=2,N_key
do ni=1,nint
if(key(ni,1,i) /= key(ni,1,i-1)) then

View File

@ -132,62 +132,78 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
!$OMP REDUCTION(+:s2)
allocate(idx(0:n))
do pass=1,2
if(pass == 1) then
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
else
call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP SINGLE
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do sh2=1,sh
exa = 0
do ni=1,N_int
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
if(pass == 2) then
endi = sh
else
endi = 1
end if
do sh2=endi,sh
exa = 0
do ni=1,N_int
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
enddo
!$OMP END DO
end do
enddo
!$OMP END DO
!$OMP SINGLE
call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
do j=shortcut(sh),i-1
ext = 0
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
enddo
!$OMP END DO
deallocate(idx)
!$OMP END PARALLEL
s2 = s2+s2