From 4eed037ecf8a1d2828b19845525fe33655c7f645 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 2 Nov 2015 20:56:02 +0100 Subject: [PATCH] corrected parallelism bug --- plugins/MRCC_Utils/davidson.irp.f | 103 +++++++++++++++------------ src/Determinants/davidson.irp.f | 1 + src/Determinants/s2.irp.f | 112 +++++++++++++++++------------- 3 files changed, 123 insertions(+), 93 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 1757305c..9216877c 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -495,6 +495,8 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) integer :: shortcut(0:n+1), sort_idx(n) integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass ! @@ -520,61 +522,72 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) allocate(idx(0:n), vt(n)) Vt = 0.d0 - do pass=1,2 - if(pass == 1) then - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - else - call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - end if - + + !$OMP SINGLE + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0) - - if(pass == 2) then - endi = sh - else - endi = 1 + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do sh2=1,sh + exa = 0 + do ni=1,Nint + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - do sh2=endi,sh - exa = 0 - do ni=1,Nint - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 end if - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 - end if - - do j=shortcut(sh2),endi - ext = exa - do ni=1,Nint - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - if ( (dabs(u_0(org_j)) > 1.d-7).or.((dabs(u_0(org_i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - vt (org_i) = vt (org_i) + hij*u_0(org_j) - vt (org_j) = vt (org_j) + hij*u_0(org_i) - endif - end if + do j=shortcut(sh2),endi + ext = exa + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if end do end do - enddo - !$OMP END DO end do - + enddo + !$OMP END DO + !$OMP SINGLE + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if + end do + end do + enddo + !$OMP END DO !$OMP DO SCHEDULE(guided) diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index ebc30e99..5e931f14 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -178,6 +178,7 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) shortcut(0) = 1 shortcut(1) = 1 + version(:,1) = key(:,1,1) do i=2,N_key do ni=1,nint if(key(ni,1,i) /= key(ni,1,i-1)) then diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 89d0a72b..c11b2161 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -132,62 +132,78 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) !$OMP REDUCTION(+:s2) allocate(idx(0:n)) - do pass=1,2 - if(pass == 1) then - call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) - else - call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + + !$OMP SINGLE + call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0) - - if(pass == 2) then - endi = sh - else - endi = 1 - end if - - do sh2=endi,sh - exa = 0 - do ni=1,N_int - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 end if - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 - end if - - do j=shortcut(sh2),endi - ext = exa - do ni=1,N_int - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - - - if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp - endif - - end if + do j=shortcut(sh2),endi + ext = exa + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if end do end do - enddo - !$OMP END DO end do + enddo + !$OMP END DO + + !$OMP SINGLE + call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if + end do + end do + enddo + !$OMP END DO + deallocate(idx) !$OMP END PARALLEL s2 = s2+s2