diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index cf28cf12..c7592fad 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -88,7 +88,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,1) - do sh2=sh,shortcut(0,1) + do sh2=1,shortcut(0,1) exa = popcnt(xor(version(1,sh,1), version(1,sh2,1))) if(exa > 2) then cycle @@ -102,16 +102,11 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,1),shortcut(sh+1,1)-1 org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if do ni=1,Nint sorted_i(ni) = sorted(ni,i,1) enddo - jloop: do j=shortcut(sh2,1),endi + jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 org_j = sort_idx(j,1) ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if(ext > 4) then @@ -126,7 +121,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) do istate=1,N_st vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) enddo enddo jloop enddo @@ -138,7 +132,7 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) - do j=shortcut(sh,2),i-1 + do j=shortcut(sh,2),shortcut(sh+1,2)-1 org_j = sort_idx(j,2) ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) do ni=2,Nint @@ -150,7 +144,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) do istate=1,N_st vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) enddo end do end do @@ -336,27 +329,29 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) v_0 = 0.d0 s_0 = 0.d0 - do i=1,n - do istate=1,N_st - ut(istate,i) = u_0(i,istate) - enddo - enddo - call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint) call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)& - !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) + !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) allocate(vt(N_st_8,n),st(N_st_8,n)) Vt = 0.d0 St = 0.d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,2),istate) + enddo + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,2) do i=shortcut(sh,2),shortcut(sh+1,2)-1 org_i = sort_idx(i,2) - do j=shortcut(sh,2),i-1 + do j=shortcut(sh,2),shortcut(sh+1,2)-1 org_j = sort_idx(j,2) ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2))) if (ext > 4) cycle @@ -368,19 +363,26 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) enddo end if end do end do enddo - !$OMP END DO NOWAIT + !$OMP END DO + + !$OMP DO + do i=1,n + do istate=1,N_st + ut(istate,i) = u_0(sort_idx(i,1),istate) + enddo + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0,1) - !$OMP DO SCHEDULE(dynamic) - do sh2=sh,shortcut(0,1) + do sh2=1,shortcut(0,1) exa = 0 do ni=1,Nint exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1))) @@ -391,16 +393,12 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) do i=shortcut(sh,1),shortcut(sh+1,1)-1 org_i = sort_idx(i,1) - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1,1)-1 - end if do ni=1,Nint sorted_i(ni) = sorted(ni,i,1) enddo - do j=shortcut(sh2,1),endi + do j=shortcut(sh2,1),shortcut(sh2+1,1)-1 + if (i==j) cycle ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1))) if (ext > 4) cycle do ni=2,Nint @@ -412,16 +410,14 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij) if (hij /= 0.d0) then do istate=1,n_st - vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j) - vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i) + vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j) enddo endif if (ext /= 2) then call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2) if (s2 /= 0.d0) then do istate=1,n_st - st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j) - st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i) + st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j) enddo endif endif @@ -429,8 +425,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) enddo enddo enddo - !$OMP END DO NOWAIT enddo + !$OMP END DO !$OMP CRITICAL (u0Hu0) do istate=1,N_st