From a6163512e1e93849a1fe0e2ca2bad2343d422e1f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 5 Nov 2015 15:05:19 +0100 Subject: [PATCH 1/2] optimized minilist order --- plugins/MRCC_Utils/mrcc_dress.irp.f | 43 +++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_dress.irp.f b/plugins/MRCC_Utils/mrcc_dress.irp.f index 7fb04144..a5f9e068 100644 --- a/plugins/MRCC_Utils/mrcc_dress.irp.f +++ b/plugins/MRCC_Utils/mrcc_dress.irp.f @@ -91,16 +91,19 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Ndet_ref, Ndet_non_ref,i_generator,n integer :: iint, ipos integer :: i_state, k_sd, l_sd, i_I, i_alpha - integer(bit_kind),allocatable :: miniList(:,:,:) + integer(bit_kind),allocatable :: miniList(:,:,:), supalist(:,:,:) integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer,allocatable :: idx_miniList(:) - integer :: N_miniList, ni + integer :: N_miniList, N_supalist, ni, leng - - allocate(miniList(Nint, 2, max(N_det_generators, N_det_non_ref)), idx_miniList(max(N_det_generators, N_det_non_ref))) + leng = max(N_det_generators, N_det_non_ref) + allocate(miniList(Nint, 2, leng), idx_miniList(leng), supalist(Nint,2,leng)) l = 0 + N_miniList = 0 + N_supalist = 0 + do ni = 1,Nint l += popcnt(key_mask(ni,1)) + popcnt(key_mask(ni,2)) end do @@ -109,22 +112,36 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Ndet_ref, Ndet_non_ref,i_generator,n N_miniList = i_generator-1 miniList(:,:,:N_miniList) = psi_det_generators(:,:,:N_minilist) else - N_miniList = 0 do i=i_generator-1,1,-1 k = l do ni=1,nint k -= popcnt(iand(key_mask(ni,1), psi_det_generators(ni,1,i))) + popcnt(iand(key_mask(ni,2), psi_det_generators(ni,2,i))) end do - if(k == 0) then - deallocate(miniList, idx_miniList) - return - end if - if(k <= 2) then +! if(k == 0) then +! deallocate(miniList, supalist, idx_miniList) +! return +! else if(k <= 2) then +! N_minilist += 1 +! miniList(:,:,N_minilist) = psi_det_generators(:,:,i) +! end if +! + if(k == 2) then + N_supalist += 1 + supalist(:,:,N_supalist) = psi_det_generators(:,:,i) + else if(k == 1) then N_minilist += 1 miniList(:,:,N_minilist) = psi_det_generators(:,:,i) + else if(k == 0) then + deallocate(miniList, supalist, idx_miniList) + return end if - end do + end do + end if + + if(N_supalist > 0) then + miniList(:,:,N_minilist+1:N_minilist+N_supalist) = supalist(:,:,:N_supalist) + N_minilist = N_minilist + N_supalist end if @@ -286,10 +303,12 @@ subroutine find_triples_and_quadruples(i_generator,n_selected,det_buffer,Nint,tq integer(bit_kind),intent(in) :: miniList(Nint,2,N_det_generators) integer,intent(in) :: N_miniList - + N_tq = 0 + + i_loop : do i=1,N_selected do j=1,N_miniList nt = 0 From 8f7487dee502beb9ea34c0b5892dd49729a26426 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 9 Nov 2015 10:16:13 +0100 Subject: [PATCH 2/2] updated H_u_0 to match the method used in H_u_0_mrcc --- plugins/MRCC_Utils/davidson.irp.f | 7 - src/Determinants/davidson.irp.f | 16 +- src/Determinants/filter_connected.irp.f | 289 ------------------------ src/Determinants/slater_rules.irp.f | 102 ++++++--- 4 files changed, 74 insertions(+), 340 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index db2fe26e..6752afcb 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -105,9 +105,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin double precision :: to_print(2,N_st) double precision :: cpu, wall - integer(bit_kind) :: dets_in_sorted(Nint,2,sze) - integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp - !PROVIDE det_connections call write_time(iunit) @@ -154,8 +151,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin ! ============== - dets_in_sorted(:,:,:) = dets_in(:,:,:) - call sort_dets_ab(dets_in_sorted, idx, shortcut, sze, Nint) k_pairs=0 do l=1,N_st @@ -215,8 +210,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin ! ---------------------- do k=1,N_st - !call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) - !call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) enddo diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 87605964..bceb145a 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -287,10 +287,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun double precision :: to_print(2,N_st) double precision :: cpu, wall - integer(bit_kind) :: dets_in_sorted(Nint, 2, sze) - integer :: idx(sze), shortcut(0:sze+1) - - !PROVIDE det_connections + call write_time(iunit) call wall_time(wall) @@ -336,9 +333,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun ! Initialization ! ============== - dets_in_sorted(:,:,:) = dets_in(:,:,:) - call sort_dets_ab(dets_in_sorted, idx, shortcut, sze, Nint) - + k_pairs=0 do l=1,N_st do k=1,l @@ -348,9 +343,9 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun enddo enddo - !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs, & - !$OMP Nint,dets_in_sorted,dets_in,u_in) & + !$OMP Nint,dets_in,u_in) & !$OMP PRIVATE(k,l,kl,i) @@ -397,8 +392,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun ! ---------------------- do k=1,N_st -! call H_u_0_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint) - call H_u_0(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint) + call H_u_0(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint) enddo diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index 3028e32a..cc304b59 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -98,295 +98,6 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) end - -subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer,intent(in) :: warp(2,0:sze+1) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: i_alpha, i_beta, exc_a, exc_b, endloop, ni - integer(bit_kind) :: tmp1, tmp2 - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - i_alpha = 0 - - - if (Nint /= 1) then - do while(i_alpha < warp(1,0) .and. warp(1,i_alpha+1) <= sze) - i_alpha = i_alpha + 1 - exc_a = 0 - do ni=1,Nint - exc_a += popcnt(xor(key1(ni,1,warp(1,i_alpha)), key2(ni,1))) - end do - endloop = min(warp(2,i_alpha), sze) - if(exc_a == 4) then - beta_loop : do i_beta=warp(1,i_alpha),endloop - do ni=1,Nint - if(key1(ni,2,i_beta) /= key2(ni,2)) then - cycle beta_loop - end if - end do - idx(l) = i_beta - l = l + 1 - exit beta_loop - end do beta_loop - else - do i_beta=warp(1,i_alpha),endloop - exc_b = 0 - do ni=1,Nint - exc_b += popcnt(xor(key1(ni,2,i_beta), key2(ni,2))) - end do - if(exc_b + exc_a <= 4) then - idx(l) = i_beta - l = l + 1 - end if - end do - end if - end do - else - do while(i_alpha < warp(1,0) .and. warp(1,i_alpha+1) <= sze) - i_alpha = i_alpha + 1 - exc_a = popcnt(xor(key1(1,1,warp(1,i_alpha)), key2(1,1))) - endloop = min(warp(2,i_alpha), sze) - if(exc_a == 4) then - do i_beta=warp(1,i_alpha),endloop - if(key1(1,2,i_beta) == key2(1,2)) then - idx(l) = i_beta - l = l + 1 - exit - end if - end do - else - do i_beta=warp(1,i_alpha),endloop - exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) - if(exc_b + exc_a <= 4) then - idx(l) = i_beta - l = l + 1 - end if - end do - end if - end do - end if - - idx(0) = l-1 -end - - -! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) -! use bitmasks -! implicit none -! BEGIN_DOC -! ! Filters out the determinants that are not connected by H -! ! returns the array idx which contains the index of the -! ! determinants in the array key1 that interact -! ! via the H operator with key2. -! ! -! ! idx(0) is the number of determinants that interact with key1 -! ! key1 should come from psi_det_sorted_ab. -! END_DOC -! integer, intent(in) :: Nint, sze -! integer(bit_kind), intent(in) :: key1(Nint,2,sze) -! integer(bit_kind), intent(in) :: key2(Nint,2) -! integer, intent(out) :: idx(0:sze) -! -! integer,intent(in) :: shortcut(0:sze+1) -! -! integer :: i,j,k,l -! integer :: degree_x2 -! integer :: i_alpha, i_beta, exc_a, exc_b, endloop -! integer(bit_kind) :: tmp1, tmp2 -! -! ASSERT (Nint > 0) -! ASSERT (sze >= 0) -! -! l=1 -! i_alpha = 0 -! -! if (Nint==1) then -! do while(shortcut(i_alpha+1) < sze) -! i_alpha = i_alpha + 1 -! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) -! if(exc_a > 4) then -! cycle -! end if -! endloop = min(shortcut(i_alpha+1)-1, sze) -! if(exc_a == 4) then -! do i_beta = shortcut(i_alpha), endloop -! if(key1(1,2,i_beta) == key2(1,2)) then -! idx(l) = i_beta -! l = l + 1 -! exit -! end if -! end do -! else -! do i_beta = shortcut(i_alpha), endloop -! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) -! if(exc_b + exc_a <= 4) then -! idx(l) = i_beta -! l = l + 1 -! end if -! end do -! end if -! end do -! else -! print *, "TBD : filter_connected_davidson_shortcut Nint>1" -! stop -! end if -! -! idx(0) = l-1 -! end -! -! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) -! use bitmasks -! implicit none -! BEGIN_DOC -! ! Filters out the determinants that are not connected by H -! ! returns the array idx which contains the index of the -! ! determinants in the array key1 that interact -! ! via the H operator with key2. -! ! -! ! idx(0) is the number of determinants that interact with key1 -! ! key1 should come from psi_det_sorted_ab. -! END_DOC -! integer, intent(in) :: Nint, sze -! integer(bit_kind), intent(in) :: key1(Nint,2,sze) -! integer(bit_kind), intent(in) :: key2(Nint,2) -! integer, intent(inout) :: idx(0:sze) -! -! integer :: i,j,k,l -! integer :: degree_x2 -! integer :: j_int, j_start -! integer*8 :: itmp -! -! PROVIDE N_con_int det_connections -! -! -! ASSERT (Nint > 0) -! ASSERT (sze >= 0) -! -! l=1 -! -! if (Nint==1) then -! -! i = idx(0) ! lecture dans un intent(out) ? -! do j_int=1,N_con_int -! itmp = det_connections(j_int,i) -! do while (itmp /= 0_8) -! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) -! do j = j_start+1, min(j_start+32,i-1) -! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & -! popcnt(xor( key1(1,2,j), key2(1,2))) -! if (degree_x2 > 4) then -! cycle -! else -! idx(l) = j -! l = l+1 -! endif -! enddo -! itmp = iand(itmp-1_8,itmp) -! enddo -! enddo -! -! else if (Nint==2) then -! -! -! i = idx(0) -! do j_int=1,N_con_int -! itmp = det_connections(j_int,i) -! do while (itmp /= 0_8) -! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) -! do j = j_start+1, min(j_start+32,i-1) -! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & -! popcnt(xor( key1(2,1,j), key2(2,1))) + & -! popcnt(xor( key1(1,2,j), key2(1,2))) + & -! popcnt(xor( key1(2,2,j), key2(2,2))) -! if (degree_x2 > 4) then -! cycle -! else -! idx(l) = j -! l = l+1 -! endif -! enddo -! itmp = iand(itmp-1_8,itmp) -! enddo -! enddo -! -! else if (Nint==3) then -! -! i = idx(0) -! !DIR$ LOOP COUNT (1000) -! do j_int=1,N_con_int -! itmp = det_connections(j_int,i) -! do while (itmp /= 0_8) -! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) -! do j = j_start+1, min(j_start+32,i-1) -! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & -! popcnt(xor( key1(1,2,j), key2(1,2))) + & -! popcnt(xor( key1(2,1,j), key2(2,1))) + & -! popcnt(xor( key1(2,2,j), key2(2,2))) + & -! popcnt(xor( key1(3,1,j), key2(3,1))) + & -! popcnt(xor( key1(3,2,j), key2(3,2))) -! if (degree_x2 > 4) then -! cycle -! else -! idx(l) = j -! l = l+1 -! endif -! enddo -! itmp = iand(itmp-1_8,itmp) -! enddo -! enddo -! -! else -! -! i = idx(0) -! !DIR$ LOOP COUNT (1000) -! do j_int=1,N_con_int -! itmp = det_connections(j_int,i) -! do while (itmp /= 0_8) -! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) -! do j = j_start+1, min(j_start+32,i-1) -! degree_x2 = 0 -! !DEC$ LOOP COUNT MIN(4) -! do k=1,Nint -! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& -! popcnt(xor( key1(k,2,j), key2(k,2))) -! if (degree_x2 > 4) then -! exit -! endif -! enddo -! if (degree_x2 <= 5) then -! idx(l) = j -! l = l+1 -! endif -! enddo -! itmp = iand(itmp-1_8,itmp) -! enddo -! enddo -! -! endif -! idx(0) = l-1 -! end - subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks BEGIN_DOC diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index acffeb3d..cf3b8c1e 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1216,7 +1216,7 @@ subroutine get_occ_from_key(key,occ,Nint) end -subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) +subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) use bitmasks implicit none BEGIN_DOC @@ -1234,11 +1234,15 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) integer, allocatable :: idx(:) double precision :: hij double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii,sh + integer :: i,j,k,l, jj,ii integer :: i0, j0 - integer,intent(in) :: shortcut(0:n+1), sort_idx(n) - integer :: tmp, warp(2,0:n+1), ni + integer :: shortcut(0:n+1), sort_idx(n) + integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + + + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi +! ASSERT (Nint > 0) @@ -1246,47 +1250,80 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) ASSERT (n>0) PROVIDE ref_bitmask_energy !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,warp,tmp,sh) & - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,shortcut,sort_idx) + !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & + !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version,davidson_criterion_is_built) allocate(idx(0:n), vt(n)) Vt = 0.d0 v_0 = 0.d0 - - !$OMP DO SCHEDULE(dynamic) - + !$OMP SINGLE + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) - warp(1,0) = 0 - do ii=1,sh!shortcut(0) - tmp = 0 - do ni=1,Nint - tmp = popcnt(xor(keys_tmp(ni,1, shortcut(ii)), keys_tmp(ni,1,shortcut(sh)))) - end do - if(tmp <= 4) then - warp(1,0) = warp(1,0) + 1 - warp(1,warp(1,0)) = shortcut(ii) - warp(2,warp(1,0)) = shortcut(ii+1)-1 - end if + do sh2=1,sh + exa = 0 + do ni=1,Nint + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) end do + if(exa > 2) then + cycle + end if - - do ii=shortcut(sh),shortcut(sh+1)-1 - idx(0) = ii + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if - call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) - i = sort_idx(ii) - - do jj=1,idx(0) - j = sort_idx(idx(jj)) - if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) + do j=shortcut(sh2),endi + ext = exa + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + endif endif enddo enddo enddo + enddo !$OMP END DO + + !$OMP SINGLE + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext == 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if + end if + end do + end do + enddo + !$OMP END DO + !$OMP CRITICAL do i=1,n v_0(i) = v_0(i) + vt(i) @@ -1299,4 +1336,3 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) enddo end -