diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index abf1b63a..db2fe26e 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -108,7 +108,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin integer(bit_kind) :: dets_in_sorted(Nint,2,sze) integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) @@ -215,8 +215,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin ! ---------------------- do k=1,N_st - call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) + !call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) !call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) + call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) enddo ! Compute h_kl = = @@ -368,7 +369,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin abort_here = abort_all end -subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) + + +subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) use bitmasks implicit none BEGIN_DOC @@ -389,8 +392,11 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) integer :: i,j,k,l, jj,ii integer :: i0, j0 - integer,intent(in) :: shortcut(0:n+1), sort_idx(n) - integer :: tmp, warp(2,0:n+1), sh, ni + integer :: shortcut(0:n+1), sort_idx(n) + integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + + + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass ! ASSERT (Nint > 0) @@ -399,12 +405,14 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) PROVIDE ref_bitmask_energy delta_ij integer, parameter :: block_size = 157 - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,warp,tmp,sh) & - !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,shortcut,sort_idx) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) & + !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,sorted,shortcut,sort_idx,version) - !$OMP DO SCHEDULE(static) + + + !$OMP DO SCHEDULE(static) do i=1,n v_0(i) = H_jj(i) * u_0(i) enddo @@ -412,42 +420,73 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) allocate(idx(0:n), vt(n)) Vt = 0.d0 + + + !$OMP SINGLE + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) - warp(1,0) = 0 - do ii=1,sh!shortcut(0) - tmp = 0 - do ni=1,Nint - tmp = popcnt(xor(keys_tmp(ni,1, shortcut(ii)), keys_tmp(ni,1,shortcut(sh)))) - end do - if(tmp <= 4) then - tmp = warp(1,0) + 1 - warp(1,0) = tmp - warp(1,tmp) = shortcut(ii) - warp(2,tmp) = shortcut(ii+1)-1 - end if + do sh2=1,sh + exa = 0 + do ni=1,Nint + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) end do + if(exa > 2) then + cycle + end if - do ii=shortcut(sh),shortcut(sh+1)-1 - idx(0) = ii + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if - !call filter_connected_davidson_mwen(keys_tmp,shortcut,keys_tmp(1,1,ii),Nint,ii-1,idx) - call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) - i = sort_idx(ii) - - do jj=1,idx(0) - j = sort_idx(idx(jj)) - !j = idx(jj) - if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) - endif - enddo - enddo + do j=shortcut(sh2),endi + ext = exa + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if + end do + end do + end do enddo !$OMP END DO + !$OMP SINGLE + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext == 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if + end do + end do + enddo + !$OMP END DO !$OMP DO SCHEDULE(guided) @@ -469,80 +508,3 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) !$OMP END PARALLEL end - - -subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint,istate - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy delta_ij - integer, parameter :: block_size = 157 - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & - !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) - !$OMP DO SCHEDULE(static) - do i=1,n - v_0(i) = H_jj(i) * u_0(i) - enddo - !$OMP END DO - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - !$OMP DO SCHEDULE(guided) - do i=1,n - idx(0) = i - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) - do jj=1,idx(0) - j = idx(jj) -! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - hij = hij - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) -! endif - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(guided) - do ii=1,n_det_ref - i = idx_ref(ii) - do jj = 1, n_det_non_ref - j = idx_non_ref(jj) - vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) - vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) - enddo - enddo - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL -end - diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 03a699c7..87605964 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -121,35 +121,34 @@ subroutine tamiser(key, idx, no, n, Nint, N_key) end subroutine -subroutine sort_dets_ba_v(key, idx, shortcut, version, N_key, Nint) - +subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none - integer(bit_kind),intent(inout) :: key(Nint,2,N_key) + integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) + integer(bit_kind) :: key(Nint,2,N_key) + integer(bit_kind),intent(out) :: key_out(Nint,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1) integer, intent(in) :: Nint, N_key integer(bit_kind) :: tmp(Nint, 2,N_key) - tmp(:,1,:N_key) = key(:,2,:N_key) - tmp(:,2,:N_key) = key(:,1,:N_key) + key(:,1,:N_key) = key_in(:,2,:N_key) + key(:,2,:N_key) = key_in(:,1,:N_key) - call sort_dets_ab_v(tmp, idx, shortcut, version, N_key, Nint) - - - key(:,1,:N_key) = tmp(:,2,:N_key) - key(:,2,:N_key) = tmp(:,1,:N_key) + call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint) end subroutine -subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) +subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none - integer(bit_kind),intent(inout) :: key(Nint,2,N_key) + integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) + integer(bit_kind) :: key(Nint,2,N_key) + integer(bit_kind),intent(out) :: key_out(Nint,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1) @@ -157,6 +156,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) integer(bit_kind) :: tmp(Nint, 2) integer :: tmpidx,i,ni + key(:,:,:) = key_in(:,:,:) do i=1,N_key idx(i) = i end do @@ -177,6 +177,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) shortcut(0) = 1 shortcut(1) = 1 + version(:,1) = key(:,1,1) do i=2,N_key do ni=1,nint if(key(ni,1,i) /= key(ni,1,i-1)) then @@ -188,6 +189,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) end do end do shortcut(shortcut(0)+1) = N_key+1 + key_out(:,:) = key(:,2,:) end subroutine c @@ -288,7 +290,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun integer(bit_kind) :: dets_in_sorted(Nint, 2, sze) integer :: idx(sze), shortcut(0:sze+1) - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index ed4f092e..3028e32a 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -147,6 +147,7 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end do idx(l) = i_beta l = l + 1 + exit beta_loop end do beta_loop else do i_beta=warp(1,i_alpha),endloop @@ -190,201 +191,201 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end -subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer,intent(in) :: shortcut(0:sze+1) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: i_alpha, i_beta, exc_a, exc_b, endloop - integer(bit_kind) :: tmp1, tmp2 - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - i_alpha = 0 - - if (Nint==1) then - do while(shortcut(i_alpha+1) < sze) - i_alpha = i_alpha + 1 - exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) - if(exc_a > 4) then - cycle - end if - endloop = min(shortcut(i_alpha+1)-1, sze) - if(exc_a == 4) then - do i_beta = shortcut(i_alpha), endloop - if(key1(1,2,i_beta) == key2(1,2)) then - idx(l) = i_beta - l = l + 1 - exit - end if - end do - else - do i_beta = shortcut(i_alpha), endloop - exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) - if(exc_b + exc_a <= 4) then - idx(l) = i_beta - l = l + 1 - end if - end do - end if - end do - else - print *, "TBD : filter_connected_davidson_shortcut Nint>1" - stop - end if - - idx(0) = l-1 -end - -subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: j_int, j_start - integer*8 :: itmp - - PROVIDE N_con_int det_connections - - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - - if (Nint==1) then - - i = idx(0) ! lecture dans un intent(out) ? - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==2) then - - - i = idx(0) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==3) then - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) + & - popcnt(xor( key1(3,1,j), key2(3,1))) + & - popcnt(xor( key1(3,2,j), key2(3,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) - do k=1,Nint - degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& - popcnt(xor( key1(k,2,j), key2(k,2))) - if (degree_x2 > 4) then - exit - endif - enddo - if (degree_x2 <= 5) then - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - endif - idx(0) = l-1 -end +! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(out) :: idx(0:sze) +! +! integer,intent(in) :: shortcut(0:sze+1) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: i_alpha, i_beta, exc_a, exc_b, endloop +! integer(bit_kind) :: tmp1, tmp2 +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! i_alpha = 0 +! +! if (Nint==1) then +! do while(shortcut(i_alpha+1) < sze) +! i_alpha = i_alpha + 1 +! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) +! if(exc_a > 4) then +! cycle +! end if +! endloop = min(shortcut(i_alpha+1)-1, sze) +! if(exc_a == 4) then +! do i_beta = shortcut(i_alpha), endloop +! if(key1(1,2,i_beta) == key2(1,2)) then +! idx(l) = i_beta +! l = l + 1 +! exit +! end if +! end do +! else +! do i_beta = shortcut(i_alpha), endloop +! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) +! if(exc_b + exc_a <= 4) then +! idx(l) = i_beta +! l = l + 1 +! end if +! end do +! end if +! end do +! else +! print *, "TBD : filter_connected_davidson_shortcut Nint>1" +! stop +! end if +! +! idx(0) = l-1 +! end +! +! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(inout) :: idx(0:sze) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: j_int, j_start +! integer*8 :: itmp +! +! PROVIDE N_con_int det_connections +! +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! +! if (Nint==1) then +! +! i = idx(0) ! lecture dans un intent(out) ? +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==2) then +! +! +! i = idx(0) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==3) then +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) + & +! popcnt(xor( key1(3,1,j), key2(3,1))) + & +! popcnt(xor( key1(3,2,j), key2(3,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = 0 +! !DEC$ LOOP COUNT MIN(4) +! do k=1,Nint +! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& +! popcnt(xor( key1(k,2,j), key2(k,2))) +! if (degree_x2 > 4) then +! exit +! endif +! enddo +! if (degree_x2 <= 5) then +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! endif +! idx(0) = l-1 +! end subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 72c1b9aa..e836d25d 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -107,35 +107,103 @@ subroutine get_s2_u0_old(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) end subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) - implicit none - use bitmasks - integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) - integer, intent(in) :: n,nmax - double precision, intent(in) :: psi_coefs_tmp(nmax) - double precision, intent(out) :: s2 - double precision :: s2_tmp - integer :: i,j,l,jj - integer, allocatable :: idx(:) - s2 = 0.d0 - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,s2_tmp,idx) & - !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& - !$OMP REDUCTION(+:s2) - allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) - do i=1,n - idx(0) = i - call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) - do jj=1,idx(0) - j = idx(jj) - if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp - endif - enddo - enddo - !$OMP END DO + implicit none + use bitmasks + integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) + integer, intent(in) :: n,nmax + double precision, intent(in) :: psi_coefs_tmp(nmax) + double precision, intent(out) :: s2 + double precision :: s2_tmp + integer :: i,j,l,jj,ii + integer, allocatable :: idx(:) + + integer :: shortcut(0:n+1), sort_idx(n) + integer(bit_kind) :: sorted(N_int,n), version(N_int,n) + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass + double precision :: davidson_threshold_bis + + !PROVIDE davidson_threshold + + s2 = 0.d0 + davidson_threshold_bis = davidson_threshold + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) & + !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)& + !$OMP REDUCTION(+:s2) + allocate(idx(0:n)) + + + !$OMP SINGLE + call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if + + do j=shortcut(sh2),endi + ext = exa + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if + end do + end do + end do + enddo + !$OMP END DO + + !$OMP SINGLE + call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext == 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if + end do + end do + enddo + !$OMP END DO + deallocate(idx) !$OMP END PARALLEL s2 = s2+s2 @@ -146,3 +214,4 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) s2 = s2 + S_z2_Sz end + diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 06dcb9b7..acffeb3d 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1300,243 +1300,3 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) end -subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii,sh - integer :: i0, j0 - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - v_0 = 0.d0 - !$OMP DO SCHEDULE(guided) - - - - - - do ii=1,n - idx(0) = ii - i = ii - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) - - do jj=1,idx(0) - j = idx(jj) - if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) - endif - enddo - enddo - - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL - do i=1,n - v_0(i) += H_jj(i) * u_0(i) - enddo -end - - - -BEGIN_PROVIDER [ integer, N_con_int ] - implicit none - BEGIN_DOC - ! Number of integers to represent the connections between determinants - END_DOC - N_con_int = 1 + ishft(N_det-1,-11) -END_PROVIDER - -BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] - implicit none - BEGIN_DOC - ! Build connection proxy between determinants - END_DOC - integer :: i,j - integer :: degree - integer :: j_int, j_k, j_l - integer, allocatable :: idx(:) - integer :: thread_num - integer :: omp_get_thread_num - - PROVIDE progress_bar - call start_progress(N_det,'Det connections',0.d0) - - select case(N_int) - - case(1) - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(2) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(3) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & - popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & - popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case default - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - !DIR$ FORCEINLINE - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if (degree < 3) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - end select - call stop_progress - -END_PROVIDER -