10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-23 12:56:14 +01:00

Merge pull request #110 from garniron/master

removed connection proxy
This commit is contained in:
Thomas Applencourt 2015-11-04 16:50:21 +01:00
commit 1af3862bdb
5 changed files with 386 additions and 592 deletions

View File

@ -108,7 +108,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
integer(bit_kind) :: dets_in_sorted(Nint,2,sze) integer(bit_kind) :: dets_in_sorted(Nint,2,sze)
integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp
PROVIDE det_connections !PROVIDE det_connections
call write_time(iunit) call write_time(iunit)
call wall_time(wall) call wall_time(wall)
@ -215,8 +215,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
! ---------------------- ! ----------------------
do k=1,N_st do k=1,N_st
call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) !call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate)
!call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) !call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
enddo enddo
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l> ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@ -368,7 +369,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
abort_here = abort_all abort_here = abort_all
end end
subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -389,8 +392,11 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0
integer,intent(in) :: shortcut(0:n+1), sort_idx(n) integer :: shortcut(0:n+1), sort_idx(n)
integer :: tmp, warp(2,0:n+1), sh, ni integer(bit_kind) :: sorted(Nint,n), version(Nint,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
! !
ASSERT (Nint > 0) ASSERT (Nint > 0)
@ -401,8 +407,10 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,warp,tmp,sh) & !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) &
!$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,shortcut,sort_idx) !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,sorted,shortcut,sort_idx,version)
!$OMP DO SCHEDULE(static) !$OMP DO SCHEDULE(static)
do i=1,n do i=1,n
@ -412,42 +420,73 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
allocate(idx(0:n), vt(n)) allocate(idx(0:n), vt(n))
Vt = 0.d0 Vt = 0.d0
!$OMP SINGLE
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0) do sh=1,shortcut(0)
warp(1,0) = 0 do sh2=1,sh
do ii=1,sh!shortcut(0) exa = 0
tmp = 0
do ni=1,Nint do ni=1,Nint
tmp = popcnt(xor(keys_tmp(ni,1, shortcut(ii)), keys_tmp(ni,1,shortcut(sh)))) exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do end do
if(tmp <= 4) then if(exa > 2) then
tmp = warp(1,0) + 1 cycle
warp(1,0) = tmp end if
warp(1,tmp) = shortcut(ii)
warp(2,tmp) = shortcut(ii+1)-1 do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if end if
end do end do
end do
do ii=shortcut(sh),shortcut(sh+1)-1 end do
idx(0) = ii
!call filter_connected_davidson_mwen(keys_tmp,shortcut,keys_tmp(1,1,ii),Nint,ii-1,idx)
call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx)
i = sort_idx(ii)
do jj=1,idx(0)
j = sort_idx(idx(jj))
!j = idx(jj)
if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then
call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij)
vt (i) = vt (i) + hij*u_0(j)
vt (j) = vt (j) + hij*u_0(i)
endif
enddo
enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP SINGLE
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
do j=shortcut(sh),i-1
ext = 0
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext == 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if
end do
end do
enddo
!$OMP END DO
!$OMP DO SCHEDULE(guided) !$OMP DO SCHEDULE(guided)
@ -469,80 +508,3 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
!$OMP END PARALLEL !$OMP END PARALLEL
end end
subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
END_DOC
integer, intent(in) :: n,Nint,istate
double precision, intent(out) :: v_0(n)
double precision, intent(in) :: u_0(n)
double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
integer, allocatable :: idx(:)
double precision :: hij
double precision, allocatable :: vt(:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy delta_ij
integer, parameter :: block_size = 157
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) &
!$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij)
!$OMP DO SCHEDULE(static)
do i=1,n
v_0(i) = H_jj(i) * u_0(i)
enddo
!$OMP END DO
allocate(idx(0:n), vt(n))
Vt = 0.d0
!$OMP DO SCHEDULE(guided)
do i=1,n
idx(0) = i
call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx)
do jj=1,idx(0)
j = idx(jj)
! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then
call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
hij = hij
vt (i) = vt (i) + hij*u_0(j)
vt (j) = vt (j) + hij*u_0(i)
! endif
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(guided)
do ii=1,n_det_ref
i = idx_ref(ii)
do jj = 1, n_det_non_ref
j = idx_non_ref(jj)
vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j)
vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i)
enddo
enddo
!$OMP END DO
!$OMP CRITICAL
do i=1,n
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(idx,vt)
!$OMP END PARALLEL
end

View File

@ -121,35 +121,34 @@ subroutine tamiser(key, idx, no, n, Nint, N_key)
end subroutine end subroutine
subroutine sort_dets_ba_v(key, idx, shortcut, version, N_key, Nint) subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks use bitmasks
implicit none implicit none
integer(bit_kind),intent(inout) :: key(Nint,2,N_key) integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key) integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1) integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1)
integer, intent(in) :: Nint, N_key integer, intent(in) :: Nint, N_key
integer(bit_kind) :: tmp(Nint, 2,N_key) integer(bit_kind) :: tmp(Nint, 2,N_key)
tmp(:,1,:N_key) = key(:,2,:N_key) key(:,1,:N_key) = key_in(:,2,:N_key)
tmp(:,2,:N_key) = key(:,1,:N_key) key(:,2,:N_key) = key_in(:,1,:N_key)
call sort_dets_ab_v(tmp, idx, shortcut, version, N_key, Nint) call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint)
key(:,1,:N_key) = tmp(:,2,:N_key)
key(:,2,:N_key) = tmp(:,1,:N_key)
end subroutine end subroutine
subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks use bitmasks
implicit none implicit none
integer(bit_kind),intent(inout) :: key(Nint,2,N_key) integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key) integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1) integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1)
@ -157,6 +156,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
integer(bit_kind) :: tmp(Nint, 2) integer(bit_kind) :: tmp(Nint, 2)
integer :: tmpidx,i,ni integer :: tmpidx,i,ni
key(:,:,:) = key_in(:,:,:)
do i=1,N_key do i=1,N_key
idx(i) = i idx(i) = i
end do end do
@ -177,6 +177,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
shortcut(0) = 1 shortcut(0) = 1
shortcut(1) = 1 shortcut(1) = 1
version(:,1) = key(:,1,1)
do i=2,N_key do i=2,N_key
do ni=1,nint do ni=1,nint
if(key(ni,1,i) /= key(ni,1,i-1)) then if(key(ni,1,i) /= key(ni,1,i-1)) then
@ -188,6 +189,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
end do end do
end do end do
shortcut(shortcut(0)+1) = N_key+1 shortcut(shortcut(0)+1) = N_key+1
key_out(:,:) = key(:,2,:)
end subroutine end subroutine
c c
@ -288,7 +290,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
integer(bit_kind) :: dets_in_sorted(Nint, 2, sze) integer(bit_kind) :: dets_in_sorted(Nint, 2, sze)
integer :: idx(sze), shortcut(0:sze+1) integer :: idx(sze), shortcut(0:sze+1)
PROVIDE det_connections !PROVIDE det_connections
call write_time(iunit) call write_time(iunit)
call wall_time(wall) call wall_time(wall)

View File

@ -147,6 +147,7 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx)
end do end do
idx(l) = i_beta idx(l) = i_beta
l = l + 1 l = l + 1
exit beta_loop
end do beta_loop end do beta_loop
else else
do i_beta=warp(1,i_alpha),endloop do i_beta=warp(1,i_alpha),endloop
@ -190,201 +191,201 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx)
end end
subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) ! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx)
use bitmasks ! use bitmasks
implicit none ! implicit none
BEGIN_DOC ! BEGIN_DOC
! Filters out the determinants that are not connected by H ! ! Filters out the determinants that are not connected by H
! returns the array idx which contains the index of the ! ! returns the array idx which contains the index of the
! determinants in the array key1 that interact ! ! determinants in the array key1 that interact
! via the H operator with key2. ! ! via the H operator with key2.
! ! !
! idx(0) is the number of determinants that interact with key1 ! ! idx(0) is the number of determinants that interact with key1
! key1 should come from psi_det_sorted_ab. ! ! key1 should come from psi_det_sorted_ab.
END_DOC ! END_DOC
integer, intent(in) :: Nint, sze ! integer, intent(in) :: Nint, sze
integer(bit_kind), intent(in) :: key1(Nint,2,sze) ! integer(bit_kind), intent(in) :: key1(Nint,2,sze)
integer(bit_kind), intent(in) :: key2(Nint,2) ! integer(bit_kind), intent(in) :: key2(Nint,2)
integer, intent(out) :: idx(0:sze) ! integer, intent(out) :: idx(0:sze)
!
integer,intent(in) :: shortcut(0:sze+1) ! integer,intent(in) :: shortcut(0:sze+1)
!
integer :: i,j,k,l ! integer :: i,j,k,l
integer :: degree_x2 ! integer :: degree_x2
integer :: i_alpha, i_beta, exc_a, exc_b, endloop ! integer :: i_alpha, i_beta, exc_a, exc_b, endloop
integer(bit_kind) :: tmp1, tmp2 ! integer(bit_kind) :: tmp1, tmp2
!
ASSERT (Nint > 0) ! ASSERT (Nint > 0)
ASSERT (sze >= 0) ! ASSERT (sze >= 0)
!
l=1 ! l=1
i_alpha = 0 ! i_alpha = 0
!
if (Nint==1) then ! if (Nint==1) then
do while(shortcut(i_alpha+1) < sze) ! do while(shortcut(i_alpha+1) < sze)
i_alpha = i_alpha + 1 ! i_alpha = i_alpha + 1
exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) ! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1)))
if(exc_a > 4) then ! if(exc_a > 4) then
cycle ! cycle
end if ! end if
endloop = min(shortcut(i_alpha+1)-1, sze) ! endloop = min(shortcut(i_alpha+1)-1, sze)
if(exc_a == 4) then ! if(exc_a == 4) then
do i_beta = shortcut(i_alpha), endloop ! do i_beta = shortcut(i_alpha), endloop
if(key1(1,2,i_beta) == key2(1,2)) then ! if(key1(1,2,i_beta) == key2(1,2)) then
idx(l) = i_beta ! idx(l) = i_beta
l = l + 1 ! l = l + 1
exit ! exit
end if ! end if
end do ! end do
else ! else
do i_beta = shortcut(i_alpha), endloop ! do i_beta = shortcut(i_alpha), endloop
exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) ! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2)))
if(exc_b + exc_a <= 4) then ! if(exc_b + exc_a <= 4) then
idx(l) = i_beta ! idx(l) = i_beta
l = l + 1 ! l = l + 1
end if ! end if
end do ! end do
end if ! end if
end do ! end do
else ! else
print *, "TBD : filter_connected_davidson_shortcut Nint>1" ! print *, "TBD : filter_connected_davidson_shortcut Nint>1"
stop ! stop
end if ! end if
!
idx(0) = l-1 ! idx(0) = l-1
end ! end
!
subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) ! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx)
use bitmasks ! use bitmasks
implicit none ! implicit none
BEGIN_DOC ! BEGIN_DOC
! Filters out the determinants that are not connected by H ! ! Filters out the determinants that are not connected by H
! returns the array idx which contains the index of the ! ! returns the array idx which contains the index of the
! determinants in the array key1 that interact ! ! determinants in the array key1 that interact
! via the H operator with key2. ! ! via the H operator with key2.
! ! !
! idx(0) is the number of determinants that interact with key1 ! ! idx(0) is the number of determinants that interact with key1
! key1 should come from psi_det_sorted_ab. ! ! key1 should come from psi_det_sorted_ab.
END_DOC ! END_DOC
integer, intent(in) :: Nint, sze ! integer, intent(in) :: Nint, sze
integer(bit_kind), intent(in) :: key1(Nint,2,sze) ! integer(bit_kind), intent(in) :: key1(Nint,2,sze)
integer(bit_kind), intent(in) :: key2(Nint,2) ! integer(bit_kind), intent(in) :: key2(Nint,2)
integer, intent(out) :: idx(0:sze) ! integer, intent(inout) :: idx(0:sze)
!
integer :: i,j,k,l ! integer :: i,j,k,l
integer :: degree_x2 ! integer :: degree_x2
integer :: j_int, j_start ! integer :: j_int, j_start
integer*8 :: itmp ! integer*8 :: itmp
!
PROVIDE N_con_int det_connections ! PROVIDE N_con_int det_connections
!
!
ASSERT (Nint > 0) ! ASSERT (Nint > 0)
ASSERT (sze >= 0) ! ASSERT (sze >= 0)
!
l=1 ! l=1
!
if (Nint==1) then ! if (Nint==1) then
!
i = idx(0) ! lecture dans un intent(out) ? ! i = idx(0) ! lecture dans un intent(out) ?
do j_int=1,N_con_int ! do j_int=1,N_con_int
itmp = det_connections(j_int,i) ! itmp = det_connections(j_int,i)
do while (itmp /= 0_8) ! do while (itmp /= 0_8)
j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) ! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
do j = j_start+1, min(j_start+32,i-1) ! do j = j_start+1, min(j_start+32,i-1)
degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & ! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
popcnt(xor( key1(1,2,j), key2(1,2))) ! popcnt(xor( key1(1,2,j), key2(1,2)))
if (degree_x2 > 4) then ! if (degree_x2 > 4) then
cycle ! cycle
else ! else
idx(l) = j ! idx(l) = j
l = l+1 ! l = l+1
endif ! endif
enddo ! enddo
itmp = iand(itmp-1_8,itmp) ! itmp = iand(itmp-1_8,itmp)
enddo ! enddo
enddo ! enddo
!
else if (Nint==2) then ! else if (Nint==2) then
!
!
i = idx(0) ! i = idx(0)
do j_int=1,N_con_int ! do j_int=1,N_con_int
itmp = det_connections(j_int,i) ! itmp = det_connections(j_int,i)
do while (itmp /= 0_8) ! do while (itmp /= 0_8)
j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) ! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
do j = j_start+1, min(j_start+32,i-1) ! do j = j_start+1, min(j_start+32,i-1)
degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & ! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
popcnt(xor( key1(2,1,j), key2(2,1))) + & ! popcnt(xor( key1(2,1,j), key2(2,1))) + &
popcnt(xor( key1(1,2,j), key2(1,2))) + & ! popcnt(xor( key1(1,2,j), key2(1,2))) + &
popcnt(xor( key1(2,2,j), key2(2,2))) ! popcnt(xor( key1(2,2,j), key2(2,2)))
if (degree_x2 > 4) then ! if (degree_x2 > 4) then
cycle ! cycle
else ! else
idx(l) = j ! idx(l) = j
l = l+1 ! l = l+1
endif ! endif
enddo ! enddo
itmp = iand(itmp-1_8,itmp) ! itmp = iand(itmp-1_8,itmp)
enddo ! enddo
enddo ! enddo
!
else if (Nint==3) then ! else if (Nint==3) then
!
i = idx(0) ! i = idx(0)
!DIR$ LOOP COUNT (1000) ! !DIR$ LOOP COUNT (1000)
do j_int=1,N_con_int ! do j_int=1,N_con_int
itmp = det_connections(j_int,i) ! itmp = det_connections(j_int,i)
do while (itmp /= 0_8) ! do while (itmp /= 0_8)
j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) ! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
do j = j_start+1, min(j_start+32,i-1) ! do j = j_start+1, min(j_start+32,i-1)
degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & ! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
popcnt(xor( key1(1,2,j), key2(1,2))) + & ! popcnt(xor( key1(1,2,j), key2(1,2))) + &
popcnt(xor( key1(2,1,j), key2(2,1))) + & ! popcnt(xor( key1(2,1,j), key2(2,1))) + &
popcnt(xor( key1(2,2,j), key2(2,2))) + & ! popcnt(xor( key1(2,2,j), key2(2,2))) + &
popcnt(xor( key1(3,1,j), key2(3,1))) + & ! popcnt(xor( key1(3,1,j), key2(3,1))) + &
popcnt(xor( key1(3,2,j), key2(3,2))) ! popcnt(xor( key1(3,2,j), key2(3,2)))
if (degree_x2 > 4) then ! if (degree_x2 > 4) then
cycle ! cycle
else ! else
idx(l) = j ! idx(l) = j
l = l+1 ! l = l+1
endif ! endif
enddo ! enddo
itmp = iand(itmp-1_8,itmp) ! itmp = iand(itmp-1_8,itmp)
enddo ! enddo
enddo ! enddo
!
else ! else
!
i = idx(0) ! i = idx(0)
!DIR$ LOOP COUNT (1000) ! !DIR$ LOOP COUNT (1000)
do j_int=1,N_con_int ! do j_int=1,N_con_int
itmp = det_connections(j_int,i) ! itmp = det_connections(j_int,i)
do while (itmp /= 0_8) ! do while (itmp /= 0_8)
j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) ! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
do j = j_start+1, min(j_start+32,i-1) ! do j = j_start+1, min(j_start+32,i-1)
degree_x2 = 0 ! degree_x2 = 0
!DEC$ LOOP COUNT MIN(4) ! !DEC$ LOOP COUNT MIN(4)
do k=1,Nint ! do k=1,Nint
degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& ! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +&
popcnt(xor( key1(k,2,j), key2(k,2))) ! popcnt(xor( key1(k,2,j), key2(k,2)))
if (degree_x2 > 4) then ! if (degree_x2 > 4) then
exit ! exit
endif ! endif
enddo ! enddo
if (degree_x2 <= 5) then ! if (degree_x2 <= 5) then
idx(l) = j ! idx(l) = j
l = l+1 ! l = l+1
endif ! endif
enddo ! enddo
itmp = iand(itmp-1_8,itmp) ! itmp = iand(itmp-1_8,itmp)
enddo ! enddo
enddo ! enddo
!
endif ! endif
idx(0) = l-1 ! idx(0) = l-1
end ! end
subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
use bitmasks use bitmasks

View File

@ -114,28 +114,96 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
double precision, intent(in) :: psi_coefs_tmp(nmax) double precision, intent(in) :: psi_coefs_tmp(nmax)
double precision, intent(out) :: s2 double precision, intent(out) :: s2
double precision :: s2_tmp double precision :: s2_tmp
integer :: i,j,l,jj integer :: i,j,l,jj,ii
integer, allocatable :: idx(:) integer, allocatable :: idx(:)
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(N_int,n), version(N_int,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
double precision :: davidson_threshold_bis
!PROVIDE davidson_threshold
s2 = 0.d0 s2 = 0.d0
davidson_threshold_bis = davidson_threshold
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,idx) & !$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) &
!$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)&
!$OMP REDUCTION(+:s2) !$OMP REDUCTION(+:s2)
allocate(idx(0:n)) allocate(idx(0:n))
!$OMP SINGLE
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do i=1,n do sh=1,shortcut(0)
idx(0) = i
call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) do sh2=1,sh
do jj=1,idx(0) exa = 0
j = idx(jj) do ni=1,N_int
if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then > davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif endif
enddo end if
end do
end do
end do
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP SINGLE
call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
do j=shortcut(sh),i-1
ext = 0
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext == 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
enddo
!$OMP END DO
deallocate(idx) deallocate(idx)
!$OMP END PARALLEL !$OMP END PARALLEL
s2 = s2+s2 s2 = s2+s2
@ -146,3 +214,4 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
s2 = s2 + S_z2_Sz s2 = s2 + S_z2_Sz
end end

View File

@ -1300,243 +1300,3 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint)
end end
subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
END_DOC
integer, intent(in) :: n,Nint
double precision, intent(out) :: v_0(n)
double precision, intent(in) :: u_0(n)
double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
integer, allocatable :: idx(:)
double precision :: hij
double precision, allocatable :: vt(:)
integer :: i,j,k,l, jj,ii,sh
integer :: i0, j0
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) &
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold)
allocate(idx(0:n), vt(n))
Vt = 0.d0
v_0 = 0.d0
!$OMP DO SCHEDULE(guided)
do ii=1,n
idx(0) = ii
i = ii
call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx)
do jj=1,idx(0)
j = idx(jj)
if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then
call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij)
vt (i) = vt (i) + hij*u_0(j)
vt (j) = vt (j) + hij*u_0(i)
endif
enddo
enddo
!$OMP END DO
!$OMP CRITICAL
do i=1,n
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(idx,vt)
!$OMP END PARALLEL
do i=1,n
v_0(i) += H_jj(i) * u_0(i)
enddo
end
BEGIN_PROVIDER [ integer, N_con_int ]
implicit none
BEGIN_DOC
! Number of integers to represent the connections between determinants
END_DOC
N_con_int = 1 + ishft(N_det-1,-11)
END_PROVIDER
BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ]
implicit none
BEGIN_DOC
! Build connection proxy between determinants
END_DOC
integer :: i,j
integer :: degree
integer :: j_int, j_k, j_l
integer, allocatable :: idx(:)
integer :: thread_num
integer :: omp_get_thread_num
PROVIDE progress_bar
call start_progress(N_det,'Det connections',0.d0)
select case(N_int)
case(1)
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, &
!$OMP progress_bar,progress_value)&
!$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num)
!$ thread_num = omp_get_thread_num()
allocate (idx(0:N_det))
!$OMP DO SCHEDULE(guided)
do i=1,N_det
if (thread_num == 0) then
progress_bar(1) = i
progress_value = dble(i)
endif
do j_int=1,N_con_int
det_connections(j_int,i) = 0_8
j_k = ishft(j_int-1,11)
do j_l = j_k,min(j_k+2047,N_det), 32
do j = j_l+1,min(j_l+32,i)
degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + &
popcnt(xor( psi_det(1,2,i),psi_det(1,2,j)))
if (degree < 5) then
det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) )
exit
endif
enddo
enddo
enddo
enddo
!$OMP ENDDO
deallocate(idx)
!$OMP END PARALLEL
case(2)
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,&
!$OMP progress_bar,progress_value)&
!$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num)
!$ thread_num = omp_get_thread_num()
allocate (idx(0:N_det))
!$OMP DO SCHEDULE(guided)
do i=1,N_det
if (thread_num == 0) then
progress_bar(1) = i
progress_value = dble(i)
endif
do j_int=1,N_con_int
det_connections(j_int,i) = 0_8
j_k = ishft(j_int-1,11)
do j_l = j_k,min(j_k+2047,N_det), 32
do j = j_l+1,min(j_l+32,i)
degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + &
popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + &
popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + &
popcnt(xor( psi_det(2,2,i),psi_det(2,2,j)))
if (degree < 5) then
det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) )
exit
endif
enddo
enddo
enddo
enddo
!$OMP ENDDO
deallocate(idx)
!$OMP END PARALLEL
case(3)
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,&
!$OMP progress_bar,progress_value)&
!$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num)
!$ thread_num = omp_get_thread_num()
allocate (idx(0:N_det))
!$OMP DO SCHEDULE(guided)
do i=1,N_det
if (thread_num == 0) then
progress_bar(1) = i
progress_value = dble(i)
endif
do j_int=1,N_con_int
det_connections(j_int,i) = 0_8
j_k = ishft(j_int-1,11)
do j_l = j_k,min(j_k+2047,N_det), 32
do j = j_l+1,min(j_l+32,i)
degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + &
popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + &
popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + &
popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + &
popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + &
popcnt(xor( psi_det(3,2,i),psi_det(3,2,j)))
if (degree < 5) then
det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) )
exit
endif
enddo
enddo
enddo
enddo
!$OMP ENDDO
deallocate(idx)
!$OMP END PARALLEL
case default
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,&
!$OMP progress_bar,progress_value)&
!$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num)
!$ thread_num = omp_get_thread_num()
allocate (idx(0:N_det))
!$OMP DO SCHEDULE(guided)
do i=1,N_det
if (thread_num == 0) then
progress_bar(1) = i
progress_value = dble(i)
endif
do j_int=1,N_con_int
det_connections(j_int,i) = 0_8
j_k = ishft(j_int-1,11)
do j_l = j_k,min(j_k+2047,N_det), 32
do j = j_l+1,min(j_l+32,i)
!DIR$ FORCEINLINE
call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
if (degree < 3) then
det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) )
exit
endif
enddo
enddo
enddo
enddo
!$OMP ENDDO
deallocate(idx)
!$OMP END PARALLEL
end select
call stop_progress
END_PROVIDER