10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-10 21:18:29 +01:00

Merge pull request #112 from garniron/master

updated H_u_0 to match the method used in H_u_0_mrcc + small minilist optimization
This commit is contained in:
Anthony Scemama 2015-11-09 16:32:19 +01:00
commit 8cfff0066e
5 changed files with 105 additions and 352 deletions

View File

@ -105,9 +105,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
double precision :: to_print(2,N_st) double precision :: to_print(2,N_st)
double precision :: cpu, wall double precision :: cpu, wall
integer(bit_kind) :: dets_in_sorted(Nint,2,sze)
integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp
!PROVIDE det_connections !PROVIDE det_connections
call write_time(iunit) call write_time(iunit)
@ -154,8 +151,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
! ============== ! ==============
dets_in_sorted(:,:,:) = dets_in(:,:,:)
call sort_dets_ab(dets_in_sorted, idx, shortcut, sze, Nint)
k_pairs=0 k_pairs=0
do l=1,N_st do l=1,N_st
@ -215,8 +210,6 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
! ---------------------- ! ----------------------
do k=1,N_st do k=1,N_st
!call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate)
!call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
enddo enddo

View File

@ -91,16 +91,19 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Ndet_ref, Ndet_non_ref,i_generator,n
integer :: iint, ipos integer :: iint, ipos
integer :: i_state, k_sd, l_sd, i_I, i_alpha integer :: i_state, k_sd, l_sd, i_I, i_alpha
integer(bit_kind),allocatable :: miniList(:,:,:) integer(bit_kind),allocatable :: miniList(:,:,:), supalist(:,:,:)
integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer(bit_kind),intent(in) :: key_mask(Nint, 2)
integer,allocatable :: idx_miniList(:) integer,allocatable :: idx_miniList(:)
integer :: N_miniList, ni integer :: N_miniList, N_supalist, ni, leng
leng = max(N_det_generators, N_det_non_ref)
allocate(miniList(Nint, 2, max(N_det_generators, N_det_non_ref)), idx_miniList(max(N_det_generators, N_det_non_ref))) allocate(miniList(Nint, 2, leng), idx_miniList(leng), supalist(Nint,2,leng))
l = 0 l = 0
N_miniList = 0
N_supalist = 0
do ni = 1,Nint do ni = 1,Nint
l += popcnt(key_mask(ni,1)) + popcnt(key_mask(ni,2)) l += popcnt(key_mask(ni,1)) + popcnt(key_mask(ni,2))
end do end do
@ -109,24 +112,38 @@ subroutine mrcc_dress(delta_ij_, delta_ii_, Ndet_ref, Ndet_non_ref,i_generator,n
N_miniList = i_generator-1 N_miniList = i_generator-1
miniList(:,:,:N_miniList) = psi_det_generators(:,:,:N_minilist) miniList(:,:,:N_miniList) = psi_det_generators(:,:,:N_minilist)
else else
N_miniList = 0
do i=i_generator-1,1,-1 do i=i_generator-1,1,-1
k = l k = l
do ni=1,nint do ni=1,nint
k -= popcnt(iand(key_mask(ni,1), psi_det_generators(ni,1,i))) + popcnt(iand(key_mask(ni,2), psi_det_generators(ni,2,i))) k -= popcnt(iand(key_mask(ni,1), psi_det_generators(ni,1,i))) + popcnt(iand(key_mask(ni,2), psi_det_generators(ni,2,i)))
end do end do
if(k == 0) then ! if(k == 0) then
deallocate(miniList, idx_miniList) ! deallocate(miniList, supalist, idx_miniList)
return ! return
end if ! else if(k <= 2) then
if(k <= 2) then ! N_minilist += 1
! miniList(:,:,N_minilist) = psi_det_generators(:,:,i)
! end if
!
if(k == 2) then
N_supalist += 1
supalist(:,:,N_supalist) = psi_det_generators(:,:,i)
else if(k == 1) then
N_minilist += 1 N_minilist += 1
miniList(:,:,N_minilist) = psi_det_generators(:,:,i) miniList(:,:,N_minilist) = psi_det_generators(:,:,i)
else if(k == 0) then
deallocate(miniList, supalist, idx_miniList)
return
end if end if
end do end do
end if end if
if(N_supalist > 0) then
miniList(:,:,N_minilist+1:N_minilist+N_supalist) = supalist(:,:,:N_supalist)
N_minilist = N_minilist + N_supalist
end if
call find_triples_and_quadruples(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_minilist) call find_triples_and_quadruples(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_minilist)
@ -290,6 +307,8 @@ subroutine find_triples_and_quadruples(i_generator,n_selected,det_buffer,Nint,tq
N_tq = 0 N_tq = 0
i_loop : do i=1,N_selected i_loop : do i=1,N_selected
do j=1,N_miniList do j=1,N_miniList
nt = 0 nt = 0

View File

@ -287,10 +287,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
double precision :: to_print(2,N_st) double precision :: to_print(2,N_st)
double precision :: cpu, wall double precision :: cpu, wall
integer(bit_kind) :: dets_in_sorted(Nint, 2, sze)
integer :: idx(sze), shortcut(0:sze+1)
!PROVIDE det_connections
call write_time(iunit) call write_time(iunit)
call wall_time(wall) call wall_time(wall)
@ -336,8 +333,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
! Initialization ! Initialization
! ============== ! ==============
dets_in_sorted(:,:,:) = dets_in(:,:,:)
call sort_dets_ab(dets_in_sorted, idx, shortcut, sze, Nint)
k_pairs=0 k_pairs=0
do l=1,N_st do l=1,N_st
@ -350,7 +345,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs, & !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs, &
!$OMP Nint,dets_in_sorted,dets_in,u_in) & !$OMP Nint,dets_in,u_in) &
!$OMP PRIVATE(k,l,kl,i) !$OMP PRIVATE(k,l,kl,i)
@ -397,8 +392,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
! ---------------------- ! ----------------------
do k=1,N_st do k=1,N_st
! call H_u_0_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint) call H_u_0(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint)
call H_u_0(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint)
enddo enddo

View File

@ -98,295 +98,6 @@ subroutine filter_connected(key1,key2,Nint,sze,idx)
end end
subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx)
use bitmasks
implicit none
BEGIN_DOC
! Filters out the determinants that are not connected by H
! returns the array idx which contains the index of the
! determinants in the array key1 that interact
! via the H operator with key2.
!
! idx(0) is the number of determinants that interact with key1
! key1 should come from psi_det_sorted_ab.
END_DOC
integer, intent(in) :: Nint, sze
integer(bit_kind), intent(in) :: key1(Nint,2,sze)
integer(bit_kind), intent(in) :: key2(Nint,2)
integer, intent(out) :: idx(0:sze)
integer,intent(in) :: warp(2,0:sze+1)
integer :: i,j,k,l
integer :: degree_x2
integer :: i_alpha, i_beta, exc_a, exc_b, endloop, ni
integer(bit_kind) :: tmp1, tmp2
ASSERT (Nint > 0)
ASSERT (sze >= 0)
l=1
i_alpha = 0
if (Nint /= 1) then
do while(i_alpha < warp(1,0) .and. warp(1,i_alpha+1) <= sze)
i_alpha = i_alpha + 1
exc_a = 0
do ni=1,Nint
exc_a += popcnt(xor(key1(ni,1,warp(1,i_alpha)), key2(ni,1)))
end do
endloop = min(warp(2,i_alpha), sze)
if(exc_a == 4) then
beta_loop : do i_beta=warp(1,i_alpha),endloop
do ni=1,Nint
if(key1(ni,2,i_beta) /= key2(ni,2)) then
cycle beta_loop
end if
end do
idx(l) = i_beta
l = l + 1
exit beta_loop
end do beta_loop
else
do i_beta=warp(1,i_alpha),endloop
exc_b = 0
do ni=1,Nint
exc_b += popcnt(xor(key1(ni,2,i_beta), key2(ni,2)))
end do
if(exc_b + exc_a <= 4) then
idx(l) = i_beta
l = l + 1
end if
end do
end if
end do
else
do while(i_alpha < warp(1,0) .and. warp(1,i_alpha+1) <= sze)
i_alpha = i_alpha + 1
exc_a = popcnt(xor(key1(1,1,warp(1,i_alpha)), key2(1,1)))
endloop = min(warp(2,i_alpha), sze)
if(exc_a == 4) then
do i_beta=warp(1,i_alpha),endloop
if(key1(1,2,i_beta) == key2(1,2)) then
idx(l) = i_beta
l = l + 1
exit
end if
end do
else
do i_beta=warp(1,i_alpha),endloop
exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2)))
if(exc_b + exc_a <= 4) then
idx(l) = i_beta
l = l + 1
end if
end do
end if
end do
end if
idx(0) = l-1
end
! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx)
! use bitmasks
! implicit none
! BEGIN_DOC
! ! Filters out the determinants that are not connected by H
! ! returns the array idx which contains the index of the
! ! determinants in the array key1 that interact
! ! via the H operator with key2.
! !
! ! idx(0) is the number of determinants that interact with key1
! ! key1 should come from psi_det_sorted_ab.
! END_DOC
! integer, intent(in) :: Nint, sze
! integer(bit_kind), intent(in) :: key1(Nint,2,sze)
! integer(bit_kind), intent(in) :: key2(Nint,2)
! integer, intent(out) :: idx(0:sze)
!
! integer,intent(in) :: shortcut(0:sze+1)
!
! integer :: i,j,k,l
! integer :: degree_x2
! integer :: i_alpha, i_beta, exc_a, exc_b, endloop
! integer(bit_kind) :: tmp1, tmp2
!
! ASSERT (Nint > 0)
! ASSERT (sze >= 0)
!
! l=1
! i_alpha = 0
!
! if (Nint==1) then
! do while(shortcut(i_alpha+1) < sze)
! i_alpha = i_alpha + 1
! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1)))
! if(exc_a > 4) then
! cycle
! end if
! endloop = min(shortcut(i_alpha+1)-1, sze)
! if(exc_a == 4) then
! do i_beta = shortcut(i_alpha), endloop
! if(key1(1,2,i_beta) == key2(1,2)) then
! idx(l) = i_beta
! l = l + 1
! exit
! end if
! end do
! else
! do i_beta = shortcut(i_alpha), endloop
! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2)))
! if(exc_b + exc_a <= 4) then
! idx(l) = i_beta
! l = l + 1
! end if
! end do
! end if
! end do
! else
! print *, "TBD : filter_connected_davidson_shortcut Nint>1"
! stop
! end if
!
! idx(0) = l-1
! end
!
! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx)
! use bitmasks
! implicit none
! BEGIN_DOC
! ! Filters out the determinants that are not connected by H
! ! returns the array idx which contains the index of the
! ! determinants in the array key1 that interact
! ! via the H operator with key2.
! !
! ! idx(0) is the number of determinants that interact with key1
! ! key1 should come from psi_det_sorted_ab.
! END_DOC
! integer, intent(in) :: Nint, sze
! integer(bit_kind), intent(in) :: key1(Nint,2,sze)
! integer(bit_kind), intent(in) :: key2(Nint,2)
! integer, intent(inout) :: idx(0:sze)
!
! integer :: i,j,k,l
! integer :: degree_x2
! integer :: j_int, j_start
! integer*8 :: itmp
!
! PROVIDE N_con_int det_connections
!
!
! ASSERT (Nint > 0)
! ASSERT (sze >= 0)
!
! l=1
!
! if (Nint==1) then
!
! i = idx(0) ! lecture dans un intent(out) ?
! do j_int=1,N_con_int
! itmp = det_connections(j_int,i)
! do while (itmp /= 0_8)
! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
! do j = j_start+1, min(j_start+32,i-1)
! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
! popcnt(xor( key1(1,2,j), key2(1,2)))
! if (degree_x2 > 4) then
! cycle
! else
! idx(l) = j
! l = l+1
! endif
! enddo
! itmp = iand(itmp-1_8,itmp)
! enddo
! enddo
!
! else if (Nint==2) then
!
!
! i = idx(0)
! do j_int=1,N_con_int
! itmp = det_connections(j_int,i)
! do while (itmp /= 0_8)
! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
! do j = j_start+1, min(j_start+32,i-1)
! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
! popcnt(xor( key1(2,1,j), key2(2,1))) + &
! popcnt(xor( key1(1,2,j), key2(1,2))) + &
! popcnt(xor( key1(2,2,j), key2(2,2)))
! if (degree_x2 > 4) then
! cycle
! else
! idx(l) = j
! l = l+1
! endif
! enddo
! itmp = iand(itmp-1_8,itmp)
! enddo
! enddo
!
! else if (Nint==3) then
!
! i = idx(0)
! !DIR$ LOOP COUNT (1000)
! do j_int=1,N_con_int
! itmp = det_connections(j_int,i)
! do while (itmp /= 0_8)
! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
! do j = j_start+1, min(j_start+32,i-1)
! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + &
! popcnt(xor( key1(1,2,j), key2(1,2))) + &
! popcnt(xor( key1(2,1,j), key2(2,1))) + &
! popcnt(xor( key1(2,2,j), key2(2,2))) + &
! popcnt(xor( key1(3,1,j), key2(3,1))) + &
! popcnt(xor( key1(3,2,j), key2(3,2)))
! if (degree_x2 > 4) then
! cycle
! else
! idx(l) = j
! l = l+1
! endif
! enddo
! itmp = iand(itmp-1_8,itmp)
! enddo
! enddo
!
! else
!
! i = idx(0)
! !DIR$ LOOP COUNT (1000)
! do j_int=1,N_con_int
! itmp = det_connections(j_int,i)
! do while (itmp /= 0_8)
! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5)
! do j = j_start+1, min(j_start+32,i-1)
! degree_x2 = 0
! !DEC$ LOOP COUNT MIN(4)
! do k=1,Nint
! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +&
! popcnt(xor( key1(k,2,j), key2(k,2)))
! if (degree_x2 > 4) then
! exit
! endif
! enddo
! if (degree_x2 <= 5) then
! idx(l) = j
! l = l+1
! endif
! enddo
! itmp = iand(itmp-1_8,itmp)
! enddo
! enddo
!
! endif
! idx(0) = l-1
! end
subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx)
use bitmasks use bitmasks
BEGIN_DOC BEGIN_DOC

View File

@ -1216,7 +1216,7 @@ subroutine get_occ_from_key(key,occ,Nint)
end end
subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -1234,11 +1234,15 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint)
integer, allocatable :: idx(:) integer, allocatable :: idx(:)
double precision :: hij double precision :: hij
double precision, allocatable :: vt(:) double precision, allocatable :: vt(:)
integer :: i,j,k,l, jj,ii,sh integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0
integer,intent(in) :: shortcut(0:n+1), sort_idx(n) integer :: shortcut(0:n+1), sort_idx(n)
integer :: tmp, warp(2,0:n+1), ni integer(bit_kind) :: sorted(Nint,n), version(Nint,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi
!
ASSERT (Nint > 0) ASSERT (Nint > 0)
@ -1246,47 +1250,80 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint)
ASSERT (n>0) ASSERT (n>0)
PROVIDE ref_bitmask_energy PROVIDE ref_bitmask_energy
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,warp,tmp,sh) & !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) &
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,shortcut,sort_idx) !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version,davidson_criterion_is_built)
allocate(idx(0:n), vt(n)) allocate(idx(0:n), vt(n))
Vt = 0.d0 Vt = 0.d0
v_0 = 0.d0 v_0 = 0.d0
!$OMP SINGLE
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0) do sh=1,shortcut(0)
warp(1,0) = 0 do sh2=1,sh
do ii=1,sh!shortcut(0) exa = 0
tmp = 0
do ni=1,Nint do ni=1,Nint
tmp = popcnt(xor(keys_tmp(ni,1, shortcut(ii)), keys_tmp(ni,1,shortcut(sh)))) exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do end do
if(tmp <= 4) then if(exa > 2) then
warp(1,0) = warp(1,0) + 1 cycle
warp(1,warp(1,0)) = shortcut(ii) end if
warp(2,warp(1,0)) = shortcut(ii+1)-1
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
endif
endif endif
enddo enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP SINGLE
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP END SINGLE
do ii=shortcut(sh),shortcut(sh+1)-1 !$OMP DO SCHEDULE(dynamic)
idx(0) = ii do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) do j=shortcut(sh),i-1
i = sort_idx(ii) ext = 0
do ni=1,Nint
do jj=1,idx(0) ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
j = sort_idx(idx(jj)) end do
if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then if(ext == 4) then
call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) org_i = sort_idx(i)
vt (i) = vt (i) + hij*u_0(j) org_j = sort_idx(j)
vt (j) = vt (j) + hij*u_0(i) if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if
end if end if
end do end do
end do end do
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP CRITICAL !$OMP CRITICAL
do i=1,n do i=1,n
v_0(i) = v_0(i) + vt(i) v_0(i) = v_0(i) + vt(i)
@ -1299,4 +1336,3 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint)
enddo enddo
end end