10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-06-26 07:02:14 +02:00

optimized H_u_0_mrcc and get_s2_u0 - some unused functions commented out

This commit is contained in:
Yann Garniron 2015-11-01 08:57:01 +01:00
parent 3eec2f5683
commit 67836776e1
3 changed files with 307 additions and 73 deletions

View File

@ -215,8 +215,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
! ----------------------
do k=1,N_st
call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate)
!call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate)
!call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate)
enddo
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@ -368,7 +369,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin
abort_here = abort_all
end
subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
subroutine H_u_0_mrcc_myold(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
use bitmasks
implicit none
BEGIN_DOC
@ -449,6 +452,131 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate)
!$OMP DO SCHEDULE(guided)
do ii=1,n_det_ref
i = idx_ref(ii)
do jj = 1, n_det_non_ref
j = idx_non_ref(jj)
vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j)
vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i)
enddo
enddo
!$OMP END DO
!$OMP CRITICAL
do i=1,n
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(idx,vt)
!$OMP END PARALLEL
end
subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
END_DOC
integer, intent(in) :: n,Nint,istate
double precision, intent(out) :: v_0(n)
double precision, intent(in) :: u_0(n)
double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
integer, allocatable :: idx(:)
double precision :: hij
double precision, allocatable :: vt(:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(Nint,n), version(Nint,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
!
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy delta_ij
integer, parameter :: block_size = 157
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) &
!$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,sorted,shortcut,sort_idx,version)
!$OMP DO SCHEDULE(static)
do i=1,n
v_0(i) = H_jj(i) * u_0(i)
enddo
!$OMP END DO
allocate(idx(0:n), vt(n))
Vt = 0.d0
do pass=1,2
if(pass == 1) then
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
else
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
end if
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
if(pass == 2) then
endi = sh
else
endi = 1
end if
do sh2=endi,sh
exa = 0
do ni=1,Nint
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( (dabs(u_0(org_j)) > 1.d-7).or.((dabs(u_0(org_i)) > 1.d-7)) ) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
endif
end if
end do
end do
end do
enddo
!$OMP END DO
end do
!$OMP DO SCHEDULE(guided)
do ii=1,n_det_ref
i = idx_ref(ii)

View File

@ -121,35 +121,35 @@ subroutine tamiser(key, idx, no, n, Nint, N_key)
end subroutine
subroutine sort_dets_ba_v(key, idx, shortcut, version, N_key, Nint)
subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks
implicit none
integer(bit_kind),intent(inout) :: key(Nint,2,N_key)
integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1)
integer, intent(in) :: Nint, N_key
integer(bit_kind) :: tmp(Nint, 2,N_key)
tmp(:,1,:N_key) = key(:,2,:N_key)
tmp(:,2,:N_key) = key(:,1,:N_key)
key(:,1,:N_key) = key_in(:,2,:N_key)
key(:,2,:N_key) = key_in(:,1,:N_key)
call sort_dets_ab_v(tmp, idx, shortcut, version, N_key, Nint)
key(:,1,:N_key) = tmp(:,2,:N_key)
key(:,2,:N_key) = tmp(:,1,:N_key)
call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint)
end subroutine
subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks
implicit none
integer(bit_kind),intent(inout) :: key(Nint,2,N_key)
integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1)
@ -157,6 +157,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
integer(bit_kind) :: tmp(Nint, 2)
integer :: tmpidx,i,ni
key(:,:,:) = key_in(:,:,:)
do i=1,N_key
idx(i) = i
end do
@ -188,6 +189,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint)
end do
end do
shortcut(shortcut(0)+1) = N_key+1
key_out(:,:) = key(:,2,:)
end subroutine
c

View File

@ -106,78 +106,182 @@ subroutine get_s2_u0_old(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
s2 += S_z2_Sz
end
subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
implicit none
use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax)
integer, intent(in) :: n,nmax
double precision, intent(in) :: psi_coefs_tmp(nmax)
double precision, intent(out) :: s2
double precision :: s2_tmp
integer :: i,j,l,jj,ii
integer, allocatable :: idx(:)
implicit none
use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax)
integer, intent(in) :: n,nmax
double precision, intent(in) :: psi_coefs_tmp(nmax)
double precision, intent(out) :: s2
double precision :: s2_tmp
integer :: i,j,l,jj,ii
integer, allocatable :: idx(:)
integer(bit_kind) :: psi_keys_srt(N_int,2,n)
integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp
print *, "totolacitrouille"
call write_time(6)
psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:)
call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int)
print *, "totolacitrouille 2"
s2 = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp) &
!$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)&
!$OMP REDUCTION(+:s2)
allocate(idx(0:n))
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
warp(1,0) = 0
do ii=1,sh!shortcut(0)
tmp = 0
do ni=1,N_int
tmp = popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh))))
end do
if(tmp <= 4) then
tmp = warp(1,0) + 1
warp(1,0) = tmp
warp(1,tmp) = shortcut(ii)
warp(2,tmp) = shortcut(ii+1)-1
end if
end do
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(N_int,n), version(N_int,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
double precision :: davidson_threshold_bis
!PROVIDE davidson_threshold
s2 = 0.d0
davidson_threshold_bis = davidson_threshold
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) &
!$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)&
!$OMP REDUCTION(+:s2)
allocate(idx(0:n))
do pass=1,2
if(pass == 1) then
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
else
call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
end if
do ii=shortcut(sh),shortcut(sh+1)-1
!do ii=1,n
idx(0) = ii
call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx)
i = sort_idx(ii)
do jj=1,idx(0)
j = sort_idx(idx(jj))
if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) &
> davidson_threshold ) then
call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp
endif
enddo
end do
enddo
!$OMP END DO
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
if(pass == 2) then
endi = sh
else
endi = 1
end if
do sh2=endi,sh
exa = 0
do ni=1,N_int
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
end do
enddo
!$OMP END DO
end do
deallocate(idx)
!$OMP END PARALLEL
s2 = s2+s2
do i=1,n
call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp
enddo
s2 = s2 + S_z2_Sz
print *, "totolacitrouille 3"
call write_time(6)
end
! subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
! implicit none
! use bitmasks
! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax)
! integer, intent(in) :: n,nmax
! double precision, intent(in) :: psi_coefs_tmp(nmax)
! double precision, intent(out) :: s2
! double precision :: s2_tmp
! integer :: i,j,l,jj,ii
! integer, allocatable :: idx(:)
!
! integer(bit_kind) :: psi_keys_srt(N_int,2,n)
! integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp
! integer :: mon, bie, egz
!
!
! psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:)
! call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int)
!
! s2 = 0.d0
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp,mon,bie,egz) &
! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)&
! !$OMP REDUCTION(+:s2)
! allocate(idx(0:n))
! !$OMP DO SCHEDULE(dynamic)
!
! do sh=1,shortcut(0)
! mon = 0
! bie = 0
!
! warp(1,0) = 0
! do ii=1,sh!shortcut(0)
! tmp = 0
! do ni=1,N_int
! tmp += popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh))))
! end do
! egz = tmp
! if(tmp <= 4) then
! tmp = warp(1,0) + 1
! warp(1,0) = tmp
! warp(1,tmp) = shortcut(ii)
! warp(2,tmp) = shortcut(ii+1)-1
! if(egz == 4) then
! bie = bie + shortcut(ii+1) - shortcut(ii)
! else
! mon = mon + shortcut(ii+1) - shortcut(ii)
! end if
! end if
! end do
!
! if(shortcut(sh+1) - shortcut(sh) /= 1) then
! print *, shortcut(sh+1) - shortcut(sh), shortcut(sh+1), mon, bie
! end if
!
! do ii=shortcut(sh),shortcut(sh+1)-1
! !do ii=1,n
! idx(0) = ii
! call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx)
! i = sort_idx(ii)
! do jj=1,idx(0)
! j = sort_idx(idx(jj))
! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) &
! > davidson_threshold ) then
! call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int)
! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp
! endif
! enddo
! end do
! enddo
! !$OMP END DO
! deallocate(idx)
! !$OMP END PARALLEL
! s2 = s2+s2
! do i=1,n
! call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int)
! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp
! enddo
! s2 = s2 + S_z2_Sz
! end
!
! subroutine get_s2_u0_org(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
! implicit none