10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-06-25 22:52:15 +02:00

Davidson optimization

This commit is contained in:
Anthony Scemama 2015-11-13 17:36:29 +01:00
parent b7e20b3493
commit 6a640567e8
3 changed files with 184 additions and 135 deletions

View File

@ -90,27 +90,36 @@ end function
subroutine tamiser(key, idx, no, n, Nint, N_key)
use bitmasks
implicit none
integer(bit_kind),intent(inout) :: key(Nint, 2, N_key)
BEGIN_DOC
! Uncodumented : TODO
END_DOC
integer,intent(in) :: no, n, Nint, N_key
integer(bit_kind),intent(inout) :: key(Nint, 2, N_key)
integer,intent(inout) :: idx(N_key)
integer :: k,j,tmpidx
integer(bit_kind) :: tmp(Nint, 2)
logical :: det_inf
integer :: ni
k = no
j = 2*k
do while(j <= n)
if(j < n) then
if (det_inf(key(:,:,j), key(:,:,j+1), Nint)) then
if (det_inf(key(1,1,j), key(1,1,j+1), Nint)) then
j = j+1
endif
endif
if(det_inf(key(:,:,k), key(:,:,j), Nint)) then
tmp(:,:) = key(:,:,k)
key(:,:,k) = key(:,:,j)
key(:,:,j) = tmp(:,:)
if(det_inf(key(1,1,k), key(1,1,j), Nint)) then
do ni=1,Nint
tmp(ni,1) = key(ni,1,k)
tmp(ni,2) = key(ni,2,k)
key(ni,1,k) = key(ni,1,j)
key(ni,2,k) = key(ni,2,j)
key(ni,1,j) = tmp(ni,1)
key(ni,2,j) = tmp(ni,2)
enddo
tmpidx = idx(k)
idx(k) = idx(j)
idx(j) = tmpidx
@ -126,17 +135,25 @@ end subroutine
subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks
implicit none
integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1)
integer, intent(in) :: Nint, N_key
integer(bit_kind) :: tmp(Nint, 2,N_key)
integer, intent(in) :: Nint, N_key
integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1)
integer(bit_kind) :: tmp(Nint, 2,N_key)
integer :: i,ni
key(:,1,:N_key) = key_in(:,2,:N_key)
key(:,2,:N_key) = key_in(:,1,:N_key)
BEGIN_DOC
! Uncodumented : TODO
END_DOC
do i=1,N_key
do ni=1,Nint
key(ni,1,i) = key_in(ni,2,i)
key(ni,2,i) = key_in(ni,1,i)
enddo
enddo
call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint)
@ -148,18 +165,24 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
use bitmasks
implicit none
BEGIN_DOC
! Uncodumented : TODO
END_DOC
integer, intent(in) :: Nint, N_key
integer(bit_kind),intent(in) :: key_in(Nint,2,N_key)
integer(bit_kind) :: key(Nint,2,N_key)
integer(bit_kind),intent(out) :: key_out(Nint,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
integer(bit_kind),intent(out) :: version(Nint,N_key+1)
integer, intent(in) :: Nint, N_key
integer(bit_kind) :: tmp(Nint, 2)
integer :: tmpidx,i,ni
key(:,:,:) = key_in(:,:,:)
do i=1,N_key
do ni=1,Nint
key(ni,1,i) = key_in(ni,1,i)
key(ni,2,i) = key_in(ni,2,i)
enddo
idx(i) = i
end do
@ -168,9 +191,14 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
end do
do i=N_key,2,-1
tmp(:,:) = key(:,:,i)
key(:,:,i) = key(:,:,1)
key(:,:,1) = tmp(:,:)
do ni=1,Nint
tmp(ni,1) = key(ni,1,i)
tmp(ni,2) = key(ni,2,i)
key(ni,1,i) = key(ni,1,1)
key(ni,2,i) = key(ni,2,1)
key(ni,1,1) = tmp(ni,1)
key(ni,2,1) = tmp(ni,2)
enddo
tmpidx = idx(i)
idx(i) = idx(1)
idx(1) = tmpidx
@ -179,7 +207,9 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
shortcut(0) = 1
shortcut(1) = 1
version(:,1) = key(:,1,1)
do ni=1,Nint
version(ni,1) = key(ni,1,1)
enddo
do i=2,N_key
do ni=1,nint
if(key(ni,1,i) /= key(ni,1,i-1)) then
@ -191,15 +221,22 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint)
end do
end do
shortcut(shortcut(0)+1) = N_key+1
key_out(:,:) = key(:,2,:)
do i=1,N_key
do ni=1,Nint
key_out(ni,i) = key(ni,2,i)
enddo
enddo
end subroutine
c
subroutine sort_dets_ab(key, idx, shortcut, N_key, Nint)
use bitmasks
implicit none
BEGIN_DOC
! Uncodumented : TODO
END_DOC
integer(bit_kind),intent(inout) :: key(Nint,2,N_key)
integer,intent(out) :: idx(N_key)
integer,intent(out) :: shortcut(0:N_key+1)
@ -216,9 +253,15 @@ subroutine sort_dets_ab(key, idx, shortcut, N_key, Nint)
end do
do i=N_key,2,-1
tmp(:,:) = key(:,:,i)
key(:,:,i) = key(:,:,1)
key(:,:,1) = tmp(:,:)
do ni=1,Nint
tmp(ni,1) = key(ni,1,i)
tmp(ni,2) = key(ni,2,i)
key(ni,1,i) = key(ni,1,1)
key(ni,2,i) = key(ni,2,1)
key(ni,1,1) = tmp(ni,1)
key(ni,2,1) = tmp(ni,2)
enddo
tmpidx = idx(i)
idx(i) = idx(1)
idx(1) = tmpidx

View File

@ -109,81 +109,82 @@ end
subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
implicit none
use bitmasks
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax)
integer, intent(in) :: n,nmax
double precision, intent(in) :: psi_coefs_tmp(nmax)
double precision, intent(out) :: s2
double precision :: s2_tmp
integer :: i,j,l,jj,ii
integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax)
integer, intent(in) :: n,nmax
double precision, intent(in) :: psi_coefs_tmp(nmax)
double precision, intent(out) :: s2
double precision :: s2_tmp
integer :: i,j,l,jj,ii
integer, allocatable :: idx(:)
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(N_int,n), version(N_int,n)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass
double precision :: davidson_threshold_bis
!PROVIDE davidson_threshold
s2 = 0.d0
davidson_threshold_bis = davidson_threshold
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) &
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass)&
!$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)&
!$OMP REDUCTION(+:s2)
allocate(idx(0:n))
!$OMP SINGLE
call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do sh2=1,sh
exa = 0
do ni=1,N_int
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
do sh2=1,sh
exa = 0
do ni=1,N_int
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do j=shortcut(sh2),endi
ext = exa
do ni=1,N_int
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i))&
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
end do
end do
enddo
!$OMP END DO
!$OMP SINGLE
!$OMP END PARALLEL
call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int)
!$OMP END SINGLE
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,s2_tmp,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass)&
!$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)&
!$OMP REDUCTION(+:s2)
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
do i=shortcut(sh),shortcut(sh+1)-1
do j=shortcut(sh),i-1
ext = 0
do ni=1,N_int
@ -193,25 +194,24 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2)
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) &
if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i))&
> davidson_threshold ) then
call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp
endif
end if
end do
end do
end do
enddo
!$OMP END DO
deallocate(idx)
!$OMP END PARALLEL
s2 = s2+s2
do i=1,n
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp
enddo
s2 = s2 + S_z2_Sz
!$OMP END PARALLEL
s2 = s2+s2
do i=1,n
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp
enddo
s2 = s2 + S_z2_Sz
end

View File

@ -1068,14 +1068,14 @@ double precision function diag_H_mat_elem(det_in,Nint)
nexc(1) = 0
nexc(2) = 0
do i=1,Nint
hole(i,1) = xor(det_in(i,1),ref_bitmask(i,1))
hole(i,2) = xor(det_in(i,2),ref_bitmask(i,2))
hole(i,1) = xor(det_in(i,1),ref_bitmask(i,1))
hole(i,2) = xor(det_in(i,2),ref_bitmask(i,2))
particle(i,1) = iand(hole(i,1),det_in(i,1))
particle(i,2) = iand(hole(i,2),det_in(i,2))
hole(i,1) = iand(hole(i,1),ref_bitmask(i,1))
hole(i,2) = iand(hole(i,2),ref_bitmask(i,2))
nexc(1) += popcnt(hole(i,1))
nexc(2) += popcnt(hole(i,2))
nexc(1) = nexc(1) + popcnt(hole(i,1))
nexc(2) = nexc(2) + popcnt(hole(i,2))
enddo
diag_H_mat_elem = ref_bitmask_energy
@ -1239,10 +1239,11 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
integer :: shortcut(0:n+1), sort_idx(n)
integer(bit_kind) :: sorted(Nint,n), version(Nint,n)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi
!
double precision :: local_threshold
ASSERT (Nint > 0)
@ -1254,47 +1255,51 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) &
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold,sorted_i)&
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version)
allocate(idx(0:n), vt(n))
allocate(vt(n))
Vt = 0.d0
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do sh2=1,sh
exa = 0
do ni=1,Nint
exa += popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh),shortcut(sh+1)-1
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
do sh2=1,sh
exa = 0
do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh), version(ni,sh2)))
end do
if(exa > 2) then
cycle
end if
do j=shortcut(sh2),endi
ext = exa
do i=shortcut(sh),shortcut(sh+1)-1
org_i = sort_idx(i)
local_threshold = davidson_threshold - dabs(u_0(org_i))
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1)-1
end if
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext <= 4) then
org_i = sort_idx(i)
sorted_i(ni) = sorted(ni,i)
enddo
do j=shortcut(sh2),endi
org_j = sort_idx(j)
if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
if ( dabs(u_0(org_j)) > local_threshold ) then
ext = exa
do ni=1,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j)))
end do
if(ext <= 4) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
endif
endif
endif
enddo
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP CRITICAL
@ -1302,30 +1307,31 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(idx,vt)
deallocate(vt)
!$OMP END PARALLEL
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) &
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold)&
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version)
allocate(idx(0:n), vt(n))
allocate(vt(n))
Vt = 0.d0
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0)
do i=shortcut(sh),shortcut(sh+1)-1
local_threshold = davidson_threshold - dabs(u_0(org_i))
org_i = sort_idx(i)
do j=shortcut(sh),i-1
ext = 0
do ni=1,Nint
ext += popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext == 4) then
org_i = sort_idx(i)
org_j = sort_idx(j)
if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then
org_j = sort_idx(j)
if ( dabs(u_0(org_j)) > local_threshold ) then
ext = 0
do ni=1,Nint
ext = ext + popcnt(xor(sorted(ni,i), sorted(ni,j)))
end do
if(ext == 4) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i)
@ -1341,9 +1347,9 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(idx,vt)
deallocate(vt)
!$OMP END PARALLEL
do i=1,n
v_0(i) += H_jj(i) * u_0(i)
enddo