10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-06-26 07:02:14 +02:00

Include S^2 inside davidson

This commit is contained in:
Anthony Scemama 2016-09-27 15:55:38 +02:00
parent 9e5ec756b3
commit c5501ef1f9
7 changed files with 238 additions and 27 deletions

View File

@ -100,8 +100,8 @@ program full_ci
print *, 'N_det = ', N_det
print *, 'N_states = ', N_states
print *, 'PT2 = ', pt2
print *, 'E = ', CI_energy
print *, 'E+PT2 = ', CI_energy+pt2
print *, 'E = ', CI_energy(1:N_states)
print *, 'E+PT2 = ', CI_energy(1:N_states)+pt2(1:N_states)
print *, '-----'
call ezfio_set_full_ci_energy_pt2(CI_energy(1)+pt2(1))
endif

View File

@ -425,19 +425,19 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
hij = (integral8(p1, p2, putj, hfix)-integral8(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2)
!call debug_hij(hij, gen, mask, mi, ma, puti, putj)
tmp_row(:,putj) += hij * coefs
tmp_row(1:N_states,putj) += hij * coefs(1:N_states)
end do
do putj=hfix+1, mo_tot_num
if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
hij = (integral8(p1, p2, hfix, putj)-integral8(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2)
!call debug_hij(hij, gen, mask, mi, ma, puti, putj)
tmp_row(:,putj) += hij * coefs
tmp_row(1:N_states,putj) += hij * coefs(1:N_states)
end do
if(ma == 1) then
mat(:,:,puti) += tmp_row(:,:)
mat(1:N_states,1:mo_tot_num,puti) += tmp_row(1:N_states,1:mo_tot_num)
else
mat(:,puti,:) += tmp_row(:,:)
mat(1:N_states,puti,1:mo_tot_num) += tmp_row(1:N_states,1:mo_tot_num)
end if
end if
@ -585,12 +585,12 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
!call assert(ok, "zsdq")
call i_h_j(gen, det, N_int, hij)
mat(:, p1, p2) += coefs * hij
mat(:, p1, p2) += coefs(:) * hij
else
hij = integral8(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2)
phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2)
!call debug_hij(hij, gen, mask, 1, 2, p1, p2)
mat(:, p1, p2) += coefs * hij
mat(:, p1, p2) += coefs(:) * hij
end if
end do
end do
@ -605,10 +605,10 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
call i_h_j(gen, det, N_int, hij)
mat(:, puti, putj) += coefs * hij
mat(:, puti, putj) += coefs(:) * hij
else
hij = (integral8(p1, p2, puti, putj) - integral8(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2)
mat(:, puti, putj) += coefs * hij
mat(:, puti, putj) += coefs(:) * hij
!call debug_hij(hij, gen, mask, sp, sp, puti, putj)
end if
end do

View File

@ -439,13 +439,13 @@ class H_apply_zmq(H_apply):
enddo
""" % (self.energy)
self.data["copy_buffer"] = """
do i=1,N_det_generators
do k=1,N_st
pt2(k) = pt2(k) + pt2_generators(k,i)
norm_pert(k) = norm_pert(k) + norm_pert_generators(k,i)
H_pert_diag(k) = H_pert_diag(k) + H_pert_diag_generators(k,i)
do i=1,N_det_generators
do k=1,N_st
pt2(k) = pt2(k) + pt2_generators(k,i)
norm_pert(k) = norm_pert(k) + norm_pert_generators(k,i)
H_pert_diag(k) = H_pert_diag(k) + H_pert_diag_generators(k,i)
enddo
enddo
enddo
"""
def set_selection_pt2(self,pert):

View File

@ -55,12 +55,13 @@ END_PROVIDER
if (diag_algorithm == "Davidson") then
call davidson_diag(psi_det,CI_eigenvectors,CI_electronic_energy,&
size(CI_eigenvectors,1),N_det,N_states,N_states_diag,N_int,output_determinants)
call davidson_diag_HS2(psi_det,CI_eigenvectors, &
size(CI_eigenvectors,1),CI_electronic_energy, &
N_det,N_states,N_states_diag,N_int,output_determinants)
call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int,&
N_states_diag,size(CI_eigenvectors,1))
else if (diag_algorithm == "Lapack") then

View File

@ -34,7 +34,7 @@
i_state = 0
if (s2_eig) then
do j=1,N_det
call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2)
call get_s2_u0(psi_det,eigenvectors(1,j),N_det,s2,N_det)
if(dabs(s2-expected_s2).le.0.3d0)then
print*,'j = ',j
print*,'e = ',eigenvalues(j)
@ -54,7 +54,7 @@
enddo
else
do j=1,N_states_diag
call get_s2_u0(psi_det,eigenvectors(1,j),N_det,N_det,s2)
call get_s2_u0(psi_det,eigenvectors(1,j),N_det,s2,N_det)
if(dabs(eigenvectors(1,j)).gt.0.9d0)then
i_state += 1
do i=1,N_det

View File

@ -174,3 +174,158 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
call u_0_H_u_0(psi_energy,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size)
END_PROVIDER
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
!
! S2_jj : array of <j|S^2|j>
END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st)
double precision, intent(in) :: u_0(sze_8,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
N_st_8 = align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate(ut(N_st_8,n))
v_0 = 0.d0
s_0 = 0.d0
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(i,istate)
enddo
enddo
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
allocate(vt(N_st_8,n),st(N_st_8,n))
Vt = 0.d0
St = 0.d0
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,1)
do sh2=sh,shortcut(0,1)
exa = 0
do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
if(sh==sh2) then
endi = i-1
else
endi = shortcut(sh2+1,1)-1
end if
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh2,1),endi
org_j = sort_idx(j,1)
ext = exa
do ni=1,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
end do
if(ext <= 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
enddo
endif
enddo
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,2)
do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i,2)
do j=shortcut(sh,2),i-1
org_j = sort_idx(j,2)
ext = 0
do ni=1,Nint
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
end do
if(ext == 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
enddo
end if
end do
end do
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
do istate=1,N_st
do i=n,1,-1
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
s_0(i,istate) = s_0(i,istate) + st(istate,i)
enddo
enddo
!$OMP END CRITICAL
deallocate(vt,st)
!$OMP END PARALLEL
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo
enddo
deallocate (shortcut, sort_idx, sorted, version, ut)
end

View File

@ -1,4 +1,4 @@
subroutine get_s2(key_i,key_j,s2,Nint)
subroutine get_s2(key_i,key_j,Nint,s2)
implicit none
use bitmasks
BEGIN_DOC
@ -189,7 +189,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
end do
if(ext <= 4) then
call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),s2_tmp,Nint)
call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
do istate=1,N_st
vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
@ -212,7 +212,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
end do
if(ext == 4) then
call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),s2_tmp,Nint)
call get_s2(keys_tmp(1,1,org_i),keys_tmp(1,1,org_j),Nint,s2_tmp)
do istate=1,N_st
vt (org_i,istate) = vt (org_i,istate) + s2_tmp*u_0(org_j,istate)
vt (org_j,istate) = vt (org_j,istate) + s2_tmp*u_0(org_i,istate)
@ -235,7 +235,7 @@ subroutine S2_u_0_nstates(v_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
!$OMP END PARALLEL
do i=1,n
call get_s2(keys_tmp(1,1,i),keys_tmp(1,1,i),s2_tmp,Nint)
call get_s2(keys_tmp(1,1,i),keys_tmp(1,1,i),Nint,s2_tmp)
do istate=1,N_st
v_0(i,istate) += s2_tmp * u_0(i,istate)
enddo
@ -275,12 +275,12 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
allocate(idx(0:n))
!$OMP DO SCHEDULE(dynamic)
do i = n,1,-1 ! Better OMP scheduling
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj)
call filter_connected(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx)
do kk=1,idx(0)
j = idx(kk)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int)
call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),N_int,s2_tmp)
accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(j,jj) + psi_coefs_tmp(i,jj) * s2_tmp * psi_coefs_tmp(j,ll)
enddo
enddo
@ -418,3 +418,58 @@ subroutine diagonalize_s2_betweenstates(keys_tmp,u_0,n,nmax_keys,nmax_coefs,nsta
end
subroutine i_S2_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,Nstate,i_S2_psi_array)
use bitmasks
implicit none
integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate,idx_key(Ndet), N_minilist
integer(bit_kind), intent(in) :: keys(Nint,2,Ndet)
integer(bit_kind), intent(in) :: key(Nint,2)
double precision, intent(in) :: coef(Ndet_max,Nstate)
double precision, intent(out) :: i_S2_psi_array(Nstate)
integer :: i, ii,j, i_in_key, i_in_coef
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: s2ij
integer :: idx(0:Ndet)
BEGIN_DOC
! Computes <i|S2|Psi> = \sum_J c_J <i|S2|J>.
!
! Uses filter_connected_i_H_psi0 to get all the |J> to which |i>
! is connected. The |J> are searched in short pre-computed lists.
END_DOC
ASSERT (Nint > 0)
ASSERT (N_int == Nint)
ASSERT (Nstate > 0)
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
i_S2_psi_array = 0.d0
call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx)
if (Nstate == 1) then
do ii=1,idx(0)
i_in_key = idx(ii)
i_in_coef = idx_key(idx(ii))
!DIR$ FORCEINLINE
call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
! TODO : Cache misses
i_S2_psi_array(1) = i_S2_psi_array(1) + coef(i_in_coef,1)*s2ij
enddo
else
do ii=1,idx(0)
i_in_key = idx(ii)
i_in_coef = idx_key(idx(ii))
!DIR$ FORCEINLINE
call get_s2(keys(1,1,i_in_key),key,Nint,s2ij)
do j = 1, Nstate
i_S2_psi_array(j) = i_S2_psi_array(j) + coef(i_in_coef,j)*s2ij
enddo
enddo
endif
end