10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-07 03:43:20 +01:00

optimized calculation of inactive amplitudes

This commit is contained in:
Yann Garniron 2016-10-17 14:40:09 +02:00
parent b780a6540a
commit 1f4cd4c318

View File

@ -616,204 +616,301 @@ END_PROVIDER
BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ]
&BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] &BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ]
implicit none implicit none
logical :: ok logical :: ok
integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row integer :: i, j, k, s, II, pp, ppp, hh, ind, wk, nex, a_col, at_row
integer, external :: searchDet, unsortedSearchDet integer, external :: searchDet, unsortedSearchDet
integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2)
integer :: N, INFO, AtA_size, r1, r2 integer :: N, INFO, AtA_size, r1, r2
double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:)
double precision :: t, norm, cx, res double precision :: t, norm, cx, res
integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:)
double precision :: phase double precision :: phase
integer, allocatable :: pathTo(:), active_hh_idx(:), active_pp_idx(:)
logical, allocatable :: active(:)
double precision, allocatable :: rho_mrcc_init(:,:)
integer :: nactive
nex = hh_shortcut(hh_shortcut(0)+1)-1
print *, "TI", nex, N_det_non_ref
allocate(pathTo(N_det_non_ref), active(nex))
allocate(active_pp_idx(nex), active_hh_idx(nex))
allocate(rho_mrcc_init(N_det_non_ref, N_states))
pathTo = 0
active = .false.
nactive = 0
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
do II = 1, N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
if(.not. ok) cycle
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
if(.not. ok) cycle
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind == -1) cycle
ind = psi_non_ref_sorted_idx(ind)
if(pathTo(ind) == 0) then
pathTo(ind) = pp
else
active(pp) = .true.
active(pathTo(ind)) = .true.
end if
end do
end do
end do
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
if(active(pp)) then
nactive = nactive + 1
active_hh_idx(nactive) = hh
active_pp_idx(nactive) = pp
end if
end do
end do
print *, nactive, "inact/", size(active)
allocate(A_ind(0:N_det_ref+1, nactive), A_val(N_det_ref+1, nactive))
allocate(AtA_ind(N_det_ref * nactive), AtA_val(N_det_ref * nactive))
allocate(x(nex), AtB(nex))
allocate(N_col(nactive), col_shortcut(nactive))
allocate(x_new(nex))
nex = hh_shortcut(hh_shortcut(0)+1)-1 do s = 1, N_states
print *, "TI", nex, N_det_non_ref
allocate(A_ind(0:N_det_ref+1, nex), A_val(N_det_ref+1, nex))
allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!!
allocate(x(nex), AtB(nex))
allocate(N_col(nex), col_shortcut(nex))
allocate(x_new(nex))
do s = 1, N_states A_val = 0d0
A_ind = 0
AtA_ind = 0
AtB = 0d0
AtA_val = 0d0
x = 0d0
N_col = 0
col_shortcut = 0
A_val = 0d0 !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)&
A_ind = 0 !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)&
AtA_ind = 0 !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)&
AtA_val = 0d0 !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh)
x = 0d0 allocate(lref(N_det_non_ref))
!A_val_mwen = 0d0 !$OMP DO schedule(static,10)
N_col = 0 do ppp=1,nactive
col_shortcut = 0 pp = active_pp_idx(ppp)
hh = active_hh_idx(ppp)
lref = 0
do II = 1, N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
if(.not. ok) cycle
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
if(.not. ok) cycle
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind /= -1) then
call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
if (phase > 0.d0) then
lref(psi_non_ref_sorted_idx(ind)) = II
else
lref(psi_non_ref_sorted_idx(ind)) = -II
endif
end if
end do
wk = 0
do i=1, N_det_non_ref
if(lref(i) > 0) then
wk += 1
A_val(wk, ppp) = psi_ref_coef(lref(i), s)
A_ind(wk, ppp) = i
else if(lref(i) < 0) then
wk += 1
A_val(wk, ppp) = -psi_ref_coef(-lref(i), s)
A_ind(wk, ppp) = i
end if
end do
A_ind(0,ppp) = wk
end do
!$OMP END DO
deallocate(lref)
!$OMP END PARALLEL
!$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)&
!$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& print *, 'Done building A_val, A_ind'
!$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk)
allocate(lref(N_det_non_ref)) AtA_size = 0
!$OMP DO schedule(static,10) col_shortcut = 0
do hh = 1, hh_shortcut(0) N_col = 0
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 integer :: a_coll, at_roww
lref = 0
do II = 1, N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)&
if(.not. ok) cycle !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int) !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx)
if(.not. ok) cycle allocate(A_val_mwen(nex), A_ind_mwen(nex))
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind /= -1) then !$OMP DO schedule(dynamic, 100)
call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int) do at_roww = 1, nactive ! nex
if (phase > 0.d0) then at_row = active_pp_idx(at_roww)
lref(psi_non_ref_sorted_idx(ind)) = II wk = 0
else if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", nex
lref(psi_non_ref_sorted_idx(ind)) = -II do i=1,A_ind(0,at_roww)
endif j = active_pp_idx(i)
end if AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww)
end do end do
wk = 0
do i=1, N_det_non_ref do a_coll = 1, nactive
if(lref(i) > 0) then a_col = active_pp_idx(a_coll)
wk += 1 t = 0d0
A_val(wk, pp) = psi_ref_coef(lref(i), s) r1 = 1
A_ind(wk, pp) = i r2 = 1
else if(lref(i) < 0) then do while ((A_ind(r1, at_roww) /= 0).and.(A_ind(r2, a_coll) /= 0))
wk += 1 if(A_ind(r1, at_roww) > A_ind(r2, a_coll)) then
A_val(wk, pp) = -psi_ref_coef(-lref(i), s) r2 = r2+1
A_ind(wk, pp) = i else if(A_ind(r1, at_roww) < A_ind(r2, a_coll)) then
end if r1 = r1+1
end do else
A_ind(0,pp) = wk t = t - A_val(r1, at_roww) * A_val(r2, a_coll)
end do r1 = r1+1
r2 = r2+1
end if
end do
if(a_col == at_row) then
t = t + 1.d0
end if
if(t /= 0.d0) then
wk += 1
A_ind_mwen(wk) = a_col
A_val_mwen(wk) = t
end if
end do
if(wk /= 0) then
!$OMP CRITICAL
col_shortcut(at_roww) = AtA_size+1
N_col(at_roww) = wk
if (AtA_size+wk > size(AtA_ind,1)) then
print *, AtA_size+wk , size(AtA_ind,1)
stop 'too small'
endif
do i=1,wk
AtA_ind(AtA_size+i) = A_ind_mwen(i)
AtA_val(AtA_size+i) = A_val_mwen(i)
enddo
AtA_size += wk
!$OMP END CRITICAL
end if
end do
!$OMP END DO NOWAIT
deallocate (A_ind_mwen, A_val_mwen)
!$OMP END PARALLEL
print *, "ATA SIZE", ata_size
x = 0d0
do a_coll = 1, nactive
a_col = active_pp_idx(a_coll)
X(a_col) = AtB(a_col)
end do
rho_mrcc_init = 0d0
allocate(lref(N_det_ref))
!$OMP PARALLEL DO default(shared) schedule(static, 1) &
!$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase)
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
if(active(pp)) cycle
lref = 0
do II=1,N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
if(.not. ok) cycle
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
if(.not. ok) cycle
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind == -1) cycle
ind = psi_non_ref_sorted_idx(ind)
call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
X(pp) += psi_ref_coef(II,s)**2
AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase
lref(II) = ind
if(phase < 0d0) lref(II) = -ind
end do
X(pp) = AtB(pp) / X(pp)
do II=1,N_det_ref
if(lref(II) > 0) then
rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp)
else if(lref(II) < 0) then
rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp)
end if
end do
end do end do
!$OMP END DO end do
deallocate(lref) !$OMP END PARALLEL DO
!$OMP END PARALLEL
print *, 'Done building A_val, A_ind'
AtB = 0d0 x_new = x
AtA_size = 0
col_shortcut = 0
N_col = 0
!$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)&
!$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)&
!$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s)
allocate(A_val_mwen(nex), A_ind_mwen(nex))
!A_ind_mwen = 0
!A_val_mwen = 0d0
!$OMP DO schedule(dynamic, 100)
do at_row = 1, nex
wk = 0
if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex
do i=1,A_ind(0,at_row)
AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row)
end do
do a_col = 1, nex double precision :: factor, resold
t = 0d0 factor = 1.d0
r1 = 1 resold = huge(1.d0)
r2 = 1 do k=0,100000
do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) !$OMP PARALLEL default(shared) private(cx, i, j, a_col, a_coll)
if(A_ind(r1, at_row) > A_ind(r2, a_col)) then
r2 = r2+1 !$OMP DO
else if(A_ind(r1, at_row) < A_ind(r2, a_col)) then do i=1,N_det_non_ref
r1 = r1+1 rho_mrcc(i,s) = rho_mrcc_init(i,s) ! 0d0
else enddo
t = t - A_val(r1, at_row) * A_val(r2, a_col) !$OMP END DO
r1 = r1+1
r2 = r2+1 !$OMP DO
end if do a_coll = 1, nactive !: nex
a_col = active_pp_idx(a_coll)
cx = 0d0
do i=col_shortcut(a_coll), col_shortcut(a_coll) + N_col(a_coll) - 1
cx = cx + x(AtA_ind(i)) * AtA_val(i)
end do
x_new(a_col) = AtB(a_col) + cx * factor
end do
!$OMP END DO
!$OMP END PARALLEL
res = 0.d0
if (res < resold) then
do a_coll=1,nactive ! nex
a_col = active_pp_idx(a_coll)
do j=1,N_det_non_ref
i = A_ind(j,a_coll)
if (i==0) exit
rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_coll) * X_new(a_col)
enddo
res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
X(a_col) = X_new(a_col)
end do end do
factor = 1.d0
else
factor = -factor * 0.5d0
endif
resold = res
if(a_col == at_row) then if(mod(k, 5) == 0) then
t = t + 1.d0 print *, "res ", k, res
end if end if
if(t /= 0.d0) then
wk += 1
A_ind_mwen(wk) = a_col
A_val_mwen(wk) = t
end if
end do
if(wk /= 0) then if(res < 1d-12) exit
!$OMP CRITICAL end do
col_shortcut(at_row) = AtA_size+1
N_col(at_row) = wk
if (AtA_size+wk > size(AtA_ind,1)) then
print *, AtA_size+wk , size(AtA_ind,1)
stop 'too small'
endif
do i=1,wk
AtA_ind(AtA_size+i) = A_ind_mwen(i)
AtA_val(AtA_size+i) = A_val_mwen(i)
enddo
AtA_size += wk
!$OMP END CRITICAL
end if
end do
!$OMP END DO NOWAIT
deallocate (A_ind_mwen, A_val_mwen)
!$OMP END PARALLEL
if(AtA_size > size(AtA_val)) stop "SIZA"
print *, "ATA SIZE", ata_size
do i=1,nex
x(i) = AtB(i)
enddo
double precision :: factor, resold
factor = 1.d0
resold = huge(1.d0)
do k=0,100000
!$OMP PARALLEL default(shared) private(cx, i, j, a_col)
!$OMP DO norm = 0.d0
do i=1,N_det_non_ref
rho_mrcc(i,s) = 0.d0
enddo
!$OMP END DO
!$OMP DO
do a_col = 1, nex
cx = 0d0
do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1
cx = cx + x(AtA_ind(i)) * AtA_val(i)
end do
x_new(a_col) = AtB(a_col) + cx * factor
end do
!$OMP END DO
!$OMP END PARALLEL
res = 0.d0
do a_col=1,nex
res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
end do
if (res < resold) then
do a_col=1,nex
do j=1,N_det_non_ref
i = A_ind(j,a_col)
if (i==0) exit
rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col)
enddo
X(a_col) = X_new(a_col)
end do
! factor = 1.d0
else
factor = -factor * 0.5d0
endif
resold = res
if(mod(k, 100) == 0) then
print *, "res", k, res
end if
if(res < 1d-8) exit
end do
! rho_mrcc now contains A.X
norm = 0.d0
do i=1,N_det_non_ref do i=1,N_det_non_ref
norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
enddo enddo
@ -826,7 +923,7 @@ END_PROVIDER
print *, k, "res : ", res, "norm : ", sqrt(norm) print *, k, "res : ", res, "norm : ", sqrt(norm)
dIj_unique(:size(X), s) = X(:) !dIj_unique(:size(X), s) = X(:)
norm = 0.d0 norm = 0.d0
double precision :: f double precision :: f
@ -872,11 +969,13 @@ END_PROVIDER
! rho_mrcc now contains the product of the scaling factors and the ! rho_mrcc now contains the product of the scaling factors and the
! normalization constant ! normalization constant
end do dIj_unique(:size(X), s) = X(:)
end do
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
integer :: s,i,j integer :: s,i,j
double precision, external :: get_dij_index double precision, external :: get_dij_index
@ -1142,3 +1241,6 @@ subroutine apply_particle_local(det, exc, res, ok, Nint)
ok = .true. ok = .true.
end subroutine end subroutine