10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-07 03:43:20 +01:00

optimized calculation of inactive amplitudes

This commit is contained in:
Yann Garniron 2016-10-17 14:40:09 +02:00
parent b780a6540a
commit 1f4cd4c318

View File

@ -618,7 +618,7 @@ END_PROVIDER
&BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] &BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ]
implicit none implicit none
logical :: ok logical :: ok
integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row integer :: i, j, k, s, II, pp, ppp, hh, ind, wk, nex, a_col, at_row
integer, external :: searchDet, unsortedSearchDet integer, external :: searchDet, unsortedSearchDet
integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2)
integer :: N, INFO, AtA_size, r1, r2 integer :: N, INFO, AtA_size, r1, r2
@ -628,33 +628,83 @@ END_PROVIDER
double precision :: phase double precision :: phase
integer, allocatable :: pathTo(:), active_hh_idx(:), active_pp_idx(:)
logical, allocatable :: active(:)
double precision, allocatable :: rho_mrcc_init(:,:)
integer :: nactive
nex = hh_shortcut(hh_shortcut(0)+1)-1 nex = hh_shortcut(hh_shortcut(0)+1)-1
print *, "TI", nex, N_det_non_ref print *, "TI", nex, N_det_non_ref
allocate(A_ind(0:N_det_ref+1, nex), A_val(N_det_ref+1, nex))
allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! allocate(pathTo(N_det_non_ref), active(nex))
allocate(active_pp_idx(nex), active_hh_idx(nex))
allocate(rho_mrcc_init(N_det_non_ref, N_states))
pathTo = 0
active = .false.
nactive = 0
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
do II = 1, N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
if(.not. ok) cycle
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
if(.not. ok) cycle
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind == -1) cycle
ind = psi_non_ref_sorted_idx(ind)
if(pathTo(ind) == 0) then
pathTo(ind) = pp
else
active(pp) = .true.
active(pathTo(ind)) = .true.
end if
end do
end do
end do
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
if(active(pp)) then
nactive = nactive + 1
active_hh_idx(nactive) = hh
active_pp_idx(nactive) = pp
end if
end do
end do
print *, nactive, "inact/", size(active)
allocate(A_ind(0:N_det_ref+1, nactive), A_val(N_det_ref+1, nactive))
allocate(AtA_ind(N_det_ref * nactive), AtA_val(N_det_ref * nactive))
allocate(x(nex), AtB(nex)) allocate(x(nex), AtB(nex))
allocate(N_col(nex), col_shortcut(nex)) allocate(N_col(nactive), col_shortcut(nactive))
allocate(x_new(nex)) allocate(x_new(nex))
do s = 1, N_states do s = 1, N_states
A_val = 0d0 A_val = 0d0
A_ind = 0 A_ind = 0
AtA_ind = 0 AtA_ind = 0
AtB = 0d0
AtA_val = 0d0 AtA_val = 0d0
x = 0d0 x = 0d0
!A_val_mwen = 0d0
N_col = 0 N_col = 0
col_shortcut = 0 col_shortcut = 0
!$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& !$OMP PARALLEL default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)&
!$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)&
!$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk) !$OMP shared(active, active_hh_idx, active_pp_idx, nactive)&
!$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh)
allocate(lref(N_det_non_ref)) allocate(lref(N_det_non_ref))
!$OMP DO schedule(static,10) !$OMP DO schedule(static,10)
do hh = 1, hh_shortcut(0) do ppp=1,nactive
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 pp = active_pp_idx(ppp)
hh = active_hh_idx(ppp)
lref = 0 lref = 0
do II = 1, N_det_ref do II = 1, N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
@ -675,51 +725,56 @@ END_PROVIDER
do i=1, N_det_non_ref do i=1, N_det_non_ref
if(lref(i) > 0) then if(lref(i) > 0) then
wk += 1 wk += 1
A_val(wk, pp) = psi_ref_coef(lref(i), s) A_val(wk, ppp) = psi_ref_coef(lref(i), s)
A_ind(wk, pp) = i A_ind(wk, ppp) = i
else if(lref(i) < 0) then else if(lref(i) < 0) then
wk += 1 wk += 1
A_val(wk, pp) = -psi_ref_coef(-lref(i), s) A_val(wk, ppp) = -psi_ref_coef(-lref(i), s)
A_ind(wk, pp) = i A_ind(wk, ppp) = i
end if end if
end do end do
A_ind(0,pp) = wk A_ind(0,ppp) = wk
end do
end do end do
!$OMP END DO !$OMP END DO
deallocate(lref) deallocate(lref)
!$OMP END PARALLEL !$OMP END PARALLEL
print *, 'Done building A_val, A_ind' print *, 'Done building A_val, A_ind'
AtB = 0d0
AtA_size = 0 AtA_size = 0
col_shortcut = 0 col_shortcut = 0
N_col = 0 N_col = 0
integer :: a_coll, at_roww
!$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)&
!$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
!$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s, nactive, active_pp_idx)
allocate(A_val_mwen(nex), A_ind_mwen(nex)) allocate(A_val_mwen(nex), A_ind_mwen(nex))
!A_ind_mwen = 0
!A_val_mwen = 0d0
!$OMP DO schedule(dynamic, 100) !$OMP DO schedule(dynamic, 100)
do at_row = 1, nex do at_roww = 1, nactive ! nex
at_row = active_pp_idx(at_roww)
wk = 0 wk = 0
if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", nex
do i=1,A_ind(0,at_row) do i=1,A_ind(0,at_roww)
AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) j = active_pp_idx(i)
AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_roww), s) * A_val(i, at_roww)
end do end do
do a_col = 1, nex do a_coll = 1, nactive
a_col = active_pp_idx(a_coll)
t = 0d0 t = 0d0
r1 = 1 r1 = 1
r2 = 1 r2 = 1
do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) do while ((A_ind(r1, at_roww) /= 0).and.(A_ind(r2, a_coll) /= 0))
if(A_ind(r1, at_row) > A_ind(r2, a_col)) then if(A_ind(r1, at_roww) > A_ind(r2, a_coll)) then
r2 = r2+1 r2 = r2+1
else if(A_ind(r1, at_row) < A_ind(r2, a_col)) then else if(A_ind(r1, at_roww) < A_ind(r2, a_coll)) then
r1 = r1+1 r1 = r1+1
else else
t = t - A_val(r1, at_row) * A_val(r2, a_col) t = t - A_val(r1, at_roww) * A_val(r2, a_coll)
r1 = r1+1 r1 = r1+1
r2 = r2+1 r2 = r2+1
end if end if
@ -737,8 +792,8 @@ END_PROVIDER
if(wk /= 0) then if(wk /= 0) then
!$OMP CRITICAL !$OMP CRITICAL
col_shortcut(at_row) = AtA_size+1 col_shortcut(at_roww) = AtA_size+1
N_col(at_row) = wk N_col(at_roww) = wk
if (AtA_size+wk > size(AtA_ind,1)) then if (AtA_size+wk > size(AtA_ind,1)) then
print *, AtA_size+wk , size(AtA_ind,1) print *, AtA_size+wk , size(AtA_ind,1)
stop 'too small' stop 'too small'
@ -755,28 +810,69 @@ END_PROVIDER
deallocate (A_ind_mwen, A_val_mwen) deallocate (A_ind_mwen, A_val_mwen)
!$OMP END PARALLEL !$OMP END PARALLEL
if(AtA_size > size(AtA_val)) stop "SIZA"
print *, "ATA SIZE", ata_size print *, "ATA SIZE", ata_size
do i=1,nex x = 0d0
x(i) = AtB(i)
enddo
do a_coll = 1, nactive
a_col = active_pp_idx(a_coll)
X(a_col) = AtB(a_col)
end do
rho_mrcc_init = 0d0
allocate(lref(N_det_ref))
!$OMP PARALLEL DO default(shared) schedule(static, 1) &
!$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase)
do hh = 1, hh_shortcut(0)
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
if(active(pp)) cycle
lref = 0
do II=1,N_det_ref
call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
if(.not. ok) cycle
call apply_particle_local(myMask, pp_exists(1, pp), myDet, ok, N_int)
if(.not. ok) cycle
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
if(ind == -1) cycle
ind = psi_non_ref_sorted_idx(ind)
call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
X(pp) += psi_ref_coef(II,s)**2
AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase
lref(II) = ind
if(phase < 0d0) lref(II) = -ind
end do
X(pp) = AtB(pp) / X(pp)
do II=1,N_det_ref
if(lref(II) > 0) then
rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp)
else if(lref(II) < 0) then
rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp)
end if
end do
end do
end do
!$OMP END PARALLEL DO
x_new = x
double precision :: factor, resold double precision :: factor, resold
factor = 1.d0 factor = 1.d0
resold = huge(1.d0) resold = huge(1.d0)
do k=0,100000 do k=0,100000
!$OMP PARALLEL default(shared) private(cx, i, j, a_col) !$OMP PARALLEL default(shared) private(cx, i, j, a_col, a_coll)
!$OMP DO !$OMP DO
do i=1,N_det_non_ref do i=1,N_det_non_ref
rho_mrcc(i,s) = 0.d0 rho_mrcc(i,s) = rho_mrcc_init(i,s) ! 0d0
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP DO !$OMP DO
do a_col = 1, nex do a_coll = 1, nactive !: nex
a_col = active_pp_idx(a_coll)
cx = 0d0 cx = 0d0
do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 do i=col_shortcut(a_coll), col_shortcut(a_coll) + N_col(a_coll) - 1
cx = cx + x(AtA_ind(i)) * AtA_val(i) cx = cx + x(AtA_ind(i)) * AtA_val(i)
end do end do
x_new(a_col) = AtB(a_col) + cx * factor x_new(a_col) = AtB(a_col) + cx * factor
@ -786,32 +882,33 @@ END_PROVIDER
!$OMP END PARALLEL !$OMP END PARALLEL
res = 0.d0 res = 0.d0
do a_col=1,nex
res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
end do
if (res < resold) then if (res < resold) then
do a_col=1,nex do a_coll=1,nactive ! nex
a_col = active_pp_idx(a_coll)
do j=1,N_det_non_ref do j=1,N_det_non_ref
i = A_ind(j,a_col) i = A_ind(j,a_coll)
if (i==0) exit if (i==0) exit
rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col) rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_coll) * X_new(a_col)
enddo enddo
res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
X(a_col) = X_new(a_col) X(a_col) = X_new(a_col)
end do end do
! factor = 1.d0 factor = 1.d0
else else
factor = -factor * 0.5d0 factor = -factor * 0.5d0
endif endif
resold = res resold = res
if(mod(k, 100) == 0) then if(mod(k, 5) == 0) then
print *, "res", k, res print *, "res ", k, res
end if end if
if(res < 1d-8) exit if(res < 1d-12) exit
end do end do
! rho_mrcc now contains A.X
norm = 0.d0 norm = 0.d0
do i=1,N_det_non_ref do i=1,N_det_non_ref
@ -826,7 +923,7 @@ END_PROVIDER
print *, k, "res : ", res, "norm : ", sqrt(norm) print *, k, "res : ", res, "norm : ", sqrt(norm)
dIj_unique(:size(X), s) = X(:) !dIj_unique(:size(X), s) = X(:)
norm = 0.d0 norm = 0.d0
double precision :: f double precision :: f
@ -872,11 +969,13 @@ END_PROVIDER
! rho_mrcc now contains the product of the scaling factors and the ! rho_mrcc now contains the product of the scaling factors and the
! normalization constant ! normalization constant
dIj_unique(:size(X), s) = X(:)
end do end do
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ] BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
integer :: s,i,j integer :: s,i,j
double precision, external :: get_dij_index double precision, external :: get_dij_index
@ -1142,3 +1241,6 @@ subroutine apply_particle_local(det, exc, res, ok, Nint)
ok = .true. ok = .true.
end subroutine end subroutine