mirror of
https://github.com/LCPQ/quantum_package
synced 2025-01-03 18:16:12 +01:00
parallel AtA*X
This commit is contained in:
parent
d55b0a0b5e
commit
0e18a2790b
@ -6,7 +6,7 @@
|
|||||||
# --align=32 : Align all provided arrays on a 32-byte boundary
|
# --align=32 : Align all provided arrays on a 32-byte boundary
|
||||||
#
|
#
|
||||||
[COMMON]
|
[COMMON]
|
||||||
FC : ifort
|
FC : ifort -g
|
||||||
LAPACK_LIB : -mkl=parallel
|
LAPACK_LIB : -mkl=parallel
|
||||||
IRPF90 : irpf90
|
IRPF90 : irpf90
|
||||||
IRPF90_FLAGS : --ninja --align=32
|
IRPF90_FLAGS : --ninja --align=32
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use bitmasks
|
||||||
|
|
||||||
BEGIN_PROVIDER [ integer, mrmode ]
|
BEGIN_PROVIDER [ integer, mrmode ]
|
||||||
&BEGIN_PROVIDER [ logical, old_lambda ]
|
&BEGIN_PROVIDER [ logical, old_lambda ]
|
||||||
@ -671,66 +672,202 @@ END_PROVIDER
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ integer(bit_kind), psi_non_ref_sorted, (N_int, 2, N_det_non_ref) ]
|
||||||
|
&BEGIN_PROVIDER [ integer, psi_non_ref_sorted_idx, (N_det_non_ref) ]
|
||||||
|
implicit none
|
||||||
|
psi_non_ref_sorted = psi_non_ref
|
||||||
|
call sort_det(psi_non_ref_sorted, psi_non_ref_sorted_idx, N_det_non_ref, N_int)
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
BEGIN_PROVIDER [ double precision, dIj, (hh_shortcut(hh_shortcut(0)+1)-1) ]
|
BEGIN_PROVIDER [ double precision, dIj, (hh_shortcut(hh_shortcut(0)+1)-1) ]
|
||||||
implicit none
|
implicit none
|
||||||
logical :: ok
|
logical :: ok
|
||||||
integer :: i, j, k, II, pp, hh, ind, wk, nex
|
integer :: i, j, k, II, pp, hh, ind, wk, nex, a_col, at_row
|
||||||
integer, external :: unsortedSearchDet
|
integer, external :: searchDet, unsortedSearchDet
|
||||||
integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2)
|
integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2)
|
||||||
double precision, allocatable :: A(:,:)
|
integer :: N, INFO, AtA_size, r1, r2
|
||||||
integer :: N, IPIV(N_det_non_ref), INFO
|
double precision , allocatable:: AtB(:), AtA_val(:), A_dense(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:)
|
||||||
double precision, allocatable :: WORK(:)
|
double precision :: t, norm, cx
|
||||||
integer, allocatable :: IWORK(:)
|
integer, allocatable :: A_ind(:,:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:)
|
||||||
|
|
||||||
|
|
||||||
nex = hh_shortcut(hh_shortcut(0)+1)-1
|
nex = hh_shortcut(hh_shortcut(0)+1)-1
|
||||||
print *, "TI", nex, N_det_non_ref
|
print *, "TI", nex, N_det_non_ref
|
||||||
allocate(A(N_det_non_ref, nex))
|
allocate(A_ind(N_det_ref+1, nex), A_val(N_det_ref+1, nex))
|
||||||
A = 0d0
|
allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL !!!!!!!!
|
||||||
do II = 1, N_det_ref
|
allocate(x(nex), AtB(nex), A_dense(N_det_non_ref))
|
||||||
do hh = 1, hh_shortcut(0)
|
allocate(A_val_mwen(nex), A_ind_mwen(nex))
|
||||||
call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
|
allocate(N_col(nex), col_shortcut(nex))
|
||||||
if(.not. ok) cycle
|
A_val = 0d0
|
||||||
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
|
A_ind = 0
|
||||||
|
!$OMP PARALLEL DO schedule(static,10) default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind) &
|
||||||
|
!$OMP shared(hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref) &
|
||||||
|
!$OMP private(pp, II, ok, myMask, myDet, ind, wk)
|
||||||
|
do hh = 1, hh_shortcut(0)
|
||||||
|
!print *, hh, "/", hh_shortcut(0)
|
||||||
|
do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
|
||||||
|
wk = 0
|
||||||
|
do II = 1, N_det_ref
|
||||||
|
call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
|
||||||
|
if(.not. ok) cycle
|
||||||
call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int)
|
call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int)
|
||||||
if(.not. ok) cycle
|
if(.not. ok) cycle
|
||||||
ind = unsortedSearchDet(psi_non_ref(1,1,1), myDet, N_det_non_ref, N_int)
|
!ind = unsortedSearchDet(psi_non_ref(1,1,1), myDet, N_det_non_ref, N_int)
|
||||||
|
ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int)
|
||||||
if(ind /= -1) then
|
if(ind /= -1) then
|
||||||
A(ind, pp) += psi_ref_coef(II, 1)
|
wk = wk+1
|
||||||
|
A_val(wk, pp) = psi_ref_coef(II, 1)
|
||||||
|
A_ind(wk, pp) = psi_non_ref_sorted_idx(ind)
|
||||||
end if
|
end if
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
double precision, allocatable :: IAtA(:,:), AtB(:), X(:), X_new(:)
|
|
||||||
double precision :: norm
|
A_dense = 0d0
|
||||||
allocate(IAtA(nex, nex), AtB(nex), X(nex), X_new(nex))
|
AtB = 0d0
|
||||||
print *, "allocated", size(IAtA, 1), size(A, 2)
|
AtA_size = 0
|
||||||
!IAtA = -matmul(transpose(A), A)
|
wk = 0
|
||||||
|
col_shortcut = 0
|
||||||
IAtA = 0.d0
|
N_col = 0
|
||||||
do i=1, size(A,2)
|
!$OMP PARALLEL DO schedule(dynamic, 100) default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref) &
|
||||||
IAtA(i,i) = 1d0
|
!$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen) &
|
||||||
|
!$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind)
|
||||||
|
do at_row = 1, nex
|
||||||
|
wk = 0
|
||||||
|
if(mod(at_row, 1000) == 0) print *, "AtA", at_row, "/", nex
|
||||||
|
!A_dense = 0d0
|
||||||
|
do i=1,N_det_ref
|
||||||
|
if(A_ind(i, at_row) == 0) exit
|
||||||
|
AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), 1) * A_val(i, at_row)
|
||||||
|
end do
|
||||||
|
do a_col = 1, nex
|
||||||
|
t = 0d0
|
||||||
|
r1 = 1
|
||||||
|
r2 = 1
|
||||||
|
do while(A_ind(r1, at_row) * A_ind(r2, a_col) /= 0)
|
||||||
|
if(A_ind(r1, at_row) < A_ind(r2, a_col)) then
|
||||||
|
r1 += 1
|
||||||
|
else if(A_ind(r1, at_row) > A_ind(r2, a_col)) then
|
||||||
|
r2 += 1
|
||||||
|
else
|
||||||
|
t = t - A_val(r1, at_row) * A_val(r2, a_col)
|
||||||
|
r1 += 1
|
||||||
|
r2 += 1
|
||||||
|
end if
|
||||||
|
end do
|
||||||
|
|
||||||
|
if(a_col == at_row) t = (t + 1d0)! / 2d0
|
||||||
|
if(t /= 0d0) then
|
||||||
|
wk += 1
|
||||||
|
!AtA_ind(1, wk) = at_row
|
||||||
|
!AtA_ind(2, wk) = a_col
|
||||||
|
A_ind_mwen(wk) = a_col
|
||||||
|
!AtA_val(wk) = t
|
||||||
|
A_val_mwen(wk) = t
|
||||||
|
end if
|
||||||
|
end do
|
||||||
|
|
||||||
|
if(wk /= 0) then
|
||||||
|
!$OMP CRITICAL
|
||||||
|
col_shortcut(at_row) = AtA_size+1
|
||||||
|
N_col(at_row) = wk
|
||||||
|
AtA_ind(AtA_size+1:AtA_size+wk) = A_ind_mwen(:wk)
|
||||||
|
AtA_val(AtA_size+1:AtA_size+wk) = A_val_mwen(:wk)
|
||||||
|
AtA_size += wk
|
||||||
|
!$OMP END CRITICAL
|
||||||
|
end if
|
||||||
end do
|
end do
|
||||||
call dgemm('T','N',nex,nex,N_det_non_ref,1.d0,A,size(A,1),A,size(A,1),-1.d0,IAtA,size(IAtA,1))
|
|
||||||
IaTa = -IATa
|
x = AtB
|
||||||
|
if(AtA_size > size(AtA_val)) stop "SIZA"
|
||||||
|
print *, "ATA SIZE", ata_size
|
||||||
|
allocate (x_new(nex))
|
||||||
|
integer :: iproc, omp_get_thread_num
|
||||||
|
iproc = omp_get_thread_num()
|
||||||
|
do i=1,nex
|
||||||
|
x_new(i) = 0.D0
|
||||||
|
enddo
|
||||||
|
|
||||||
call dgemv('T',N_det_non_ref,nex,1.d0,A,size(A,1),psi_non_ref_coef(1,1),1,0.d0,AtB,1)
|
do k=0,100000
|
||||||
|
!$OMP PARALLEL DO default(shared)
|
||||||
|
do i=1,nex
|
||||||
|
x_new(i) = AtB(i)
|
||||||
|
enddo
|
||||||
|
!$OMP PARALLEL DO default(shared) private(cx, i)
|
||||||
|
do a_col = 1, nex
|
||||||
|
cx = 0d0
|
||||||
|
do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1
|
||||||
|
cx += x(AtA_ind(i)) * AtA_val(i)
|
||||||
|
end do
|
||||||
|
x_new(a_col) += cx
|
||||||
|
end do
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
!AtB = matmul(transpose(A), psi_non_ref_coef(:,1))
|
|
||||||
|
|
||||||
X = AtB
|
|
||||||
do k=1, 1000
|
|
||||||
!X_new = matmul(IAtA, X) + AtB
|
|
||||||
x_new = AtB
|
|
||||||
call dgemv('N',nex,nex,1.d0,IAtA,size(IAtA,1),X,1,1.d0,x_new,1)
|
|
||||||
norm = 0d0
|
norm = 0d0
|
||||||
|
|
||||||
do j=1, size(X)
|
do j=1, size(X)
|
||||||
norm += (X_new(j) - X(j))**2
|
norm += (X_new(j) - X(j))**2
|
||||||
X(j) = X_new(j)
|
x(j) = x_new(j)
|
||||||
end do
|
end do
|
||||||
print *, "resudu ", norm
|
|
||||||
|
if(mod(k, 1000) == 0) print *, "residu ", k, norm
|
||||||
|
if(norm < 1d-9) exit
|
||||||
end do
|
end do
|
||||||
dIj = X
|
print *, "CONVERGENCE : ", norm
|
||||||
|
|
||||||
|
|
||||||
|
!do k=0,500
|
||||||
|
! if(k == 1) print *, X(:10)
|
||||||
|
! x_new = 0d0
|
||||||
|
! A_dense = 0d0
|
||||||
|
! !!$OMP PARALLEL DO schedule(dynamic, 10) default(none) shared(k, psi_non_ref_coef, x_new, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref) &
|
||||||
|
! !!$OMP private(a_col, t, i, cx) &
|
||||||
|
! !!$OMP firstprivate(A_dense)
|
||||||
|
! do at_row = 1, nex
|
||||||
|
! ! ! d DIR$ IVDEP
|
||||||
|
! cx = 0d0
|
||||||
|
! do i=1,N_det_ref
|
||||||
|
! if(A_ind(i, at_row) == 0) exit
|
||||||
|
! if(k /= 0) A_dense(A_ind(i, at_row)) = A_val(i, at_row)
|
||||||
|
! cx = cx + psi_non_ref_coef(A_ind(i, at_row), 1) * A_val(i, at_row)
|
||||||
|
! !x_new(at_row) = x_new(at_row) + psi_non_ref_coef(A_ind(i, at_row), 1) * A_val(i, at_row)
|
||||||
|
! end do
|
||||||
|
! if(k == 0) then
|
||||||
|
! x_new(at_row) = cx
|
||||||
|
! cycle
|
||||||
|
! end if
|
||||||
|
! do a_col = 1, nex
|
||||||
|
! t = 0d0
|
||||||
|
! do i = 1, N_det_ref
|
||||||
|
! if(A_ind(i, a_col) == 0) exit
|
||||||
|
! t = t - A_val(i, a_col) * A_dense(A_ind(i, a_col)) ! -= pcq I-A
|
||||||
|
! end do
|
||||||
|
! if(a_col == at_row) t = t + 1d0
|
||||||
|
! cx = cx + t * x(a_col)
|
||||||
|
! !x_new(at_row) = x_new(at_row) + t * x(a_col)
|
||||||
|
! end do
|
||||||
|
! x_new(at_row) = cx
|
||||||
|
! do i=1,N_det_ref
|
||||||
|
! if(A_ind(i, at_row) == 0) exit
|
||||||
|
! A_dense(A_ind(i, at_row)) = 0d0
|
||||||
|
! end do
|
||||||
|
! end do
|
||||||
|
! !!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
! norm = 0d0
|
||||||
|
! do j=1, size(X)
|
||||||
|
! norm += (X_new(j) - X(j))**2
|
||||||
|
! X(j) = X_new(j)
|
||||||
|
! end do
|
||||||
|
! print *, "residu ", k, norm
|
||||||
|
! if(norm < 1d-10) exit
|
||||||
|
!end do
|
||||||
|
!
|
||||||
|
|
||||||
|
dIj(:size(X)) = X(:)
|
||||||
|
!print *, X
|
||||||
print *, "done"
|
print *, "done"
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
@ -544,8 +544,8 @@ END_PROVIDER
|
|||||||
double precision :: Hjk, Hki, Hij
|
double precision :: Hjk, Hki, Hij
|
||||||
double precision, external :: get_dij
|
double precision, external :: get_dij
|
||||||
integer i_state, degree
|
integer i_state, degree
|
||||||
|
|
||||||
!provide lambda_mrcc
|
provide lambda_mrcc dIj
|
||||||
do i_state = 1, N_states
|
do i_state = 1, N_states
|
||||||
!$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(no_mono_dressing,lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref)
|
!$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(no_mono_dressing,lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref)
|
||||||
do i=1,N_det_ref
|
do i=1,N_det_ref
|
||||||
@ -670,6 +670,8 @@ end function
|
|||||||
idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i
|
idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
! To provide everything
|
||||||
|
contrib = get_dij(psi_ref(1,1,1), psi_non_ref(1,1,1), N_int)
|
||||||
|
|
||||||
do i_state = 1, N_states
|
do i_state = 1, N_states
|
||||||
delta_mrcepa0_ii(:,:) = 0d0
|
delta_mrcepa0_ii(:,:) = 0d0
|
||||||
|
Loading…
Reference in New Issue
Block a user