1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-12-31 16:45:45 +01:00
qp_plugins_scemama/devel/svdwf/buildpsi_diagSVDit_v2.irp.f
Abdallah Ammar 2fd2fcac5d svd save
2021-07-28 17:19:18 +02:00

841 lines
26 KiB
Fortran

program buildpsi_diagSVDit_v2
implicit none
BEGIN_DOC
! study efficiency for different way to build | psi >
END_DOC
read_wf = .True.
TOUCH read_wf
PROVIDE N_int
call run()
end
subroutine run
USE OMP_LIB
implicit none
integer(bit_kind) :: det1(N_int,2), det2(N_int,2)
integer :: degree, i_state, n_blocs
double precision :: h12
integer :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb
double precision :: norm_psi, inv_sqrt_norm_psi
double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
double precision :: E0_av, E0_ap, E0pt2
double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy
double precision :: ctmp, htmp, Ept2
double precision :: E0_postsvd, overlop_postsvd
double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi
double precision, allocatable :: H(:,:,:,:)
double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:)
double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:)
integer :: it_svd, it_svd_max
integer :: n_TSVD, n_FSVD, n_selected, n_toselect
integer, allocatable :: numalpha_selected(:), numbeta_selected(:)
integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:)
integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir
real(kind=8) :: W_tot_time, W_tot_time_it, W_tot_time_step
real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step
real(kind=8) :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step
real(kind=8) :: speedup, speedup_it, speedup_step
integer :: nb_taches
double precision, allocatable :: Uezfio(:,:,:), Dezfio(:,:), Vezfio(:,:,:)
!$OMP PARALLEL
nb_taches = OMP_GET_NUM_THREADS()
!$OMP END PARALLEL
call CPU_TIME(CPU_tbeg)
call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
i_state = 1
det1(:,1) = psi_det_alpha_unique(:,1)
det2(:,1) = psi_det_alpha_unique(:,1)
det1(:,2) = psi_det_beta_unique(:,1)
det2(:,2) = psi_det_beta_unique(:,1)
call i_H_j(det1, det2, N_int, h12)
! ---------------------------------------------------------------------------------------
! construct the initial CISD matrix
print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique
print *, ' N det :', N_det
print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
norm_psi = 0.d0
do k = 1, N_det
norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
* psi_bilinear_matrix_values(k,i_state)
enddo
print *, ' initial norm = ', norm_psi
allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
Aref(:,:) = 0.d0
do k = 1, N_det
i = psi_bilinear_matrix_rows(k)
j = psi_bilinear_matrix_columns(k)
Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
enddo
! ---------------------------------------------------------------------------------------
! ---------------------------------------------------------------------------------------
! perform a Full SVD
allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
allocate( Dref(min(n_det_alpha_unique,n_det_beta_unique)) )
allocate( Vref (n_det_beta_unique,n_det_beta_unique) )
allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
call svd_s( Aref, size(Aref,1), &
Uref, size(Uref,1), &
Dref, &
Vtref, size(Vtref,1), &
n_det_alpha_unique, n_det_beta_unique )
print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
print *, ' --- First SVD: ok --- '
do l = 1, n_det_beta_unique
do i = 1, n_det_beta_unique
Vref(i,l) = Vtref(l,i)
enddo
enddo
deallocate( Vtref )
! check Full SVD error
err0 = 0.d0
do j = 1, n_det_beta_unique
do i = 1, n_det_alpha_unique
err_tmp = 0.d0
do l = 1, min(n_det_alpha_unique,n_det_beta_unique)
err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
enddo
err_tmp = Aref(i,j) - err_tmp
err0 += err_tmp * err_tmp
enddo
enddo
print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
! ---------------------------------------------------------------------------------------
! ---------------------------------------------------------------------------------------
! numerote vectors
! Full rank
n_FSVD = n_det_alpha_unique * n_det_beta_unique
print*, ' Full psi space rank = ', n_FSVD
! Truncated rank
n_TSVD = 15
print*, ' initial psi space rank = ', n_TSVD
! check Truncate SVD error
err0 = 0.d0
do j = 1, n_det_beta_unique
do i = 1, n_det_alpha_unique
err_tmp = 0.d0
do l = 1, n_TSVD
err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l)
enddo
err_tmp = Aref(i,j) - err_tmp
err0 += err_tmp * err_tmp
enddo
enddo
deallocate( Aref )
print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi)
n_selected = n_TSVD * n_TSVD
allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) )
k = 0
! first diagonal bloc
do i = 1, n_TSVD
do j = 1, n_TSVD
k = k + 1
numalpha_selected(k) = j
numbeta_selected (k) = i
enddo
enddo
! check size
if( k.ne.n_selected ) then
print*, ' error in numeroting: selected '
print*, ' k = ', k
print*, ' n_selected = ', n_selected
stop
endif
! perturbative space rank
n_blocs = 2
k = 0
if( n_blocs.eq.3 ) then
n_toselect = n_FSVD - n_selected
allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
! nondiagonal blocs
do i = 1, n_TSVD
do j = n_TSVD+1, n_det_beta_unique
k = k + 1
numalpha_toselect(k) = i
numbeta_toselect (k) = j
enddo
enddo
do i = n_TSVD+1, n_det_alpha_unique
do j = 1, n_TSVD
k = k + 1
numalpha_toselect(k) = i
numbeta_toselect (k) = j
enddo
enddo
! diagonal bloc
do i = n_TSVD+1, n_det_alpha_unique
do j = n_TSVD+1, n_det_beta_unique
k = k + 1
numalpha_toselect(k) = i
numbeta_toselect (k) = j
enddo
enddo
elseif( n_blocs.eq.2 ) then
n_toselect = n_FSVD - n_selected - (n_det_alpha_unique-n_TSVD)*(n_det_beta_unique-n_TSVD)
allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) )
! nondiagonal blocs
do i = 1, n_TSVD
do j = n_TSVD+1, n_det_beta_unique
k = k + 1
numalpha_toselect(k) = j
numbeta_toselect (k) = i
enddo
enddo
do j = 1, n_TSVD
!do i = n_TSVD+1, n_det_beta_unique
do i = n_TSVD+1, n_det_alpha_unique
k = k + 1
numalpha_toselect(k) = j
numbeta_toselect (k) = i
enddo
enddo
endif
! check size
if( k.ne.n_toselect ) then
print*, ' error in numeroting: to select '
print*, ' k = ', k
print*, ' n_toselect = ', n_toselect
stop
endif
print*, ' perturbative psi space rank = ', n_toselect
! ---------------------------------------------------------------------------------------
!________________________________________________________________________________________________________
!
! loop over SVD iterations
!________________________________________________________________________________________________________
E0_old = 0.d0
tol_energy = 1.d0
it_svd = 0
it_svd_max = 100
do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) )
call CPU_TIME(CPU_tbeg_it)
call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir)
it_svd = it_svd + 1
print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +'
print*, ' '
print*, ' '
print*, ' '
print*, ' iteration', it_svd
norm_coeff_psi = 0.d0
do j = 1, n_TSVD
norm_coeff_psi += Dref(j) * Dref(j)
enddo
inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi)
do j = 1, n_TSVD
Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi
enddo
allocate( H0(n_selected,n_selected) )
call const_H0(n_TSVD, n_selected, Uref, Vref, H0)
E0 = 0.d0
do i = 1, n_TSVD
ii = (i-1)*n_TSVD + i
do j = 1, n_TSVD
jj = (j-1)*n_TSVD + j
E0 += Dref(j) * H0(jj,ii) * Dref(i)
enddo
enddo
E0_av = E0 + nuclear_repulsion
print *,' E0 (avant SVD) =', E0_av
allocate( psi_postsvd(n_selected) )
call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd)
print*, ' postsvd energy = ', E0_postsvd
deallocate( H0 )
!Dref(:) = 0.d0
call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
deallocate( psi_postsvd )
print *, ' --- Compute H --- '
allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) )
call const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, &
numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0)
E0 = 0.d0
norm_coeff_psi = 0.d0
do i = 1, n_TSVD
ii = (i-1)*n_TSVD + i
do j = 1, n_TSVD
jj = (j-1)*n_TSVD + j
E0 += Dref(j) * H0(jj,ii) * Dref(i)
enddo
norm_coeff_psi += Dref(i) * Dref(i)
enddo
E0_ap = E0 + nuclear_repulsion
print *,' E0 (apres SVD) =', E0_ap
deallocate(H0)
allocate( coeff_psi_perturb(n_toselect) )
ept2 = 0.d0
do ii = 1, n_toselect
ctmp = 0.d0
do i = 1, n_TSVD
l = (i-1)*n_TSVD + i
ctmp += Dref(i) * Hkl(l,ii)
enddo
coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) )
enddo
E0pt2 = E0_ap + ept2
print *, ' perturb energy = ', E0pt2, ept2
tol_energy = dabs(E0_ap-E0_old)
E0_old = E0_ap
deallocate( Hdiag, Hkl)
call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
deallocate( coeff_psi_perturb )
write(n_blocs,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
!write(222,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2
call CPU_TIME(CPU_tend_it)
call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir)
CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it
W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8)
speedup_it = CPU_tot_time_it / W_tot_time_it
print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, &
& 3X, "CPU time = ", 1PE10.3, " min.", /, &
& 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it
end do
!________________________________________________________________________________________________________
!________________________________________________________________________________________________________
! save to EZFIO
allocate(Uezfio(n_det_alpha_unique,n_TSVD,1), Dezfio(n_TSVD,1), Vezfio(n_det_beta_unique,n_TSVD,1))
Dezfio(1:n_TSVD,1) = Dref(1:n_TSVD)
Uezfio(1:n_det_alpha_unique,1:n_TSVD,1) = Uref(1:n_det_alpha_unique,1:n_TSVD)
Vezfio(1:n_det_beta_unique ,1:n_TSVD,1) = Vref(1:n_det_beta_unique ,1:n_TSVD)
!call ezfio_set_spindeterminants_n_det(N_det)
!call ezfio_set_spindeterminants_n_states(N_states)
!call ezfio_set_spindeterminants_n_det_alpha(n_det_alpha_unique)
!call ezfio_set_spindeterminants_n_det_beta(n_det_beta_unique)
!call ezfio_set_spindeterminants_psi_coef_matrix_rows(psi_bilinear_matrix_rows)
!call ezfio_set_spindeterminants_psi_coef_matrix_columns(psi_bilinear_matrix_columns)
!call ezfio_set_spindeterminants_psi_coef_matrix_values(psi_bilinear_matrix_values)
!call ezfio_set_spindeterminants_n_svd_coefs(n_TSVD)
!call ezfio_set_spindeterminants_psi_svd_alpha(Uezfio)
!call ezfio_set_spindeterminants_psi_svd_beta(Vezfio )
!call ezfio_set_spindeterminants_psi_svd_coefs(Dezfio)
deallocate(Uezfio, Dezfio, Vezfio)
deallocate( Uref, Vref, Dref )
call CPU_TIME(CPU_tend)
call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir)
CPU_tot_time = CPU_tend - CPU_tbeg
W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8)
speedup = CPU_tot_time / W_tot_time
print *,' ___________________________________________________________________'
print '(//,3X,"Execution avec ",i2," threads")',nb_taches
print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, &
& 3X, "CPU time = ", 1PE10.3, " min.", /, &
& 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup
print *,' ___________________________________________________________________'
end
subroutine const_H0(n_TSVD, n_selected, Uref, Vref, H0)
USE OMP_LIB
implicit none
integer, intent(in) :: n_TSVD, n_selected
double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique)
double precision, intent(out) :: H0(n_selected,n_selected)
integer :: i, j, k, l
integer :: n, m
double precision, allocatable :: H(:,:,:,:)
H0(:,:) = 0.d0
allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
call const_H_uv_lapack(Uref, Vref, H)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,k,l,m,n) &
!$OMP SHARED(n_TSVD,H0,H)
!$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
do i = 1, n_TSVD
do j = 1, n_TSVD
m = (i-1)*n_TSVD + j
do k = 1, n_TSVD
do l = 1, n_TSVD
n = (k-1)*n_TSVD + l
H0(n,m) = H(k,l,i,j)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
deallocate( H )
return
end subroutine const_H0
subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd )
USE OMP_LIB
implicit none
integer, intent(in) :: n_TSVD, n_selected
double precision, intent(in) :: H0(n_selected,n_selected)
double precision, intent(in) :: Dref(min(n_det_alpha_unique,n_det_beta_unique))
double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected)
integer(bit_kind) :: det1(N_int,2), det2(N_int,2)
integer :: i, j, k, l, degree
integer :: ii0, jj0, ii, jj, n, m, np, mp
integer :: nn0, mm0, nn, mm, ind_gs
double precision :: h12, x
double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:)
! diagonalize H0
allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) )
call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected)
! get the postsvd ground state
allocate( check_ov(n_selected) )
do l = 1, n_selected
overlop = 0.d0
do i = 1, n_TSVD
ii = n_TSVD*(i-1) + i
overlop = overlop + eigvec0(ii,l) * Dref(i)
enddo
check_ov(l) = dabs(overlop)
enddo
ind_gs = MAXLOC( check_ov, DIM=1 )
!ind_gs = 1
overlop = check_ov(ind_gs)
E0 = eigval0(ind_gs)+nuclear_repulsion
psi_postsvd = eigvec0(:,ind_gs)
deallocate( check_ov, eigvec0, eigval0 )
return
end subroutine diag_postsvd
subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref)
USE OMP_LIB
integer, intent(in) :: n_toselect
integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
double precision, intent(in) :: coeff_psi_perturb(n_toselect)
double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb
double precision :: err0, err_norm, err_tmp, norm_tmp
double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV
double precision, allocatable :: S_mat(:,:), SxVt(:,:)
double precision, allocatable :: U_svd(:,:), V_svd(:,:)
double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
mm = n_det_alpha_unique
nn = n_det_beta_unique
allocate( U_svd(mm,mm) , V_svd(nn,nn) , S_mat(mm,nn) )
U_svd(:,:) = Uref(:,:)
V_svd(:,:) = Vref(:,:)
S_mat(:,:) = 0.d0
norm_tmp = 0.d0
do j = 1, min(mm,nn)
S_mat(j,j) = Dref(j)
norm_tmp += S_mat(j,j)*S_mat(j,j)
enddo
do l = 1, n_toselect
na = numalpha_toselect(l)
nb = numbeta_toselect (l)
S_mat(na,nb) = coeff_psi_perturb(l)
norm_tmp += S_mat(na,nb)*S_mat(na,nb)
enddo
norm_tmp = 1.d0/dsqrt(norm_tmp)
do i = 1, nn
do j = 1, mm
S_mat(j,i) = S_mat(j,i) * norm_tmp
enddo
enddo
! first compute S_mat x transpose(V_svd)
allocate( SxVt(mm,nn) )
call dgemm( 'N', 'T', mm, nn, nn, 1.d0 &
, S_mat , size(S_mat,1) &
, V_svd , size(V_svd,1) &
, 0.d0, SxVt, size(SxVt ,1) )
! then compute U_svd x SxVt
allocate( A_newsvd(mm,nn) )
call dgemm( 'N', 'N', mm, nn, mm, 1.d0 &
, U_svd , size(U_svd ,1) &
, SxVt , size(SxVt ,1) &
, 0.d0, A_newsvd, size(A_newsvd,1) )
deallocate( SxVt )
! perform new SVD
allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
allocate( V_newsvd(nn,nn) )
do l = 1, nn
do j = 1, nn
V_newsvd(j,l) = Vt_newsvd(l,j)
enddo
enddo
Dref(:) = D_newsvd(:)
Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
return
end subroutine perform_newSVD
subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref)
USE OMP_LIB
integer, intent(in) :: n_TSVD, n_selected
double precision, intent(in) :: psi_postsvd(n_selected)
double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique))
integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb
double precision :: err0, err_norm, err_tmp, norm_tmp
double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV
double precision, allocatable :: S_mat(:,:), SxVt(:,:)
double precision, allocatable :: U_svd(:,:), V_svd(:,:)
double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
mm = n_det_alpha_unique
nn = n_det_beta_unique
allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) )
U_svd(1:mm,1:n_TSVD) = Uref(1:mm,1:n_TSVD)
V_svd(1:nn,1:n_TSVD) = Vref(1:nn,1:n_TSVD)
S_mat(:,:) = 0.d0
do i = 1, n_TSVD
ii = (i-1)*n_TSVD
do j = 1, n_TSVD
jj = ii + j
S_mat(i,j) = psi_postsvd(jj)
enddo
enddo
! first compute S_mat x transpose(V_svd)
allocate( SxVt(n_TSVD,nn) )
call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 &
, S_mat , size(S_mat,1) &
, V_svd , size(V_svd,1) &
, 0.d0, SxVt, size(SxVt ,1) )
! then compute U_svd x SxVt
allocate( A_newsvd(mm,nn) )
call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 &
, U_svd , size(U_svd ,1) &
, SxVt , size(SxVt ,1) &
, 0.d0, A_newsvd, size(A_newsvd,1) )
deallocate( SxVt )
! perform new SVD
allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) )
call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn)
allocate( V_newsvd(nn,nn) )
do l = 1, nn
do j = 1, nn
V_newsvd(j,l) = Vt_newsvd(l,j)
enddo
enddo
! check SVD error
err0 = 0.d0
err_norm = 0.d0
do j = 1, nn
do i = 1, mm
err_tmp = 0.d0
do l = 1, n_TSVD
err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l)
enddo
err_tmp = A_newsvd(i,j) - err_tmp
err0 += err_tmp * err_tmp
err_norm += A_newsvd(i,j) * A_newsvd(i,j)
enddo
enddo
Dref(1:n_TSVD) = D_newsvd(1:n_TSVD)
Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm)
Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn)
deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd )
return
end subroutine perform_newpostSVD
subroutine const_Hdiag_Hkl_H0(n_TSVD, n_selected, n_toselect, numalpha_selected, numbeta_selected, &
numalpha_toselect, numbeta_toselect, Uref, Vref, Hdiag, Hkl, H0)
USE OMP_LIB
implicit none
integer, intent(in) :: n_TSVD, n_selected, n_toselect
integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected)
integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect)
double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique)
double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected)
integer :: i, j, k, l
integer :: n, na, nb, m, ma, mb
double precision, allocatable :: H(:,:,:,:)
integer(kind=8) :: W_tbeg_step, W_tend_step, W_ir
real(kind=8) :: W_tot_time_step
Hdiag(:) = 0.d0
Hkl(:,:) = 0.d0
H0(:,:) = 0.d0
allocate( H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) )
call const_H_uv_lapack(Uref, Vref, H)
call SYSTEM_CLOCK(COUNT=W_tbeg_step, COUNT_RATE=W_ir)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb) &
!$OMP SHARED(n_TSVD,n_selected,n_toselect,H0,H,Hdiag,Hkl, &
!$OMP numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect )
!$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,8)
do i = 1, n_TSVD
do j = 1, n_TSVD
m = (i-1)*n_TSVD + j
do k = 1, n_TSVD
do l = 1, n_TSVD
n = (k-1)*n_TSVD + l
H0(n,m) = H(k,l,i,j)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP DO
do n = 1, n_toselect
na = numalpha_toselect(n)
nb = numbeta_toselect (n)
! diagonal part
Hdiag(n) = H(na,nb,na,nb)
do m = 1, n_selected
ma = numalpha_selected(m)
mb = numalpha_selected(m)
! 3 blocs treated perturbatively
Hkl(m,n) = H(ma,mb,na,nb)
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call SYSTEM_CLOCK(COUNT=W_tend_step, COUNT_RATE=W_ir)
W_tot_time_step = real(W_tend_step-W_tbeg_step, kind=8) / real(W_ir, kind=8)
deallocate( H )
return
end subroutine const_Hdiag_Hkl_H0
subroutine const_H_uv_lapack(Uref, Vref, H)
USE OMP_LIB
implicit none
double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique)
double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique)
double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)
integer(bit_kind) :: det1(N_int,2), det2(N_int,2)
integer :: i, j, k, l, degree
integer :: ii0, jj0, ii, jj, n, m, np, mp
integer :: nn0, mm0, na, nb, mm, ind_gs
integer :: p,q,r,s
double precision :: h12, x
double precision, allocatable :: H0(:,:,:,:)
double precision, allocatable :: H1(:,:,:,:)
na = n_det_alpha_unique
nb = n_det_beta_unique
allocate( H0(na,nb,na,nb) )
allocate( H1(nb,na,nb,na) )
H0 = 0.d0
call wall_time(t0)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) &
!$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
!$OMP N_int,Uref,Vref,H0,H1,H)
!$OMP DO
do l = 1, nb
det2(:,2) = psi_det_beta_unique(:,l)
do j = 1, nb
det1(:,2) = psi_det_beta_unique(:,j)
call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
if (degree > 2) cycle
do k = 1, na
det2(:,1) = psi_det_alpha_unique(:,k)
do i = 1, na
det1(:,1) = psi_det_alpha_unique(:,i)
call get_excitation_degree(det1,det2,degree,N_int)
if ( degree > 2) cycle
call i_H_j(det1, det2, N_int, h12)
H0(i,j,k,l) = h12
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call wall_time(t1)
! (i,j,k,l) -> (j,k,l,p)
call DGEMM('T','N', nb * na * nb, na, na, &
1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
! (j,k,l,p) -> (k,l,p,q)
call DGEMM('T','N', na * nb * na, nb, nb, &
1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))
! (k,l,p,q) -> (l,p,q,r)
call DGEMM('T','N', nb * na * nb, na, na, &
1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
! (l,p,q,r) -> (p,q,r,s)
call DGEMM('T','N', na * nb * na, nb, nb, &
1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
call wall_time(t2)
print *, t1-t0, t2-t1
double precision :: t0, t1, t2
deallocate(H1,H0)
end const_H_uv_lapack