mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2025-01-03 01:55:52 +01:00
Added 1st version of SVD optimization
This commit is contained in:
parent
0ea3df3502
commit
4372cc8403
418
devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f
Normal file
418
devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f
Normal file
@ -0,0 +1,418 @@
|
||||
program buildpsi_diagSVDit_modif_v2
|
||||
|
||||
implicit none
|
||||
|
||||
BEGIN_DOC
|
||||
! perturbative approach to build psi_postsvd
|
||||
END_DOC
|
||||
|
||||
read_wf = .True.
|
||||
TOUCH read_wf
|
||||
|
||||
PROVIDE N_int
|
||||
|
||||
call run()
|
||||
end
|
||||
|
||||
|
||||
subroutine run
|
||||
|
||||
USE OMP_LIB
|
||||
|
||||
implicit none
|
||||
|
||||
integer(bit_kind) :: det1(N_int,2), det2(N_int,2)
|
||||
integer :: degree, i_state
|
||||
|
||||
integer :: i, j, k, l, ii, jj, na, nb
|
||||
|
||||
double precision :: norm_psi, inv_sqrt_norm_psi
|
||||
double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:)
|
||||
|
||||
double precision :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy
|
||||
double precision :: ctmp, htmp, Ept2
|
||||
double precision :: E0_postsvd, overlap_postsvd, E_prev
|
||||
double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi
|
||||
double precision :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi
|
||||
|
||||
double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:)
|
||||
double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:)
|
||||
|
||||
integer :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max
|
||||
integer :: n_selected2
|
||||
integer, allocatable :: numalpha_selected(:), numbeta_selected(:)
|
||||
integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:)
|
||||
integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:)
|
||||
|
||||
integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir
|
||||
real(kind=8) :: W_tot_time, W_tot_time_it
|
||||
real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it
|
||||
real(kind=8) :: CPU_tot_time, CPU_tot_time_it
|
||||
real(kind=8) :: speedup, speedup_it
|
||||
integer :: nb_taches
|
||||
|
||||
!$OMP PARALLEL
|
||||
nb_taches = OMP_GET_NUM_THREADS()
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call CPU_TIME(CPU_tbeg)
|
||||
call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir)
|
||||
|
||||
i_state = 1
|
||||
|
||||
! ---------------------------------------------------------------------------------------
|
||||
! construct the initial CISD matrix
|
||||
|
||||
print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
|
||||
print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique
|
||||
print *, ' N det :', N_det
|
||||
print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~'
|
||||
|
||||
norm_psi = 0.d0
|
||||
do k = 1, N_det
|
||||
norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) &
|
||||
* psi_bilinear_matrix_values(k,i_state)
|
||||
enddo
|
||||
print *, ' initial norm = ', norm_psi
|
||||
|
||||
allocate( Aref(n_det_alpha_unique,n_det_beta_unique) )
|
||||
Aref(:,:) = 0.d0
|
||||
do k = 1, N_det
|
||||
i = psi_bilinear_matrix_rows(k)
|
||||
j = psi_bilinear_matrix_columns(k)
|
||||
Aref(i,j) = psi_bilinear_matrix_values(k,i_state)
|
||||
enddo
|
||||
|
||||
! ---------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
! ---------------------------------------------------------------------------------------
|
||||
! perform a Full SVD
|
||||
|
||||
allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) )
|
||||
allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) )
|
||||
allocate( Vref(n_det_beta_unique,n_det_beta_unique) )
|
||||
allocate( Vtref(n_det_beta_unique,n_det_beta_unique) )
|
||||
call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) &
|
||||
, n_det_alpha_unique, n_det_beta_unique)
|
||||
|
||||
print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ '
|
||||
|
||||
do l = 1, n_det_beta_unique
|
||||
do i = 1, n_det_beta_unique
|
||||
Vref(i,l) = Vtref(l,i)
|
||||
enddo
|
||||
enddo
|
||||
deallocate( Vtref )
|
||||
|
||||
! Truncated rank
|
||||
n_selected = n_svd
|
||||
call write_int(6,n_svd, 'Rank of psi')
|
||||
|
||||
|
||||
!________________________________________________________________________________________________________
|
||||
!
|
||||
! loop over SVD iterations
|
||||
!________________________________________________________________________________________________________
|
||||
|
||||
tol_energy = 1.d0
|
||||
it_svd = 0
|
||||
it_svd_max = 100
|
||||
E_prev = 0.d0
|
||||
|
||||
allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique))
|
||||
allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique))
|
||||
do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) )
|
||||
|
||||
call CPU_TIME(CPU_tbeg_it)
|
||||
call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir)
|
||||
|
||||
it_svd = it_svd + 1
|
||||
|
||||
double precision :: norm
|
||||
norm = 0.d0
|
||||
do j = 1, n_selected
|
||||
norm = norm + Dref(j)*Dref(j)
|
||||
enddo
|
||||
Dref = Dref / dsqrt(norm)
|
||||
|
||||
! print *, '-- Compute H --'
|
||||
call const_H_uv(Uref, Vref, H)
|
||||
|
||||
! H0(i,j) = < u_i v_j | H | u_i v_j >
|
||||
! E0 = < psi_0 | H | psi_0 >
|
||||
E0 = 0.d0
|
||||
do j = 1, n_selected
|
||||
do i = 1, n_selected
|
||||
E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
|
||||
enddo
|
||||
enddo
|
||||
E0 = E0 + nuclear_repulsion
|
||||
! print *,' E0 =', E0
|
||||
|
||||
double precision, allocatable :: eigval0(:)
|
||||
double precision, allocatable :: eigvec0(:,:,:)
|
||||
double precision, allocatable :: H_tmp(:,:,:,:)
|
||||
|
||||
allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) )
|
||||
do l=1,n_selected
|
||||
do k=1,n_selected
|
||||
do j=1,n_selected
|
||||
do i=1,n_selected
|
||||
H_tmp(i,j,k,l) = H(i,j,k,l)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2))
|
||||
eigvec0 = 0.d0
|
||||
|
||||
! print *, ' --- Diag post-SVD --- '
|
||||
call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2)
|
||||
deallocate(H_tmp, eigval0)
|
||||
|
||||
! print *, ' --- SVD --- '
|
||||
Dref = 0.d0
|
||||
call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref)
|
||||
deallocate(eigvec0)
|
||||
|
||||
! print *, ' --- Compute H --- '
|
||||
call const_H_uv(Uref, Vref, H)
|
||||
|
||||
! H0(i,j) = < u_i v_j | H | u_i v_j >
|
||||
! E0 = < psi_0 | H | psi_0 >
|
||||
E0 = 0.d0
|
||||
norm = 0.d0
|
||||
do j = 1, n_det_beta_unique
|
||||
do i = 1, n_det_beta_unique
|
||||
E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j)
|
||||
enddo
|
||||
norm = norm + Dref(j)*Dref(j)
|
||||
enddo
|
||||
E0 = E0 + nuclear_repulsion
|
||||
! print *,' E0 =', E0
|
||||
! print *,' norm =', norm
|
||||
|
||||
! print *, ' --- Perturbation --- '
|
||||
psi_postsvd = 0.d0
|
||||
do i=1,n_selected
|
||||
psi_postsvd(i,i) = Dref(i)
|
||||
enddo
|
||||
|
||||
Ept2 = 0.d0
|
||||
do j=1,n_selected
|
||||
do i=n_selected+1,n_det_alpha_unique
|
||||
ctmp = 0.d0
|
||||
do l=1,n_selected
|
||||
do k=1,n_selected
|
||||
ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
|
||||
enddo
|
||||
enddo
|
||||
psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
enddo
|
||||
enddo
|
||||
|
||||
do j=n_selected+1,n_det_beta_unique
|
||||
do i=1,n_selected
|
||||
ctmp = 0.d0
|
||||
do l=1,n_selected
|
||||
do k=1,n_selected
|
||||
ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
|
||||
enddo
|
||||
enddo
|
||||
psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
enddo
|
||||
enddo
|
||||
! do j=n_selected+1,n_det_beta_unique
|
||||
! do i=n_selected+1,n_det_alpha_unique
|
||||
! ctmp = 0.d0
|
||||
! do l=1,n_selected
|
||||
! do k=1,n_selected
|
||||
! ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l)
|
||||
! enddo
|
||||
! enddo
|
||||
! psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
! Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) )
|
||||
! enddo
|
||||
! enddo
|
||||
|
||||
tol_energy = dabs(E_prev - E0)
|
||||
print '(I5, 3(3X, F20.10))', it_svd, E0, E0 + Ept2, tol_energy
|
||||
E_prev = E0
|
||||
|
||||
print *, ' --- SVD --- '
|
||||
call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref)
|
||||
|
||||
end do
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref)
|
||||
|
||||
USE OMP_LIB
|
||||
|
||||
integer, intent(in) :: n_selected
|
||||
double precision, intent(in) :: psi_postsvd(n_selected,n_selected)
|
||||
double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique)
|
||||
double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique)
|
||||
double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique))
|
||||
|
||||
integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb
|
||||
double precision :: err0, err_norm, err_tmp, norm_tmp
|
||||
double precision :: overlapU_mat, overlapV_mat, overlapU, overlapV
|
||||
double precision, allocatable :: S_mat(:,:), SxVt(:,:)
|
||||
double precision, allocatable :: U_svd(:,:), V_svd(:,:)
|
||||
double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:)
|
||||
|
||||
mm = n_det_alpha_unique
|
||||
nn = n_det_beta_unique
|
||||
|
||||
allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) )
|
||||
|
||||
U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected)
|
||||
V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected)
|
||||
|
||||
S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected)
|
||||
|
||||
! first compute S_mat x transpose(V_svd)
|
||||
allocate( SxVt(n_selected,nn) )
|
||||
call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 &
|
||||
, S_mat , size(S_mat,1) &
|
||||
, V_svd , size(V_svd,1) &
|
||||
, 0.d0, SxVt, size(SxVt ,1) )
|
||||
deallocate(S_mat)
|
||||
|
||||
! then compute U_svd x SxVt
|
||||
allocate( A_newsvd(mm,nn) )
|
||||
call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 &
|
||||
, U_svd , size(U_svd ,1) &
|
||||
, SxVt , size(SxVt ,1) &
|
||||
, 0.d0, A_newsvd, size(A_newsvd,1) )
|
||||
deallocate( SxVt )
|
||||
|
||||
! perform new SVD
|
||||
allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) )
|
||||
call svd_s( A_newsvd, size(A_newsvd,1), &
|
||||
U_newsvd, size(U_newsvd,1), &
|
||||
D_newsvd, &
|
||||
Vt_newsvd, size(Vt_newsvd,1), &
|
||||
mm, nn)
|
||||
deallocate(A_newsvd)
|
||||
|
||||
allocate( V_newsvd(nn,nn) )
|
||||
do l = 1, nn
|
||||
do j = 1, nn
|
||||
V_newsvd(j,l) = Vt_newsvd(l,j)
|
||||
enddo
|
||||
enddo
|
||||
deallocate(Vt_newsvd)
|
||||
|
||||
do l = 1, n_selected
|
||||
Dref(l) = D_newsvd(l)
|
||||
Uref(1:mm,l) = U_newsvd(1:mm,l)
|
||||
Vref(1:nn,l) = V_newsvd(1:nn,l)
|
||||
enddo
|
||||
|
||||
deallocate(U_newsvd)
|
||||
deallocate(V_newsvd)
|
||||
deallocate(D_newsvd)
|
||||
|
||||
end subroutine perform_newpostSVD
|
||||
|
||||
|
||||
|
||||
subroutine const_H_uv(Uref, Vref, H)
|
||||
|
||||
USE OMP_LIB
|
||||
|
||||
implicit none
|
||||
|
||||
double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique)
|
||||
double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique)
|
||||
double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique)
|
||||
|
||||
integer(bit_kind) :: det1(N_int,2), det2(N_int,2)
|
||||
integer :: i, j, k, l, degree
|
||||
integer :: ii0, jj0, ii, jj, n, m, np, mp
|
||||
integer :: nn0, mm0, na, nb, mm, ind_gs
|
||||
integer :: p,q,r,s
|
||||
double precision :: h12, x
|
||||
|
||||
double precision, allocatable :: H0(:,:,:,:)
|
||||
double precision, allocatable :: H1(:,:,:,:)
|
||||
|
||||
na = n_det_alpha_unique
|
||||
nb = n_det_beta_unique
|
||||
|
||||
allocate( H0(na,nb,na,nb) )
|
||||
allocate( H1(nb,na,nb,na) )
|
||||
|
||||
det1(:,1) = psi_det_alpha_unique(:,1)
|
||||
det2(:,1) = psi_det_alpha_unique(:,1)
|
||||
det1(:,2) = psi_det_beta_unique(:,1)
|
||||
det2(:,2) = psi_det_beta_unique(:,1)
|
||||
call i_H_j(det1, det2, N_int, h12)
|
||||
|
||||
H0 = 0.d0
|
||||
call wall_time(t0)
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) &
|
||||
!$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, &
|
||||
!$OMP N_int,Uref,Vref,H0,H1,H)
|
||||
|
||||
!$OMP DO
|
||||
do l = 1, nb
|
||||
det2(:,2) = psi_det_beta_unique(:,l)
|
||||
do j = 1, nb
|
||||
det1(:,2) = psi_det_beta_unique(:,j)
|
||||
call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int)
|
||||
if (degree > 2) cycle
|
||||
do k = 1, na
|
||||
det2(:,1) = psi_det_alpha_unique(:,k)
|
||||
do i = 1, na
|
||||
det1(:,1) = psi_det_alpha_unique(:,i)
|
||||
call get_excitation_degree(det1,det2,degree,N_int)
|
||||
if ( degree > 2) cycle
|
||||
call i_H_j(det1, det2, N_int, h12)
|
||||
H0(i,j,k,l) = h12
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call wall_time(t1)
|
||||
! (i,j,k,l) -> (j,k,l,p)
|
||||
call DGEMM('T','N', nb * na * nb, na, na, &
|
||||
1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
|
||||
|
||||
! (j,k,l,p) -> (k,l,p,q)
|
||||
call DGEMM('T','N', na * nb * na, nb, nb, &
|
||||
1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3))
|
||||
|
||||
! (k,l,p,q) -> (l,p,q,r)
|
||||
call DGEMM('T','N', nb * na * nb, na, na, &
|
||||
1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3))
|
||||
|
||||
! (l,p,q,r) -> (p,q,r,s)
|
||||
call DGEMM('T','N', na * nb * na, nb, nb, &
|
||||
1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3))
|
||||
call wall_time(t2)
|
||||
print *, t1-t0, t2-t1
|
||||
double precision :: t0, t1, t2
|
||||
|
||||
deallocate(H1,H0)
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user