program buildpsi_diagSVDit_v1 implicit none BEGIN_DOC ! study efficiency for different way to build | psi > END_DOC read_wf = .True. TOUCH read_wf PROVIDE N_int call run() end subroutine run USE OMP_LIB implicit none integer(bit_kind) :: det1(N_int,2), det2(N_int,2) integer :: degree, i_state double precision :: h12 integer :: i, j, k, l, ii, jj, nn, n, na, nb, m, ma, mb double precision :: norm_psi, inv_sqrt_norm_psi double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) double precision :: E0_av, E0_ap, E0pt2 double precision :: err0, err_tmp, e_tmp, E0, overlop, E0_old, tol_energy double precision :: ctmp, htmp, Ept2 double precision :: E0_postsvd, overlop_postsvd double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi double precision :: overlopU, overlopU_mat, overlopV, overlopV_mat, overlop_psi double precision, allocatable :: H(:,:,:,:) double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:) double precision, allocatable :: psi_postsvd(:), coeff_psi_perturb(:) integer :: it_svd, it_svd_max integer :: n_TSVD, n_FSVD, n_selected, n_toselect integer, allocatable :: numalpha_selected(:), numbeta_selected(:) integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_tbeg_step, W_tend_step, W_ir real(kind=8) :: W_tot_time, W_tot_time_it, W_tot_time_step real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it, CPU_tbeg_step, CPU_tend_step real(kind=8) :: CPU_tot_time, CPU_tot_time_it, CPU_tot_time_step real(kind=8) :: speedup, speedup_it, speedup_step integer :: nb_taches !$OMP PARALLEL nb_taches = OMP_GET_NUM_THREADS() !$OMP END PARALLEL call CPU_TIME(CPU_tbeg) call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) i_state = 1 det1(:,1) = psi_det_alpha_unique(:,1) det2(:,1) = psi_det_alpha_unique(:,1) call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) det1(:,2) = psi_det_beta_unique(:,1) det2(:,2) = psi_det_beta_unique(:,1) call get_excitation_degree(det1,det2,degree,N_int) call i_H_j(det1, det2, N_int, h12) ! --------------------------------------------------------------------------------------- ! construct the initial CISD matrix print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' print *, ' CISD matrix:', n_det_alpha_unique,'x',n_det_beta_unique print *, ' N det :', N_det print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' norm_psi = 0.d0 do k = 1, N_det norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & * psi_bilinear_matrix_values(k,i_state) enddo print *, ' initial norm = ', norm_psi allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) Aref(:,:) = 0.d0 do k = 1, N_det i = psi_bilinear_matrix_rows(k) j = psi_bilinear_matrix_columns(k) Aref(i,j) = psi_bilinear_matrix_values(k,i_state) enddo ! --------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------- ! perform a Full SVD allocate( Uref(n_det_alpha_unique,n_det_beta_unique) ) allocate( Dref(n_det_beta_unique) ) allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & , n_det_alpha_unique, n_det_beta_unique) print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' print *, ' --- First SVD: ok --- ' print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' do l = 1, n_det_beta_unique do i = 1, n_det_beta_unique Vref(i,l) = Vtref(l,i) enddo enddo deallocate( Vtref ) ! check Truncate SVD error err0 = 0.d0 do j = 1, n_det_beta_unique do i = 1, n_det_alpha_unique err_tmp = 0.d0 do l = 1, n_det_beta_unique err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) enddo err_tmp = Aref(i,j) - err_tmp err0 += err_tmp * err_tmp enddo enddo print *, ' Full SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) ! --------------------------------------------------------------------------------------- nn = n_det_beta_unique ! --------------------------------------------------------------------------------------- ! numerote vectors ! Full rank n_FSVD = nn * nn print*, ' Full psi space rank = ', n_FSVD ! Truncated rank n_TSVD = 20 print*, ' initial psi space rank = ', n_TSVD ! check Truncate SVD error err0 = 0.d0 do j = 1, n_det_beta_unique do i = 1, n_det_alpha_unique err_tmp = 0.d0 do l = 1, n_TSVD err_tmp = err_tmp + Dref(l) * Uref(i,l) * Vref(j,l) enddo err_tmp = Aref(i,j) - err_tmp err0 += err_tmp * err_tmp enddo enddo deallocate( Aref ) print *, ' Truncate SVD err (%) = ', 100.d0 * dsqrt(err0/norm_psi) n_selected = n_TSVD * n_TSVD allocate( numalpha_selected(n_selected) , numbeta_selected(n_selected) ) k = 0 ! first diagonal bloc do i = 1, n_TSVD do j = 1, n_TSVD k = k + 1 numalpha_selected(k) = j numbeta_selected (k) = i enddo enddo ! check size if( k.ne.n_selected ) then print*, ' error in numeroting: selected ' print*, ' k = ', k print*, ' n_selected = ', n_selected stop endif ! perturbative space rank k = 0 n_toselect = n_FSVD - n_selected print*, ' perturbative psi space rank = ', n_toselect allocate( numalpha_toselect(n_toselect) , numbeta_toselect(n_toselect) ) ! nondiagonal blocs do i = 1, n_TSVD do j = n_TSVD+1, nn k = k + 1 numalpha_toselect(k) = j numbeta_toselect (k) = i enddo enddo do j = 1, n_TSVD do i = n_TSVD+1, nn k = k + 1 numalpha_toselect(k) = j numbeta_toselect (k) = i enddo enddo ! diagonal bloc do i = n_TSVD+1, nn do j = n_TSVD+1, nn k = k + 1 numalpha_toselect(k) = j numbeta_toselect (k) = i enddo enddo ! check size if( k.ne.n_toselect ) then print*, ' error in numeroting: to select ' print*, ' k = ', k print*, ' n_toselect = ', n_toselect stop endif ! --------------------------------------------------------------------------------------- !________________________________________________________________________________________________________ ! ! loop over SVD iterations !________________________________________________________________________________________________________ E0_old = 0.d0 tol_energy = 1.d0 it_svd = 0 it_svd_max = 10 do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) call CPU_TIME(CPU_tbeg_it) call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) it_svd = it_svd + 1 print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' print*, ' ' print*, ' ' print*, ' ' print*, ' iteration', it_svd norm_coeff_psi = 0.d0 do j = 1, n_TSVD norm_coeff_psi += Dref(j) * Dref(j) enddo inv_sqrt_norm_coeff_psi = 1.d0 / dsqrt(norm_coeff_psi) do j = 1, n_TSVD Dref(j) = Dref(j) * inv_sqrt_norm_coeff_psi enddo allocate( H0(n_selected,n_selected) ) call const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) ! avant SVD E0 = 0.d0 do i = 1, n_TSVD ii = (i-1)*n_TSVD + i do j = 1, n_TSVD jj = (j-1)*n_TSVD + j E0 += Dref(j) * H0(jj,ii) * Dref(i) enddo enddo E0_av = E0 + nuclear_repulsion print *,' E0 (avant SVD) =', E0_av print *, '' allocate( psi_postsvd(n_selected) ) print *, ' --- Diag post-SVD --- ' call diag_postsvd(n_TSVD, n_selected, Dref, H0, E0_postsvd, overlop_postsvd, psi_postsvd) print*, ' postsvd energy = ', E0_postsvd deallocate( H0 ) ! post-SVD !Dref(:) = 0.d0 call perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) deallocate( psi_postsvd ) print *, '' print *, '' print *, ' --- Compute H --- ' allocate( H0(n_selected,n_selected), Hdiag(n_toselect), Hkl(n_selected,n_toselect) ) call const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) E0 = 0.d0 norm_coeff_psi = 0.d0 do i = 1, n_TSVD ii = (i-1)*n_TSVD + i do j = 1, n_TSVD jj = (j-1)*n_TSVD + j E0 += Dref(j) * H0(jj,ii) * Dref(i) enddo norm_coeff_psi += Dref(i) * Dref(i) enddo E0_ap = E0 + nuclear_repulsion print *,' E0 (apres SVD) =', E0_ap deallocate(H0) print *, ' --- Perturbation --- ' allocate( coeff_psi_perturb(n_toselect) ) ept2 = 0.d0 do ii = 1, n_toselect ctmp = 0.d0 do i = 1, n_TSVD l = (i-1)*n_TSVD + i ctmp += Dref(i) * Hkl(l,ii) enddo coeff_psi_perturb(ii) = ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) ept2 += ctmp*ctmp / ( E0_ap - (Hdiag(ii)+nuclear_repulsion) ) enddo E0pt2 = E0_ap + ept2 print *, ' perturb energy = ', E0pt2, ept2 tol_energy = 100.d0 * dabs(E0pt2-E0_old) / dabs(E0pt2) E0_old = E0pt2 deallocate( Hdiag, Hkl) print *, ' --- SVD --- ' call perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) deallocate( coeff_psi_perturb ) write(11,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0pt2 call CPU_TIME(CPU_tend_it) call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) speedup_it = CPU_tot_time_it / W_tot_time_it print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & & 3X, "CPU time = ", 1PE10.3, " min.", /, & & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it end do !________________________________________________________________________________________________________ !________________________________________________________________________________________________________ deallocate( Uref, Vref, Dref ) call CPU_TIME(CPU_tend) call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) CPU_tot_time = CPU_tend - CPU_tbeg W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) speedup = CPU_tot_time / W_tot_time print *,' ___________________________________________________________________' print '(//,3X,"Execution avec ",i2," threads")',nb_taches print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & & 3X, "CPU time = ", 1PE10.3, " min.", /, & & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup print *,' ___________________________________________________________________' end subroutine const_psihpsi_postsvd_H0_modif(n_selected, numalpha_selected, numbeta_selected, Uref, Vref, H0) USE OMP_LIB implicit none integer, intent(in) :: n_selected integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(out) :: H0(n_selected,n_selected) integer(bit_kind) :: det1(N_int,2), det2(N_int,2) integer :: i, j, k, l, degree integer :: n, na, nb, m , ma, mb double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:) H0(:,:) = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree) & !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & !$OMP N_int,n_selected,Uref,Vref,H0,Htot,H1,numalpha_selected,numbeta_selected ) !$OMP SINGLE allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) Htot(:,:,:,:) = 0.d0 !$OMP END SINGLE !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) do i = 1, n_det_alpha_unique do k = 1, n_det_alpha_unique det1(:,1) = psi_det_alpha_unique(:,i) det2(:,1) = psi_det_alpha_unique(:,k) call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) if (degree .gt. 2) then cycle endif do j = 1, n_det_beta_unique det1(:,2) = psi_det_beta_unique(:,j) do l = 1, n_det_beta_unique det2(:,2) = psi_det_beta_unique(:,l) call get_excitation_degree(det1,det2,degree,N_int) if (degree .gt. 2) then cycle endif ! !!! call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) ! !!! enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) H1(:,:,:) = 0.d0 !$OMP END SINGLE !$OMP DO do n = 1, n_selected na = numalpha_selected(n) nb = numbeta_selected (n) do i = 1, n_det_alpha_unique do j = 1, n_det_beta_unique do l = 1, n_det_beta_unique do k = 1, n_det_alpha_unique H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) enddo enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE deallocate( Htot ) !$OMP END SINGLE !$OMP DO do m = 1, n_selected ma = numalpha_selected(m) mb = numbeta_selected (m) do n = 1, n_selected do k = 1, n_det_alpha_unique do l = 1, n_det_beta_unique H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE deallocate( H1 ) !$OMP END SINGLE !$OMP END PARALLEL return end subroutine const_psihpsi_postsvd_H0_modif subroutine diag_postsvd(n_TSVD, n_selected, Dref, H0, E0, overlop, psi_postsvd ) USE OMP_LIB implicit none integer, intent(in) :: n_TSVD, n_selected double precision, intent(in) :: H0(n_selected,n_selected) double precision, intent(in) :: Dref(n_det_beta_unique) double precision, intent(out) :: E0, overlop, psi_postsvd(n_selected) integer(bit_kind) :: det1(N_int,2), det2(N_int,2) integer :: i, j, k, l, degree integer :: ii0, jj0, ii, jj, n, m, np, mp integer :: nn0, mm0, nn, mm, ind_gs double precision :: h12, x double precision, allocatable :: eigvec0(:,:), eigval0(:), check_ov(:) ! diagonalize H0 allocate( eigvec0(n_selected,n_selected), eigval0(n_selected) ) call lapack_diag(eigval0, eigvec0, H0, n_selected, n_selected) ! get the postsvd ground state allocate( check_ov(n_selected) ) do l = 1, n_selected overlop = 0.d0 do i = 1, n_TSVD ii = n_TSVD*(i-1) + i overlop = overlop + eigvec0(ii,l) * Dref(i) enddo check_ov(l) = dabs(overlop) enddo ind_gs = MAXLOC( check_ov, DIM=1 ) !ind_gs = 1 overlop = check_ov(ind_gs) E0 = eigval0(ind_gs)+nuclear_repulsion psi_postsvd = eigvec0(:,ind_gs) deallocate( check_ov, eigvec0, eigval0 ) return end subroutine diag_postsvd subroutine perform_newSVD(n_toselect, numalpha_toselect, numbeta_toselect, coeff_psi_perturb, Uref, Vref, Dref) USE OMP_LIB integer, intent(in) :: n_toselect integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) double precision, intent(in) :: coeff_psi_perturb(n_toselect) double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(inout) :: Dref(n_det_beta_unique) integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb double precision :: err0, err_norm, err_tmp, norm_tmp double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV double precision, allocatable :: S_mat(:,:), SxVt(:,:) double precision, allocatable :: U_svd(:,:), V_svd(:,:) double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) mm = n_det_alpha_unique nn = n_det_beta_unique allocate( U_svd(mm,nn) , V_svd(nn,nn) , S_mat(nn,nn) ) U_svd(:,:) = Uref(:,:) V_svd(:,:) = Vref(:,:) S_mat(:,:) = 0.d0 norm_tmp = 0.d0 do j = 1, n_det_beta_unique S_mat(j,j) = Dref(j) norm_tmp += S_mat(j,j)*S_mat(j,j) enddo do l = 1, n_toselect na = numalpha_toselect(l) nb = numbeta_toselect (l) S_mat(na,nb) = coeff_psi_perturb(l) norm_tmp += S_mat(na,nb)*S_mat(na,nb) enddo print*, ' norm de S_mat =', norm_tmp !norm_tmp = 1.d0/dsqrt(norm_tmp) !do i = 1, nn ! do j = 1, nn ! S_mat(j,i) = S_mat(j,i) * norm_tmp ! enddo !enddo ! first compute S_mat x transpose(V_svd) allocate( SxVt(nn,nn) ) call dgemm( 'N', 'T', nn, nn, nn, 1.d0 & , S_mat , size(S_mat,1) & , V_svd , size(V_svd,1) & , 0.d0, SxVt, size(SxVt ,1) ) ! then compute U_svd x SxVt allocate( A_newsvd(mm,nn) ) call dgemm( 'N', 'N', mm, nn, nn, 1.d0 & , U_svd , size(U_svd ,1) & , SxVt , size(SxVt ,1) & , 0.d0, A_newsvd, size(A_newsvd,1) ) deallocate( SxVt ) ! perform new SVD allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) print *, ' +++ new perturbative SVD is performed +++ ' allocate( V_newsvd(nn,nn) ) do l = 1, nn do j = 1, nn V_newsvd(j,l) = Vt_newsvd(l,j) enddo enddo ! check SVD error err0 = 0.d0 err_norm = 0.d0 do j = 1, nn do i = 1, mm err_tmp = 0.d0 do l = 1, nn err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) enddo err_tmp = A_newsvd(i,j) - err_tmp err0 += err_tmp * err_tmp err_norm += A_newsvd(i,j) * A_newsvd(i,j) enddo enddo print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' do l = 1, nn Dref(l) = D_newsvd(l) Uref(:,l) = U_newsvd(:,l) Vref(:,l) = V_newsvd(:,l) enddo !print *, Dref(:) overlopU_mat = 0.d0 overlopV_mat = 0.d0 do i = 1, nn do j = 1, nn overlopU = 0.d0 do ii = 1, mm overlopU += Uref(ii,j) * Uref(ii,i) enddo overlopU_mat += overlopU overlopV = 0.d0 do ii = 1, nn overlopV += Vref(ii,j) * Vref(ii,i) enddo overlopV_mat += overlopV enddo enddo print *, 'overlop U =', overlopU_mat print *, 'overlop V =', overlopV_mat deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) return end subroutine perform_newSVD subroutine perform_newpostSVD(n_TSVD, n_selected, psi_postsvd, Uref, Vref, Dref) ! TODO: general case wherer we we don't consider the first trucated block USE OMP_LIB integer, intent(in) :: n_TSVD, n_selected double precision, intent(in) :: psi_postsvd(n_selected) double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_beta_unique) double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(inout) :: Dref(n_det_beta_unique) integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb double precision :: err0, err_norm, err_tmp, norm_tmp double precision :: overlopU_mat, overlopV_mat, overlopU, overlopV double precision, allocatable :: S_mat(:,:), SxVt(:,:) double precision, allocatable :: U_svd(:,:), V_svd(:,:) double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) mm = n_det_alpha_unique nn = n_det_beta_unique allocate( U_svd(mm,n_TSVD) , V_svd(nn,n_TSVD) , S_mat(n_TSVD,n_TSVD) ) U_svd(:,:) = Uref(:,1:n_TSVD) V_svd(:,:) = Vref(:,1:n_TSVD) S_mat(:,:) = 0.d0 do i = 1, n_TSVD ii = (i-1)*n_TSVD do j = 1, n_TSVD jj = ii + j S_mat(j,i) = psi_postsvd(jj) enddo enddo ! first compute S_mat x transpose(V_svd) allocate( SxVt(n_TSVD,nn) ) call dgemm( 'N', 'T', n_TSVD, nn, n_TSVD, 1.d0 & , S_mat , size(S_mat,1) & , V_svd , size(V_svd,1) & , 0.d0, SxVt, size(SxVt ,1) ) ! then compute U_svd x SxVt allocate( A_newsvd(mm,nn) ) call dgemm( 'N', 'N', mm, nn, n_TSVD, 1.d0 & , U_svd , size(U_svd ,1) & , SxVt , size(SxVt ,1) & , 0.d0, A_newsvd, size(A_newsvd,1) ) deallocate( SxVt ) ! perform new SVD allocate( U_newsvd(mm,nn), Vt_newsvd(nn,nn), D_newsvd(nn) ) call svd_s( A_newsvd, size(A_newsvd,1), U_newsvd, size(U_newsvd,1), D_newsvd, Vt_newsvd, size(Vt_newsvd,1), mm, nn) print *, ' +++ new SVD is performed +++ ' allocate( V_newsvd(nn,nn) ) do l = 1, nn do j = 1, nn V_newsvd(j,l) = Vt_newsvd(l,j) enddo enddo ! check SVD error err0 = 0.d0 err_norm = 0.d0 do j = 1, nn do i = 1, mm err_tmp = 0.d0 do l = 1, n_TSVD err_tmp = err_tmp + D_newsvd(l) * U_newsvd(i,l) * V_newsvd(j,l) enddo err_tmp = A_newsvd(i,j) - err_tmp err0 += err_tmp * err_tmp err_norm += A_newsvd(i,j) * A_newsvd(i,j) enddo enddo print *, ' SVD err (%) = ', 100.d0 * dsqrt(err0/err_norm) print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' do l = 1, n_TSVD Dref(l) = D_newsvd(l) Uref(:,l) = U_newsvd(:,l) Vref(:,l) = V_newsvd(:,l) enddo ! print *, Dref(:) overlopU_mat = 0.d0 overlopV_mat = 0.d0 do i = 1, nn do j = 1, nn overlopU = 0.d0 do ii = 1, mm overlopU += Uref(ii,j) * Uref(ii,i) enddo overlopU_mat += overlopU overlopV = 0.d0 do ii = 1, nn overlopV += Vref(ii,j) * Vref(ii,i) enddo overlopV_mat += overlopV enddo enddo print *, 'overlop U =', overlopU_mat print *, 'overlop V =', overlopV_mat deallocate( U_newsvd, V_newsvd, Vt_newsvd, D_newsvd, A_newsvd ) return end subroutine perform_newpostSVD subroutine const_Hdiag_Hkl_H0(n_selected, n_toselect, Uref, Vref, numalpha_selected, numbeta_selected & , numalpha_toselect, numbeta_toselect, Hdiag, Hkl, H0) implicit none integer, intent(in) :: n_selected, n_toselect integer, intent(in) :: numalpha_selected(n_selected), numbeta_selected(n_selected) integer, intent(in) :: numalpha_toselect(n_toselect), numbeta_toselect(n_toselect) double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(out) :: Hdiag(n_toselect), Hkl(n_selected,n_toselect), H0(n_selected,n_selected) integer(bit_kind) :: det1(N_int,2) integer(bit_kind) :: det2(N_int,2) integer :: degree integer :: i, j, k, l integer :: n, na, nb, m, ma, mb double precision, allocatable :: Htot(:,:,:,:), H1(:,:,:), H2(:,:,:) Hdiag(:) = 0.d0 Hkl(:,:) = 0.d0 H0(:,:) = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,j,k,l,n,na,nb,m,ma,mb,det1,det2,degree) & !$OMP SHARED(n_det_alpha_unique,n_det_beta_unique,psi_det_alpha_unique,psi_det_beta_unique, & !$OMP N_int,n_selected,n_toselect,Uref,Vref,H0,Htot,H1,H2,Hdiag,Hkl, & !$OMP numalpha_selected,numbeta_selected,numalpha_toselect,numbeta_toselect ) !$OMP SINGLE allocate( Htot(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique) ) Htot(:,:,:,:) = 0.d0 !$OMP END SINGLE !$OMP DO COLLAPSE(2) SCHEDULE(DYNAMIC,20) do i = 1, n_det_alpha_unique do k = 1, n_det_alpha_unique det1(:,1) = psi_det_alpha_unique(:,i) det2(:,1) = psi_det_alpha_unique(:,k) call get_excitation_degree_spin(det1(1,1),det2(1,1),degree,N_int) if (degree .gt. 2) then cycle endif do j = 1, n_det_beta_unique det1(:,2) = psi_det_beta_unique(:,j) do l = 1, n_det_beta_unique det2(:,2) = psi_det_beta_unique(:,l) call get_excitation_degree(det1,det2,degree,N_int) if (degree .gt. 2) then cycle endif ! !!! call i_H_j(det1, det2, N_int, Htot(k,l,i,j)) ! !!! enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE print *, ' *** Htot is calculated *** ' allocate( H1(n_det_alpha_unique,n_det_beta_unique,n_selected) ) H1(:,:,:) = 0.d0 !$OMP END SINGLE !$OMP DO do n = 1, n_selected na = numalpha_selected(n) nb = numbeta_selected (n) do i = 1, n_det_alpha_unique do j = 1, n_det_beta_unique do l = 1, n_det_beta_unique do k = 1, n_det_alpha_unique H1(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) enddo enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE allocate( H2(n_det_alpha_unique,n_det_beta_unique,n_toselect) ) H2(:,:,:) = 0.d0 !$OMP END SINGLE !$OMP DO do n = 1, n_toselect na = numalpha_toselect(n) nb = numbeta_toselect (n) do i = 1, n_det_alpha_unique do j = 1, n_det_beta_unique do l = 1, n_det_beta_unique do k = 1, n_det_alpha_unique H2(k,l,n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) Hdiag(n) += Htot(k,l,i,j) * Uref(i,na) * Vref(j,nb) * Uref(k,na) * Vref(l,nb) enddo enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE deallocate( Htot ) !$OMP END SINGLE !$OMP DO do m = 1, n_selected ma = numalpha_selected(m) mb = numbeta_selected (m) do n = 1, n_toselect do k = 1, n_det_alpha_unique do l = 1, n_det_beta_unique Hkl(m,n) += H2(k,l,n) * Uref(k,ma) * Vref(l,mb) enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE deallocate( H2 ) !$OMP END SINGLE !$OMP DO do m = 1, n_selected ma = numalpha_selected(m) mb = numbeta_selected (m) do n = 1, n_selected do k = 1, n_det_alpha_unique do l = 1, n_det_beta_unique H0(m,n) += H1(k,l,n) * Uref(k,ma) * Vref(l,mb) enddo enddo enddo enddo !$OMP END DO !$OMP SINGLE deallocate( H1 ) !$OMP END SINGLE !$OMP END PARALLEL return end subroutine const_Hdiag_Hkl_H0