program buildpsi_diagSVDit_Anthony_v1 implicit none BEGIN_DOC ! perturbative approach to build psi_postsvd END_DOC read_wf = .True. TOUCH read_wf PROVIDE N_int call run() end subroutine run USE OMP_LIB implicit none integer(bit_kind) :: det1(N_int,2), det2(N_int,2) integer :: degree, i_state double precision :: h12 integer :: i, j, k, l, ii, jj, na, nb double precision :: norm_psi, inv_sqrt_norm_psi double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) double precision :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy double precision :: ctmp, htmp, Ept2 double precision :: E0_postsvd, overlap_postsvd, E_prev double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi double precision :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:) double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:) integer :: n_TSVD, n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max integer :: n_selected2 integer, allocatable :: numalpha_selected(:), numbeta_selected(:) integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir real(kind=8) :: W_tot_time, W_tot_time_it real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it real(kind=8) :: CPU_tot_time, CPU_tot_time_it real(kind=8) :: speedup, speedup_it integer :: nb_taches !$OMP PARALLEL nb_taches = OMP_GET_NUM_THREADS() !$OMP END PARALLEL call CPU_TIME(CPU_tbeg) call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) i_state = 1 det1(:,1) = psi_det_alpha_unique(:,1) det2(:,1) = psi_det_alpha_unique(:,1) det1(:,2) = psi_det_beta_unique(:,1) det2(:,2) = psi_det_beta_unique(:,1) call i_H_j(det1, det2, N_int, h12) ! --------------------------------------------------------------------------------------- ! construct the initial CISD matrix print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique print *, ' N det :', N_det print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' norm_psi = 0.d0 do k = 1, N_det norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & * psi_bilinear_matrix_values(k,i_state) enddo print *, ' initial norm = ', norm_psi allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) Aref(:,:) = 0.d0 do k = 1, N_det i = psi_bilinear_matrix_rows(k) j = psi_bilinear_matrix_columns(k) Aref(i,j) = psi_bilinear_matrix_values(k,i_state) enddo ! --------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------- ! perform a Full SVD allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) !allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) ) allocate( Dref(min(n_det_beta_unique,n_det_alpha_unique)) ) allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & , n_det_alpha_unique, n_det_beta_unique) print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' do l = 1, n_det_beta_unique do i = 1, n_det_beta_unique Vref(i,l) = Vtref(l,i) enddo enddo deallocate( Vtref ) ! Truncated rank n_TSVD = 20 n_selected = n_TSVD call write_int(6,n_TSVD, 'Rank of psi') !________________________________________________________________________________________________________ ! ! loop over SVD iterations !________________________________________________________________________________________________________ tol_energy = 1.d0 it_svd = 0 it_svd_max = 100 E_prev = 0.d0 allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique)) allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-8 ) ) call CPU_TIME(CPU_tbeg_it) call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) it_svd = it_svd + 1 print*, '+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +' print*, ' ' print*, ' ' print*, ' ' print*, ' iteration', it_svd double precision :: norm norm = 0.d0 do j = 1, n_selected norm = norm + Dref(j)*Dref(j) enddo Dref = Dref / dsqrt(norm) call const_H_uv(Uref, Vref, H) E0 = 0.d0 do j = 1, n_selected do i = 1, n_selected E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) enddo enddo double precision :: E0_av, E0_ap, E0pt2 E0_av = E0 + nuclear_repulsion print *,' E0 (avant SVD) =', E0_av print *, '' double precision, allocatable :: eigval0(:) double precision, allocatable :: eigvec0(:,:,:) double precision, allocatable :: H_tmp(:,:,:,:) allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) ) do l=1,n_selected do k=1,n_selected do j=1,n_selected do i=1,n_selected H_tmp(i,j,k,l) = H(i,j,k,l) enddo enddo enddo enddo allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2)) eigvec0 = 0.d0 call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2) E0_postsvd = eigval0(1) + nuclear_repulsion print*, ' postsvd energy = ', E0_postsvd deallocate(H_tmp, eigval0) Dref = 0.d0 call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref) deallocate(eigvec0) print *, ' --- Compute H --- ' call const_H_uv(Uref, Vref, H) E0 = 0.d0 norm = 0.d0 do j = 1, n_det_beta_unique do i = 1, n_det_beta_unique E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) enddo norm = norm + Dref(j)*Dref(j) enddo E0_ap = E0 + nuclear_repulsion print *,' E0 (apres SVD) =', E0_ap psi_postsvd = 0.d0 do i=1,n_selected psi_postsvd(i,i) = Dref(i) enddo E0 = E0_ap Ept2 = 0.d0 do j=1,n_selected do i=n_selected+1,n_det_alpha_unique ctmp = 0.d0 do l=1,n_selected do k=1,n_selected ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) enddo enddo psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) enddo enddo do j=n_selected+1,n_det_beta_unique do i=1,n_selected ctmp = 0.d0 do l=1,n_selected do k=1,n_selected ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) enddo enddo psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) enddo enddo do j=n_selected+1,n_det_beta_unique do i=n_selected+1,n_det_alpha_unique ctmp = 0.d0 do l=1,n_selected do k=1,n_selected ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) enddo enddo psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) enddo enddo E0pt2 = E0_ap + ept2 print *, ' perturb energy = ', E0pt2, ept2 tol_energy = dabs(E_prev - E0_ap) E_prev = E0_ap call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref) write(44,'(i5,4x,4(f22.15,2x))') it_svd, E0_av, E0_postsvd, E0_ap, E0_ap+Ept2 call CPU_TIME(CPU_tend_it) call SYSTEM_CLOCK(COUNT=W_tend_it, COUNT_RATE=W_ir) CPU_tot_time_it = CPU_tend_it - CPU_tbeg_it W_tot_time_it = real(W_tend_it-W_tbeg_it, kind=8) / real(W_ir, kind=8) speedup_it = CPU_tot_time_it / W_tot_time_it print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & & 3X, "CPU time = ", 1PE10.3, " min.", /, & & 3X, "speed up = ", 1PE10.3,//)', W_tot_time_it/60.d0, CPU_tot_time_it/60.d0, speedup_it end do deallocate( Uref, Vref, Dref ) call CPU_TIME(CPU_tend) call SYSTEM_CLOCK(COUNT=W_tend, COUNT_RATE=W_ir) CPU_tot_time = CPU_tend - CPU_tbeg W_tot_time = real(W_tend - W_tbeg, kind=8) / real(W_ir, kind=8) speedup = CPU_tot_time / W_tot_time print *,' ___________________________________________________________________' print '(//,3X,"Execution avec ",i2," threads")',nb_taches print '(//, 3X, "elapsed time = ", 1PE10.3, " min.", /, & & 3X, "CPU time = ", 1PE10.3, " min.", /, & & 3X, "speed up = ", 1PE10.3 ,// )', W_tot_time/60.d0, CPU_tot_time/60.d0, speedup print *,' ___________________________________________________________________' end subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref) USE OMP_LIB integer, intent(in) :: n_selected double precision, intent(in) :: psi_postsvd(n_selected,n_selected) double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(inout) :: Dref(min(n_det_beta_unique,n_det_alpha_unique)) integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb double precision :: err0, err_norm, err_tmp, norm_tmp double precision :: overlapU_mat, overlapV_mat, overlapU, overlapV double precision, allocatable :: S_mat(:,:), SxVt(:,:) double precision, allocatable :: U_svd(:,:), V_svd(:,:) double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) mm = n_det_alpha_unique nn = n_det_beta_unique allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) ) U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected) V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected) S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected) ! first compute S_mat x transpose(V_svd) allocate( SxVt(n_selected,nn) ) call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 & , S_mat , size(S_mat,1) & , V_svd , size(V_svd,1) & , 0.d0, SxVt, size(SxVt ,1) ) deallocate(S_mat) ! then compute U_svd x SxVt allocate( A_newsvd(mm,nn) ) call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 & , U_svd , size(U_svd ,1) & , SxVt , size(SxVt ,1) & , 0.d0, A_newsvd, size(A_newsvd,1) ) deallocate( SxVt ) ! perform new SVD allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(min(mm,nn)) ) call svd_s( A_newsvd, size(A_newsvd,1), & U_newsvd, size(U_newsvd,1), & D_newsvd, & Vt_newsvd, size(Vt_newsvd,1), & mm, nn) deallocate(A_newsvd) allocate( V_newsvd(nn,nn) ) do l = 1, nn do j = 1, nn V_newsvd(j,l) = Vt_newsvd(l,j) enddo enddo deallocate(Vt_newsvd) !do l = 1, n_selected ! Dref(l) = D_newsvd(l) ! Uref(1:mm,l) = U_newsvd(1:mm,l) ! Vref(1:nn,l) = V_newsvd(1:nn,l) !enddo Dref(1:n_selected) = D_newsvd(1:n_selected) Uref(1:mm,1:mm) = U_newsvd(1:mm,1:mm) Vref(1:nn,1:nn) = V_newsvd(1:nn,1:nn) deallocate(U_newsvd) deallocate(V_newsvd) deallocate(D_newsvd) end subroutine perform_newpostSVD subroutine const_H_uv(Uref, Vref, H) USE OMP_LIB implicit none double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique) integer(bit_kind) :: det1(N_int,2), det2(N_int,2) integer :: i, j, k, l, degree integer :: ii0, jj0, ii, jj, n, m, np, mp integer :: nn0, mm0, na, nb, mm, ind_gs integer :: p,q,r,s double precision :: h12, x double precision, allocatable :: H0(:,:,:,:) double precision, allocatable :: H1(:,:,:,:) na = n_det_alpha_unique nb = n_det_beta_unique allocate( H0(na,nb,na,nb) ) allocate( H1(nb,na,nb,na) ) H0 = 0.d0 call wall_time(t0) !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) & !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & !$OMP N_int,Uref,Vref,H0,H1,H) !$OMP DO do l = 1, nb det2(:,2) = psi_det_beta_unique(:,l) do j = 1, nb det1(:,2) = psi_det_beta_unique(:,j) call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) if (degree > 2) cycle do k = 1, na det2(:,1) = psi_det_alpha_unique(:,k) do i = 1, na det1(:,1) = psi_det_alpha_unique(:,i) call get_excitation_degree(det1,det2,degree,N_int) if ( degree > 2) cycle call i_H_j(det1, det2, N_int, h12) H0(i,j,k,l) = h12 enddo enddo enddo enddo !$OMP END DO !$OMP END PARALLEL call wall_time(t1) ! (i,j,k,l) -> (j,k,l,p) call DGEMM('T','N', nb * na * nb, na, na, & 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) ! (j,k,l,p) -> (k,l,p,q) call DGEMM('T','N', na * nb * na, nb, nb, & 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3)) ! (k,l,p,q) -> (l,p,q,r) call DGEMM('T','N', nb * na * nb, na, na, & 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) ! (l,p,q,r) -> (p,q,r,s) call DGEMM('T','N', na * nb * na, nb, nb, & 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) call wall_time(t2) print *, t1-t0, t2-t1 double precision :: t0, t1, t2 deallocate(H1,H0) end