From 4372cc84030afbf45c91bf3cd6ee227aaf8ed3bc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 20 May 2021 16:31:39 +0200 Subject: [PATCH] Added 1st version of SVD optimization --- devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f | 418 ++++++++++++++++++ 1 file changed, 418 insertions(+) create mode 100644 devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f diff --git a/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f b/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f new file mode 100644 index 0000000..18661de --- /dev/null +++ b/devel/svdwf/buildpsi_diagSVDit_modif_v2.irp.f @@ -0,0 +1,418 @@ +program buildpsi_diagSVDit_modif_v2 + + implicit none + + BEGIN_DOC + ! perturbative approach to build psi_postsvd + END_DOC + + read_wf = .True. + TOUCH read_wf + + PROVIDE N_int + + call run() +end + + +subroutine run + + USE OMP_LIB + + implicit none + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: degree, i_state + + integer :: i, j, k, l, ii, jj, na, nb + + double precision :: norm_psi, inv_sqrt_norm_psi + double precision, allocatable :: Uref(:,:), Dref(:), Vtref(:,:), Aref(:,:), Vref(:,:) + + double precision :: err0, err_tmp, e_tmp, E0, overlap, E0_old, tol_energy + double precision :: ctmp, htmp, Ept2 + double precision :: E0_postsvd, overlap_postsvd, E_prev + double precision :: norm_coeff_psi, inv_sqrt_norm_coeff_psi + double precision :: overlapU, overlapU_mat, overlapV, overlapV_mat, overlap_psi + + double precision, allocatable :: Hdiag(:), Hkl(:,:), H0(:,:), H(:,:,:,:) + double precision, allocatable :: psi_postsvd(:,:), coeff_psi_perturb(:) + + integer :: n_FSVD, n_selected, n_toselect, n_tmp, it_svd, it_svd_max + integer :: n_selected2 + integer, allocatable :: numalpha_selected(:), numbeta_selected(:) + integer, allocatable :: numalpha_toselect(:), numbeta_toselect(:) + integer, allocatable :: numalpha_tmp(:), numbeta_tmp(:) + + integer(kind=8) :: W_tbeg, W_tend, W_tbeg_it, W_tend_it, W_ir + real(kind=8) :: W_tot_time, W_tot_time_it + real(kind=8) :: CPU_tbeg, CPU_tend, CPU_tbeg_it, CPU_tend_it + real(kind=8) :: CPU_tot_time, CPU_tot_time_it + real(kind=8) :: speedup, speedup_it + integer :: nb_taches + + !$OMP PARALLEL + nb_taches = OMP_GET_NUM_THREADS() + !$OMP END PARALLEL + + call CPU_TIME(CPU_tbeg) + call SYSTEM_CLOCK(COUNT=W_tbeg, COUNT_RATE=W_ir) + + i_state = 1 + + ! --------------------------------------------------------------------------------------- + ! construct the initial CISD matrix + + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + print *, ' CI matrix:', n_det_alpha_unique,'x',n_det_beta_unique + print *, ' N det :', N_det + print *, ' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~' + + norm_psi = 0.d0 + do k = 1, N_det + norm_psi = norm_psi + psi_bilinear_matrix_values(k,i_state) & + * psi_bilinear_matrix_values(k,i_state) + enddo + print *, ' initial norm = ', norm_psi + + allocate( Aref(n_det_alpha_unique,n_det_beta_unique) ) + Aref(:,:) = 0.d0 + do k = 1, N_det + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + Aref(i,j) = psi_bilinear_matrix_values(k,i_state) + enddo + + ! --------------------------------------------------------------------------------------- + + + ! --------------------------------------------------------------------------------------- + ! perform a Full SVD + + allocate( Uref(n_det_alpha_unique,n_det_alpha_unique) ) + allocate( Dref(max(n_det_beta_unique,n_det_alpha_unique)) ) + allocate( Vref(n_det_beta_unique,n_det_beta_unique) ) + allocate( Vtref(n_det_beta_unique,n_det_beta_unique) ) + call svd_s(Aref, size(Aref,1), Uref, size(Uref,1), Dref, Vtref, size(Vtref,1) & + , n_det_alpha_unique, n_det_beta_unique) + + print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ' + + do l = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + Vref(i,l) = Vtref(l,i) + enddo + enddo + deallocate( Vtref ) + + ! Truncated rank + n_selected = n_svd + call write_int(6,n_svd, 'Rank of psi') + + + !________________________________________________________________________________________________________ + ! + ! loop over SVD iterations + !________________________________________________________________________________________________________ + + tol_energy = 1.d0 + it_svd = 0 + it_svd_max = 100 + E_prev = 0.d0 + + allocate(H(n_det_alpha_unique,n_det_beta_unique,n_det_alpha_unique,n_det_beta_unique)) + allocate(psi_postsvd(n_det_alpha_unique,n_det_beta_unique)) + do while( ( it_svd .lt. it_svd_max) .and. ( tol_energy .gt. 1d-6 ) ) + + call CPU_TIME(CPU_tbeg_it) + call SYSTEM_CLOCK(COUNT=W_tbeg_it, COUNT_RATE=W_ir) + + it_svd = it_svd + 1 + + double precision :: norm + norm = 0.d0 + do j = 1, n_selected + norm = norm + Dref(j)*Dref(j) + enddo + Dref = Dref / dsqrt(norm) + +! print *, '-- Compute H --' + call const_H_uv(Uref, Vref, H) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + do j = 1, n_selected + do i = 1, n_selected + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + enddo + E0 = E0 + nuclear_repulsion +! print *,' E0 =', E0 + + double precision, allocatable :: eigval0(:) + double precision, allocatable :: eigvec0(:,:,:) + double precision, allocatable :: H_tmp(:,:,:,:) + + allocate( H_tmp(n_selected,n_selected,n_selected,n_selected) ) + do l=1,n_selected + do k=1,n_selected + do j=1,n_selected + do i=1,n_selected + H_tmp(i,j,k,l) = H(i,j,k,l) + enddo + enddo + enddo + enddo + allocate( eigval0(n_selected**2),eigvec0(n_selected,n_selected,n_selected**2)) + eigvec0 = 0.d0 + +! print *, ' --- Diag post-SVD --- ' + call lapack_diag(eigval0, eigvec0, H_tmp, n_selected**2, n_selected**2) + deallocate(H_tmp, eigval0) + +! print *, ' --- SVD --- ' + Dref = 0.d0 + call perform_newpostSVD(n_selected, eigvec0(1,1,1), Uref, Vref, Dref) + deallocate(eigvec0) + +! print *, ' --- Compute H --- ' + call const_H_uv(Uref, Vref, H) + + ! H0(i,j) = < u_i v_j | H | u_i v_j > + ! E0 = < psi_0 | H | psi_0 > + E0 = 0.d0 + norm = 0.d0 + do j = 1, n_det_beta_unique + do i = 1, n_det_beta_unique + E0 = E0 + Dref(i) * H(i,i,j,j) * Dref(j) + enddo + norm = norm + Dref(j)*Dref(j) + enddo + E0 = E0 + nuclear_repulsion +! print *,' E0 =', E0 +! print *,' norm =', norm + +! print *, ' --- Perturbation --- ' + psi_postsvd = 0.d0 + do i=1,n_selected + psi_postsvd(i,i) = Dref(i) + enddo + + Ept2 = 0.d0 + do j=1,n_selected + do i=n_selected+1,n_det_alpha_unique + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo + + do j=n_selected+1,n_det_beta_unique + do i=1,n_selected + ctmp = 0.d0 + do l=1,n_selected + do k=1,n_selected + ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) + enddo + enddo + psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) + enddo + enddo +! do j=n_selected+1,n_det_beta_unique +! do i=n_selected+1,n_det_alpha_unique +! ctmp = 0.d0 +! do l=1,n_selected +! do k=1,n_selected +! ctmp = ctmp + H(k,l,i,j) * psi_postsvd(k,l) +! enddo +! enddo +! psi_postsvd(i,j) = ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) +! Ept2 += ctmp*ctmp / (E0 - (H(i,j,i,j)+nuclear_repulsion) ) +! enddo +! enddo + + tol_energy = dabs(E_prev - E0) + print '(I5, 3(3X, F20.10))', it_svd, E0, E0 + Ept2, tol_energy + E_prev = E0 + + print *, ' --- SVD --- ' + call perform_newpostSVD(n_det_beta_unique, psi_postsvd, Uref, Vref, Dref) + + end do + + +end + + + +subroutine perform_newpostSVD(n_selected, psi_postsvd, Uref, Vref, Dref) + + USE OMP_LIB + + integer, intent(in) :: n_selected + double precision, intent(in) :: psi_postsvd(n_selected,n_selected) + double precision, intent(inout) :: Uref(n_det_alpha_unique,n_det_alpha_unique) + double precision, intent(inout) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(inout) :: Dref(max(n_det_beta_unique,n_det_alpha_unique)) + + integer :: mm, nn, i, j, ii0, ii, l, jj, na, nb + double precision :: err0, err_norm, err_tmp, norm_tmp + double precision :: overlapU_mat, overlapV_mat, overlapU, overlapV + double precision, allocatable :: S_mat(:,:), SxVt(:,:) + double precision, allocatable :: U_svd(:,:), V_svd(:,:) + double precision, allocatable :: U_newsvd(:,:), V_newsvd(:,:), Vt_newsvd(:,:), D_newsvd(:), A_newsvd(:,:) + + mm = n_det_alpha_unique + nn = n_det_beta_unique + + allocate( U_svd(mm,n_selected) , V_svd(nn,n_selected) , S_mat(n_selected,n_selected) ) + + U_svd(1:mm,1:n_selected) = Uref(1:mm,1:n_selected) + V_svd(1:nn,1:n_selected) = Vref(1:nn,1:n_selected) + + S_mat(1:n_selected,1:n_selected) = psi_postsvd(1:n_selected,1:n_selected) + + ! first compute S_mat x transpose(V_svd) + allocate( SxVt(n_selected,nn) ) + call dgemm( 'N', 'T', n_selected, nn, n_selected, 1.d0 & + , S_mat , size(S_mat,1) & + , V_svd , size(V_svd,1) & + , 0.d0, SxVt, size(SxVt ,1) ) + deallocate(S_mat) + + ! then compute U_svd x SxVt + allocate( A_newsvd(mm,nn) ) + call dgemm( 'N', 'N', mm, nn, n_selected, 1.d0 & + , U_svd , size(U_svd ,1) & + , SxVt , size(SxVt ,1) & + , 0.d0, A_newsvd, size(A_newsvd,1) ) + deallocate( SxVt ) + + ! perform new SVD + allocate( U_newsvd(mm,mm), Vt_newsvd(nn,nn), D_newsvd(max(mm,nn)) ) + call svd_s( A_newsvd, size(A_newsvd,1), & + U_newsvd, size(U_newsvd,1), & + D_newsvd, & + Vt_newsvd, size(Vt_newsvd,1), & + mm, nn) + deallocate(A_newsvd) + + allocate( V_newsvd(nn,nn) ) + do l = 1, nn + do j = 1, nn + V_newsvd(j,l) = Vt_newsvd(l,j) + enddo + enddo + deallocate(Vt_newsvd) + + do l = 1, n_selected + Dref(l) = D_newsvd(l) + Uref(1:mm,l) = U_newsvd(1:mm,l) + Vref(1:nn,l) = V_newsvd(1:nn,l) + enddo + + deallocate(U_newsvd) + deallocate(V_newsvd) + deallocate(D_newsvd) + +end subroutine perform_newpostSVD + + + +subroutine const_H_uv(Uref, Vref, H) + + USE OMP_LIB + + implicit none + + double precision, intent(in) :: Uref(n_det_alpha_unique,n_det_beta_unique) + double precision, intent(in) :: Vref(n_det_beta_unique ,n_det_beta_unique) + double precision, intent(out) :: H(n_det_alpha_unique,n_det_beta_unique, n_det_alpha_unique,n_det_beta_unique) + + integer(bit_kind) :: det1(N_int,2), det2(N_int,2) + integer :: i, j, k, l, degree + integer :: ii0, jj0, ii, jj, n, m, np, mp + integer :: nn0, mm0, na, nb, mm, ind_gs + integer :: p,q,r,s + double precision :: h12, x + + double precision, allocatable :: H0(:,:,:,:) + double precision, allocatable :: H1(:,:,:,:) + + na = n_det_alpha_unique + nb = n_det_beta_unique + + allocate( H0(na,nb,na,nb) ) + allocate( H1(nb,na,nb,na) ) + + det1(:,1) = psi_det_alpha_unique(:,1) + det2(:,1) = psi_det_alpha_unique(:,1) + det1(:,2) = psi_det_beta_unique(:,1) + det2(:,2) = psi_det_beta_unique(:,1) + call i_H_j(det1, det2, N_int, h12) + + H0 = 0.d0 + call wall_time(t0) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p,q,r,s,i,j,k,l,det1,det2,degree,h12) & + !$OMP SHARED(na,nb,psi_det_alpha_unique,psi_det_beta_unique, & + !$OMP N_int,Uref,Vref,H0,H1,H) + + !$OMP DO + do l = 1, nb + det2(:,2) = psi_det_beta_unique(:,l) + do j = 1, nb + det1(:,2) = psi_det_beta_unique(:,j) + call get_excitation_degree_spin(det1(1,2),det2(1,2),degree,N_int) + if (degree > 2) cycle + do k = 1, na + det2(:,1) = psi_det_alpha_unique(:,k) + do i = 1, na + det1(:,1) = psi_det_alpha_unique(:,i) + call get_excitation_degree(det1,det2,degree,N_int) + if ( degree > 2) cycle + call i_H_j(det1, det2, N_int, h12) + H0(i,j,k,l) = h12 + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call wall_time(t1) + ! (i,j,k,l) -> (j,k,l,p) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (j,k,l,p) -> (k,l,p,q) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H0, size(H0,1)*size(H0,2)*size(H0,3)) + + ! (k,l,p,q) -> (l,p,q,r) + call DGEMM('T','N', nb * na * nb, na, na, & + 1.d0, H0, size(H0,1), Uref, size(Uref,1), 0.d0, H1, size(H1,1)*size(H1,2)*size(H1,3)) + + ! (l,p,q,r) -> (p,q,r,s) + call DGEMM('T','N', na * nb * na, nb, nb, & + 1.d0, H1, size(H1,1), Vref, size(Vref,1), 0.d0, H, size(H,1)*size(H,2)*size(H,3)) + call wall_time(t2) + print *, t1-t0, t2-t1 + double precision :: t0, t1, t2 + + deallocate(H1,H0) + +end + + + + +