From 669e5cbd6f8de766fee6144e54a0ce4ee18bd1cd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 27 Sep 2016 10:10:39 +0200 Subject: [PATCH] Put fast davidson in mrcc --- ocaml/qp_overlap_of_wf.ml | 3 +- plugins/MRCC_Utils/davidson.irp.f | 295 ++++++++++++++-------------- plugins/MRCC_Utils/mrcc_utils.irp.f | 18 +- 3 files changed, 155 insertions(+), 161 deletions(-) diff --git a/ocaml/qp_overlap_of_wf.ml b/ocaml/qp_overlap_of_wf.ml index c405b363..816256fa 100644 --- a/ocaml/qp_overlap_of_wf.ml +++ b/ocaml/qp_overlap_of_wf.ml @@ -51,7 +51,6 @@ let () = norm'+. c'*. c' ) ) wf (0.,0.,0.) in - Printf.printf "%f %f %f\n" result norm norm'; result /. (norm *. norm') in @@ -63,5 +62,5 @@ let () = let o = overlap wf wf' in - Printf.printf "Overlap : %f\n" o + print_float (abs_float o) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 7bde219a..085799f6 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -21,8 +21,8 @@ subroutine davidson_diag_mrcc(dets_in,u_in,energies,dim_in,sze,N_st,N_st_diag,Ni END_DOC integer, intent(in) :: dim_in, sze, N_st, Nint, iunit, istate, N_st_diag integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) - double precision, intent(inout) :: u_in(dim_in,N_st) - double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) double precision, allocatable :: H_jj(:) double precision :: diag_h_mat_elem @@ -72,41 +72,45 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_s ! sze : Number of determinants ! ! N_st : Number of eigenstates + ! + ! N_st_diag : Number of states in which H is diagonalized ! ! iunit : Unit for the I/O ! ! Initial guess vectors are not necessarily orthonormal END_DOC - integer, intent(in) :: dim_in, sze, N_st, Nint, istate, N_st_diag + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint, istate integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) double precision, intent(in) :: H_jj(sze) integer, intent(in) :: iunit - double precision, intent(inout) :: u_in(dim_in,N_st) - double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + integer :: sze_8 integer :: iter integer :: i,j,k,l,m logical :: converged - double precision :: overlap(N_st,N_st) + double precision, allocatable :: overlap(:,:) double precision :: u_dot_v, u_dot_u integer, allocatable :: kl_pairs(:,:) integer :: k_pairs, kl - integer :: iter2, sze_8 + integer :: iter2 double precision, allocatable :: W(:,:,:), U(:,:,:), R(:,:) double precision, allocatable :: y(:,:,:,:), h(:,:,:,:), lambda(:) + double precision, allocatable :: c(:), H_small(:,:) double precision :: diag_h_mat_elem - double precision :: residual_norm(N_st) + double precision, allocatable :: residual_norm(:) character*(16384) :: write_buffer double precision :: to_print(2,N_st) double precision :: cpu, wall + include 'constants.include.F' - !PROVIDE det_connections -if (N_st_diag /= N_st) then - stop 'N_st_diag /= N_st todo in davidson' -endif + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, y, h, lambda + + PROVIDE nuclear_repulsion call write_time(iunit) call wall_time(wall) @@ -116,6 +120,7 @@ endif write(iunit,'(A)') '------------------------' write(iunit,'(A)') '' call write_int(iunit,N_st,'Number of states') + call write_int(iunit,N_st_diag,'Number of states in diagonalization') call write_int(iunit,sze,'Number of determinants') call write_int(iunit,istate,'Using dressing for state ') write(iunit,'(A)') '' @@ -139,15 +144,20 @@ endif sze_8 = align_double(sze) allocate( & - kl_pairs(2,N_st*(N_st+1)/2), & - W(sze_8,N_st,davidson_sze_max), & - U(sze_8,N_st,davidson_sze_max), & - R(sze_8,N_st), & - h(N_st,davidson_sze_max,N_st,davidson_sze_max), & - y(N_st,davidson_sze_max,N_st,davidson_sze_max), & - lambda(N_st*davidson_sze_max)) + kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & + W(sze_8,N_st_diag,davidson_sze_max), & + U(sze_8,N_st_diag,davidson_sze_max), & + R(sze_8,N_st_diag), & + h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & + y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & + residual_norm(N_st_diag), & + overlap(N_st_diag,N_st_diag), & + c(N_st_diag*davidson_sze_max), & + H_small(N_st_diag,N_st_diag), & + lambda(N_st_diag*davidson_sze_max)) ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) ASSERT (sze > 0) ASSERT (Nint > 0) ASSERT (Nint == N_int) @@ -156,145 +166,121 @@ endif ! ============== - if (N_st > 1) then - - k_pairs=0 - do l=1,N_st - do k=1,l - k_pairs+=1 - kl_pairs(1,k_pairs) = k - kl_pairs(2,k_pairs) = l + do k=1,N_st_diag + + if (k > N_st) then + do i=1,sze + double precision :: r1, r2 + call random_number(r1) + call random_number(r2) + u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) enddo - enddo + endif - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP SHARED(U,sze,N_st,overlap,kl_pairs,k_pairs, & - !$OMP Nint,dets_in,u_in) & - !$OMP PRIVATE(k,l,kl,i) - - - ! Orthonormalize initial guess - ! ============================ - - !$OMP DO - do kl=1,k_pairs - k = kl_pairs(1,kl) - l = kl_pairs(2,kl) - if (k/=l) then - overlap(k,l) = u_dot_v(U_in(1,k),U_in(1,l),sze) - overlap(l,k) = overlap(k,l) - else - overlap(k,k) = u_dot_u(U_in(1,k),sze) - endif - enddo - !$OMP END DO - !$OMP END PARALLEL + ! Gram-Schmidt + ! ------------ + call dgemv('T',sze,k-1,1.d0,u_in,size(u_in,1), & + u_in(1,k),1,0.d0,c,1) + call dgemv('N',sze,k-1,-1.d0,u_in,size(u_in,1), & + c,1,1.d0,u_in(1,k),1) + call normalize(u_in(1,k),sze) + enddo - call ortho_lowdin(overlap,size(overlap,1),N_st,U_in,size(U_in,1),sze) - else - - overlap(1,1) = u_dot_u(U_in(1,1),sze) - double precision :: f - f = 1.d0 / dsqrt(overlap(1,1)) - do i=1,sze - U_in(i,1) = U_in(i,1) * f - enddo - - endif - ! Davidson iterations - ! =================== - - - integer :: iteration converged = .False. do while (.not.converged) - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(k,i) SHARED(U,u_in,sze,N_st) - do k=1,N_st - !$OMP DO + + do k=1,N_st_diag do i=1,sze U(i,k,1) = u_in(i,k) enddo - !$OMP END DO enddo - !$OMP END PARALLEL - + do iter=1,davidson_sze_max-1 - - ! Compute W_k = H |u_k> - ! ---------------------- + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------------- + + call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st_diag,sze_8) - call H_u_0_mrcc_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,istate,N_st,sze_8) ! Compute h_kl = = ! ------------------------------------------- - do l=1,N_st - do k=1,N_st - do iter2=1,iter-1 - h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) - h(k,iter,l,iter2) = h(k,iter2,l,iter) - enddo - enddo - do k=1,l - h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) - h(l,iter,k,iter) = h(k,iter,l,iter) - enddo - enddo - !DEBUG H MATRIX - !do i=1,iter - ! print '(10(x,F16.10))', h(1,i,1,1:i) - !enddo - !print *, '' - !END - +! do l=1,N_st_diag +! do k=1,N_st_diag +! do iter2=1,iter-1 +! h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) +! h(k,iter,l,iter2) = h(k,iter2,l,iter) +! enddo +! enddo +! do k=1,l +! h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) +! h(l,iter,k,iter) = h(k,iter,l,iter) +! enddo +! enddo + + call dgemm('T','N', N_st_diag*iter, N_st_diag, sze, & + 1.d0, U, size(U,1), W(1,1,iter), size(W,1), & + 0.d0, h(1,1,1,iter), size(h,1)*size(h,2)) + ! Diagonalize h ! ------------- - call lapack_diag(lambda,y,h,N_st*davidson_sze_max,N_st*iter) + call lapack_diag(lambda,y,h,N_st_diag*davidson_sze_max,N_st_diag*iter) ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- - do k=1,N_st + do k=1,N_st_diag do i=1,sze U(i,k,iter+1) = 0.d0 W(i,k,iter+1) = 0.d0 - do l=1,N_st - do iter2=1,iter - U(i,k,iter+1) = U(i,k,iter+1) + U(i,l,iter2)*y(l,iter2,k,1) - W(i,k,iter+1) = W(i,k,iter+1) + W(i,l,iter2)*y(l,iter2,k,1) - enddo - enddo enddo enddo - +! do k=1,N_st_diag +! do iter2=1,iter +! do l=1,N_st_diag +! do i=1,sze +! U(i,k,iter+1) = U(i,k,iter+1) + U(i,l,iter2)*y(l,iter2,k,1) +! W(i,k,iter+1) = W(i,k,iter+1) + W(i,l,iter2)*y(l,iter2,k,1) +! enddo +! enddo +! enddo +! enddo +! +! + call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, & + 1.d0, U, size(U,1), y, size(y,1)*size(y,2), 0.d0, U(1,1,iter+1), size(U,1)) + call dgemm('N','N',sze,N_st_diag,N_st_diag*iter, & + 1.d0, W, size(W,1), y, size(y,1)*size(y,2), 0.d0, W(1,1,iter+1), size(W,1)) + + ! Compute residual vector ! ----------------------- - do k=1,N_st + do k=1,N_st_diag do i=1,sze R(i,k) = lambda(k) * U(i,k,iter+1) - W(i,k,iter+1) enddo - residual_norm(k) = u_dot_u(R(1,k),sze) - to_print(1,k) = lambda(k) + nuclear_repulsion - to_print(2,k) = residual_norm(k) + if (k <= N_st) then + residual_norm(k) = u_dot_u(R(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + endif enddo - - write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st) + + write(iunit,'(X,I3,X,100(X,F16.10,X,E16.6))') iter, to_print(:,1:N_st) call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) if (converged) then exit endif - ! Davidson step ! ------------- - do k=1,N_st + do k=1,N_st_diag do i=1,sze U(i,k,iter+1) = -1.d0/max(H_jj(i) - lambda(k),1.d-2) * R(i,k) enddo @@ -303,37 +289,36 @@ endif ! Gram-Schmidt ! ------------ - double precision :: c - do k=1,N_st - do iter2=1,iter - do l=1,N_st - c = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) - do i=1,sze - U(i,k,iter+1) -= c * U(i,l,iter2) - enddo - enddo - enddo - do l=1,k-1 - c = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) - do i=1,sze - U(i,k,iter+1) -= c * U(i,l,iter+1) - enddo - enddo + do k=1,N_st_diag + +! do iter2=1,iter +! do l=1,N_st_diag +! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter2),sze) +! do i=1,sze +! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter2) +! enddo +! enddo +! enddo +! + call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & + U(1,k,iter+1),1,0.d0,c,1) + call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & + c,1,1.d0,U(1,k,iter+1),1) +! +! do l=1,k-1 +! c(1) = u_dot_v(U(1,k,iter+1),U(1,l,iter+1),sze) +! do i=1,sze +! U(i,k,iter+1) = U(i,k,iter+1) - c(1) * U(i,l,iter+1) +! enddo +! enddo +! + call dgemv('T',sze,k-1,1.d0,U(1,1,iter+1),size(U,1), & + U(1,k,iter+1),1,0.d0,c,1) + call dgemv('N',sze,k-1,-1.d0,U(1,1,iter+1),size(U,1), & + c,1,1.d0,U(1,k,iter+1),1) + call normalize( U(1,k,iter+1), sze ) enddo - - !DEBUG : CHECK OVERLAP - !print *, '===' - !do k=1,iter+1 - ! do l=1,k - ! c = u_dot_v(U(1,1,k),U(1,1,l),sze) - ! print *, k,l, c - ! enddo - !enddo - !print *, '===' - !pause - !END DEBUG - enddo @@ -344,17 +329,25 @@ endif ! Re-contract to u_in ! ----------- - do k=1,N_st + do k=1,N_st_diag energies(k) = lambda(k) do i=1,sze u_in(i,k) = 0.d0 - do iter2=1,iter - do l=1,N_st - u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) - enddo - enddo enddo enddo +! do k=1,N_st_diag +! do i=1,sze +! do iter2=1,iter +! do l=1,N_st_diag +! u_in(i,k) += U(i,l,iter2)*y(l,iter2,k,1) +! enddo +! enddo +! enddo +! enddo + + call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & + U, size(U,1), y, N_st_diag*davidson_sze_max, & + 0.d0, u_in, size(u_in,1)) enddo @@ -368,9 +361,9 @@ endif deallocate ( & kl_pairs, & - W, & - U, & - R, & + W, residual_norm, & + U, overlap, & + R, c, & h, & y, & lambda & diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 9876c756..c429bc03 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -140,7 +140,7 @@ END_PROVIDER integer :: mrcc_state mrcc_state = N_states - do j=1,N_states_diag + do j=1,min(N_states,N_det) do i=1,N_det CI_eigenvectors_dressed(i,j) = psi_coef(i,j) enddo @@ -241,12 +241,14 @@ END_PROVIDER allocate(s2_eigvalues(N_states_diag), e_array(N_states_diag)) call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors_dressed,n_det,size(psi_det,3),size(CI_eigenvectors_dressed,1),min(n_states_diag,n_det),s2_eigvalues) + double precision, allocatable :: psi_coef_tmp(:,:) + allocate(psi_coef_tmp(psi_det_size,N_states_diag)) do j = 1, N_states_diag do i = 1, N_det - psi_coef(i,j) = CI_eigenvectors_dressed(i,j) + psi_coef_tmp(i,j) = CI_eigenvectors_dressed(i,j) enddo enddo - call u_0_H_u_0_mrcc_nstates(e_array,psi_coef,n_det,psi_det,N_int,mrcc_state,N_states_diag,psi_det_size) + call u_0_H_u_0_mrcc_nstates(e_array,psi_coef_tmp,n_det,psi_det,N_int,mrcc_state,N_states,psi_det_size) ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value ! closer to the "expected_s2" set as input @@ -265,7 +267,7 @@ END_PROVIDER allocate(iorder(i_state)) do j = 1, i_state do i = 1, N_det - CI_eigenvectors_dressed(i,j) = psi_coef(i,index_good_state_array(j)) + CI_eigenvectors_dressed(i,j) = psi_coef_tmp(i,index_good_state_array(j)) enddo CI_eigenvectors_s2_dressed(j) = s2_eigvalues(index_good_state_array(j)) CI_electronic_energy_dressed(j) = e_array(j) @@ -276,7 +278,7 @@ END_PROVIDER CI_electronic_energy_dressed(j) = e_array(j) CI_eigenvectors_s2_dressed(j) = s2_eigvalues(index_good_state_array(iorder(j))) do i = 1, N_det - CI_eigenvectors_dressed(i,j) = psi_coef(i,index_good_state_array(iorder(j))) + CI_eigenvectors_dressed(i,j) = psi_coef_tmp(i,index_good_state_array(iorder(j))) enddo enddo @@ -286,12 +288,12 @@ END_PROVIDER if(good_state_array(j))cycle i_other_state +=1 do i = 1, N_det - CI_eigenvectors_dressed(i,i_state + i_other_state) = psi_coef(i,j) + CI_eigenvectors_dressed(i,i_state + i_other_state) = psi_coef_tmp(i,j) enddo CI_eigenvectors_s2_dressed(i_state + i_other_state) = s2_eigvalues(j) CI_electronic_energy_dressed(i_state + i_other_state) = e_array(i_state + i_other_state) enddo - deallocate(iorder,e_array,index_good_state_array,good_state_array) + deallocate(iorder,e_array,index_good_state_array,good_state_array,psi_coef_tmp) deallocate(s2_eigvalues) @@ -325,7 +327,7 @@ subroutine diagonalize_CI_dressed(lambda) END_DOC double precision, intent(in) :: lambda integer :: i,j - do j=1,N_states_diag + do j=1,N_states do i=1,N_det psi_coef(i,j) = lambda * CI_eigenvectors_dressed(i,j) + (1.d0 - lambda) * psi_coef(i,j) enddo