diff --git a/ocaml/Progress_bar.ml b/ocaml/Progress_bar.ml index 2ca8bd00..b8e97a59 100644 --- a/ocaml/Progress_bar.ml +++ b/ocaml/Progress_bar.ml @@ -14,13 +14,13 @@ type t = let init ?(bar_length=20) ?(start_value=0.) ?(end_value=1.) ~title = { title ; start_value ; end_value ; bar_length ; cur_value=start_value ; - init_time= Time.now () ; dirty = true ; next = Time.now () } + init_time= Time.now () ; dirty = false ; next = Time.now () } let update ~cur_value bar = { bar with cur_value ; dirty=true } let increment_end bar = - { bar with end_value=(bar.end_value +. 1.) ; dirty=true } + { bar with end_value=(bar.end_value +. 1.) ; dirty=false } let increment_cur bar = { bar with cur_value=(bar.cur_value +. 1.) ; dirty=true } diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index c8ac3733..2db6b4cd 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -95,7 +95,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s double precision :: u_dot_v, u_dot_u - integer, allocatable :: kl_pairs(:,:) integer :: k_pairs, kl integer :: iter2 @@ -107,12 +106,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2 + integer :: shift, shift2, itermax include 'constants.include.F' !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, S, y, h, lambda - if (N_st_diag > sze) then - stop 'error in Davidson : N_st_diag > sze' + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_jacobi to ', N_st_diag*3 + stop -1 endif PROVIDE nuclear_repulsion @@ -147,26 +148,26 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s integer, external :: align_double sze_8 = align_double(sze) + itermax = min(davidson_sze_max, sze/N_st_diag) allocate( & - kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & - W(sze_8,N_st_diag*davidson_sze_max), & - U(sze_8,N_st_diag*davidson_sze_max), & + W(sze_8,N_st_diag*itermax), & + U(sze_8,N_st_diag*itermax), & R(sze_8,N_st_diag), & - S(sze_8,N_st_diag*davidson_sze_max), & - h(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - y(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - s_(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & - s_tmp(N_st_diag*davidson_sze_max,N_st_diag*davidson_sze_max), & + S(sze_8,N_st_diag*itermax), & + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & residual_norm(N_st_diag), & - c(N_st_diag*davidson_sze_max), & - s2(N_st_diag*davidson_sze_max), & - lambda(N_st_diag*davidson_sze_max)) + c(N_st_diag*itermax), & + s2(N_st_diag*itermax), & + lambda(N_st_diag*itermax)) h = 0.d0 s_ = 0.d0 s_tmp = 0.d0 - c = 0.d0 U = 0.d0 + W = 0.d0 S = 0.d0 R = 0.d0 y = 0.d0 @@ -183,10 +184,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s converged = .False. - do k=1,N_st - call normalize(u_in(1,k),sze) - enddo - do k=N_st+1,N_st_diag do i=1,sze double precision :: r1, r2 @@ -194,14 +191,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call random_number(r2) u_in(i,k) = dsqrt(-2.d0*dlog(r1))*dcos(dtwo_pi*r2) enddo - - ! Gram-Schmidt - ! ------------ - call dgemv('T',sze,k-1,1.d0,u_in,size(u_in,1), & - u_in(1,k),1,0.d0,c,1) - call dgemv('N',sze,k-1,-1.d0,u_in,size(u_in,1), & - c,1,1.d0,u_in(1,k),1) - call normalize(u_in(1,k),sze) enddo @@ -213,11 +202,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo enddo - do iter=1,davidson_sze_max-1 + do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter + call ortho_qr(U,size(U,1),sze,shift2) ! Compute |W_k> = \sum_i |i> ! ----------------------------------------- @@ -229,20 +219,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Compute h_kl = = ! ------------------------------------------- - -! do l=1,N_st_diag -! do k=1,N_st_diag -! do iter2=1,iter-1 -! h(k,iter2,l,iter) = u_dot_v(U(1,k,iter2),W(1,l,iter),sze) -! h(k,iter,l,iter2) = h(k,iter2,l,iter) -! enddo -! enddo -! do k=1,l -! h(k,iter,l,iter) = u_dot_v(U(1,k,iter),W(1,l,iter),sze) -! h(l,iter,k,iter) = h(k,iter,l,iter) -! enddo -! enddo - call dgemm('T','N', shift2, N_st_diag, sze, & 1.d0, U, size(U,1), W(1,shift+1), size(W,1), & 0.d0, h(1,shift+1), size(h,1)) @@ -295,22 +271,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Express eigenvectors of h in the determinant basis ! -------------------------------------------------- -! do k=1,N_st_diag -! do i=1,sze -! U(i,shift2+k) = 0.d0 -! W(i,shift2+k) = 0.d0 -! S(i,shift2+k) = 0.d0 -! enddo -! do l=1,N_st_diag*iter -! do i=1,sze -! U(i,shift2+k) = U(i,shift2+k) + U(i,l)*y(l,k) -! W(i,shift2+k) = W(i,shift2+k) + W(i,l)*y(l,k) -! S(i,shift2+k) = S(i,shift2+k) + S(i,l)*y(l,k) -! enddo -! enddo -! enddo -! -! call dgemm('N','N', sze, N_st_diag, shift2, & 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & @@ -321,13 +281,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ! Compute residual vector ! ----------------------- -! do k=1,N_st_diag -! print *, s2(k) -! s2(k) = u_dot_v(U(1,shift2+k), S(1,shift2+k), sze) + S_z2_Sz -! print *, s2(k) -! print *, '' -! pause -! enddo do k=1,N_st_diag do i=1,sze R(i,k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & @@ -338,14 +291,17 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s to_print(1,k) = lambda(k) + nuclear_repulsion to_print(2,k) = s2(k) to_print(3,k) = residual_norm(k) - if (residual_norm(k) > 1.e9) then - stop 'Davidson failed' - endif endif enddo - write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(:,1:N_st) + write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3,A20))') iter, to_print(:,1:N_st), '' call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) + do k=1,N_st + if (residual_norm(k) > 1.e9) then + print *, '' + stop 'Davidson failed' + endif + enddo if (converged) then exit endif @@ -359,42 +315,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s enddo enddo - ! Gram-Schmidt - ! ------------ - - do k=1,N_st_diag - -! do l=1,N_st_diag*iter -! c(1) = u_dot_v(U(1,shift2+k),U(1,l),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,l) -! enddo -! enddo -! - call dgemv('T',sze,N_st_diag*iter,1.d0,U,size(U,1), & - U(1,shift2+k),1,0.d0,c,1) - call dgemv('N',sze,N_st_diag*iter,-1.d0,U,size(U,1), & - c,1,1.d0,U(1,shift2+k),1) -! -! do l=1,k-1 -! c(1) = u_dot_v(U(1,shift2+k),U(1,shift2+l),sze) -! do i=1,sze -! U(i,k,iter+1) = U(i,shift2+k) - c(1) * U(i,shift2+l) -! enddo -! enddo -! - call dgemv('T',sze,k-1,1.d0,U(1,shift2+1),size(U,1), & - U(1,shift2+k),1,0.d0,c,1) - call dgemv('N',sze,k-1,-1.d0,U(1,shift2+1),size(U,1), & - c,1,1.d0,U(1,shift2+k),1) - - call normalize( U(1,shift2+k), sze ) - enddo - enddo if (.not.converged) then - iter = davidson_sze_max-1 + iter = itermax-1 endif ! Re-contract to u_in @@ -404,20 +328,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s energies(k) = lambda(k) enddo -! do k=1,N_st_diag -! do i=1,sze -! do l=1,iter*N_st_diag -! u_in(i,k) += U(i,l)*y(l,k) -! enddo -! enddo -! enddo -! enddo - call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0, & U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) enddo + do k=1,N_st_diag + S2_jj(k) = s2(k) + enddo write_buffer = '===== ' do i=1,N_st write_buffer = trim(write_buffer)//' ================ =========== ===========' @@ -427,7 +345,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s call write_time(iunit) deallocate ( & - kl_pairs, & W, residual_norm, & U, & R, c, S, & diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f index 00f61101..e44e8c2c 100644 --- a/src/Utils/LinearAlgebra.irp.f +++ b/src/Utils/LinearAlgebra.irp.f @@ -11,9 +11,9 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n) integer, intent(in) :: LDA, LDU, LDVt, m, n double precision, intent(in) :: A(LDA,n) - double precision, intent(out) :: U(LDU,n) + double precision, intent(out) :: U(LDU,m) double precision,intent(out) :: Vt(LDVt,n) - double precision,intent(out) :: D(n) + double precision,intent(out) :: D(min(m,n)) double precision,allocatable :: work(:) integer :: info, lwork, i, j, k @@ -24,13 +24,13 @@ subroutine svd(A,LDA,U,LDU,D,Vt,LDVt,m,n) ! Find optimal size for temp arrays allocate(work(1)) lwork = -1 - call dgesvd('A','A', n, n, A_tmp, LDA, & + call dgesvd('A','A', m, n, A_tmp, LDA, & D, U, LDU, Vt, LDVt, work, lwork, info) lwork = work(1) deallocate(work) allocate(work(lwork)) - call dgesvd('A','A', n, n, A_tmp, LDA, & + call dgesvd('A','A', m, n, A_tmp, LDA, & D, U, LDU, Vt, LDVt, work, lwork, info) deallocate(work,A_tmp) @@ -125,6 +125,40 @@ subroutine ortho_canonical(overlap,LDA,N,C,LDC,m) end +subroutine ortho_qr(A,LDA,m,n) + implicit none + BEGIN_DOC + ! Orthogonalization using Q.R factorization + ! + ! A : matrix to orthogonalize + ! + ! LDA : leftmost dimension of A + ! + ! n : Number of rows of A + ! + ! m : Number of columns of A + ! + END_DOC + integer, intent(in) :: m,n, LDA + double precision, intent(inout) :: A(LDA,n) + + integer :: lwork, info + integer, allocatable :: jpvt(:) + double precision, allocatable :: tau(:), work(:) + + allocate (jpvt(n), tau(n), work(1)) + LWORK=-1 +! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) + call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + LWORK=WORK(1) + deallocate(WORK) + allocate(WORK(LWORK)) +! call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO) + call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) + call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO) + deallocate(WORK,jpvt,tau) +end + subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) implicit none BEGIN_DOC @@ -161,7 +195,7 @@ subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m) allocate(U(ldc,n),Vt(lda,n),S_half(lda,n),D(n)) - call svd(overlap,lda,U,ldc,D,Vt,lda,m,n) + call svd(overlap,lda,U,ldc,D,Vt,lda,n,n) !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(S_half,U,D,Vt,n,C,m) &