diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 3585940e..f9ba315c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -1132,7 +1132,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map) call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map) putj = p1 - do puti=1,mo_num + do puti=1,mo_num !HOT if(lbanned(puti,mi)) cycle !p1 fixed putj = p1 diff --git a/src/davidson/EZFIO.cfg b/src/davidson/EZFIO.cfg index 67d7786a..393b25f1 100644 --- a/src/davidson/EZFIO.cfg +++ b/src/davidson/EZFIO.cfg @@ -6,7 +6,7 @@ default: 1.e-10 [n_states_diag] type: States_number -doc: Number of states to consider during the Davdison diagonalization +doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag default: 4 interface: ezfio,ocaml diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index 9e4402f6..c0d94b35 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -428,7 +428,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) integer :: istep, imin, imax, ishift, ipos integer, external :: add_task_to_taskserver - integer, parameter :: tasksize=40000 + integer, parameter :: tasksize=10000 character*(100000) :: task istep=1 ishift=0 diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index ec9a194e..c4684b8e 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -161,7 +161,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ stop -1 endif - itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 itertot = 0 if (state_following) then @@ -219,7 +219,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ exit endif - if (itermax > 4) then + if (itermax > 3) then itermax = itermax - 1 else if (m==1.and.disk_based_davidson) then m=0 @@ -322,6 +322,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ call normalize(u_in(1,k),sze) enddo + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + do while (.not.converged) itertot = itertot+1 @@ -329,30 +335,33 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ exit endif - do k=1,N_st_diag - do i=1,sze - U(i,k) = u_in(i,k) - enddo - enddo - do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter - call ortho_qr(U,size(U,1),sze,shift2) - call ortho_qr(U,size(U,1),sze,shift2) + if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- - ! Compute |W_k> = \sum_i |i> - ! ----------------------------------------- + if (disk_based) then + call ortho_qr_unblocked(U,size(U,1),sze,shift2) + call ortho_qr_unblocked(U,size(U,1),sze,shift2) + else + call ortho_qr(U,size(U,1),sze,shift2) + call ortho_qr(U,size(U,1),sze,shift2) + endif - - if ((sze > 100000).and.distributed_davidson) then - call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + if ((sze > 100000).and.distributed_davidson) then + call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + else + call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + endif + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) else - call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + ! Already computed in update below + continue endif - S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) if (dressing_state > 0) then @@ -579,7 +588,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ enddo - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter, to_print(1:3,1:N_st) + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + endif call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged) do k=1,N_st if (residual_norm(k) > 1.e8) then @@ -600,11 +614,56 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ enddo - ! Re-contract to u_in - ! ----------- + ! Re-contract U and update S and W + ! -------------------------------- + + call sgemm('N','N', sze, N_st_diag, shift2, 1., & + S, size(S,1), y_s, size(y_s,1), 0., S(1,shift2+1), size(S,1)) + do k=1,N_st_diag + do i=1,sze + S(i,k) = S(i,shift2+k) + enddo + enddo + + call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & + W, size(W,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze + W(i,k) = u_in(i,k) + enddo + enddo call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + if (disk_based) then + call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag) + call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag) + else + call ortho_qr(U,size(U,1),sze,N_st_diag) + call ortho_qr(U,size(U,1),sze,N_st_diag) + endif + do j=1,N_st_diag + k=1 + do while ((k qp_max_mem) then print *, 'Not enough memory: aborting in ', routine + print *, int(rss)+1, ' GB required' stop -1 endif !$OMP END CRITICAL