diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f
index d6b9cc79..374b003d 100644
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@@ -8,6 +8,7 @@ END_PROVIDER
  BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ]
 &BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ]
 &BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ]
   implicit none
   BEGIN_DOC
   ! cm/<psi_0|H|D_m> or perturbative 1/Delta_E(m)
@@ -15,7 +16,7 @@
   integer :: i,k
   double precision :: ihpsi_current(N_states)
   integer :: i_pert_count
-  double precision :: hii, lambda_pert
+  double precision :: hii, E2(N_states), E2var(N_states)
   integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3

   i_pert_count = 0
@@ -25,6 +26,8 @@
   lambda_mrcc_pt2(0) = 0
   lambda_mrcc_kept(0) = 0

+  E2 = 0.d0
+  E2var = 0.d0
   do i=1,N_det_non_ref
     call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
         size(psi_ref_coef,1), N_states,ihpsi_current)
@@ -33,24 +36,51 @@
       if (ihpsi_current(k) == 0.d0) then
         ihpsi_current(k) = 1.d-32
       endif
-!     lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
-      lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
-      lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
-      if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
-        ! Ignore lamdba
-        i_pert_count += 1
-        lambda_mrcc(k,i) = 0.d0
-        if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
-          N_lambda_mrcc_pt2 += 1
-          lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
-        endif
-      else
-        ! Keep lamdba
-        if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
-          N_lambda_mrcc_pt3 += 1
-          lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
-        endif
+      lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
+      lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
+      E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii)
+      E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k)
+    enddo
+  enddo
+
+  do i=1,N_det_non_ref
+    call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
+        size(psi_ref_coef,1), N_states,ihpsi_current)
+    call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii)
+    do k=1,N_states
+      if (ihpsi_current(k) == 0.d0) then
+        ihpsi_current(k) = 1.d-32
+      endif
+      lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
+      lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k)
+!     lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
+!     if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
+!       print *,  ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k)
+!     if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then
+!       print *, 'lambda mrcc', lambda_mrcc(k,i)
+!       print *, 'lambda pert', lambda_pert
+!       print *, 'coef: ', psi_non_ref_coef(i,k)
+!       call debug_det(psi_non_ref(1,1,i), N_int)
+!       call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii)
+!       print *, hii
+!       call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii)
+!       print *, hii
+!       print *, '---'
+        ! Ignore lamdba
+!       i_pert_count += 1
+!       lambda_mrcc(k,i) = 0.d0
+!       lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k)
+!       if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
+!         N_lambda_mrcc_pt2 += 1
+!         lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
+!       endif
+!     else
+!       ! Keep lamdba
+!       if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
+!         N_lambda_mrcc_pt3 += 1
+!         lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
+!       endif
+!
       endif
     enddo
   enddo
   lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2
@@ -784,8 +814,8 @@ END_PROVIDER
       f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)

       ! Avoid numerical instabilities
-      f = min(f,2.d0)
-      f = max(f,-2.d0)
+!      f = min(f,2.d0)
+!      f = max(f,-2.d0)
     endif

     norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
@@ -845,11 +875,39 @@ END_PROVIDER


+
+
+double precision function f_fit(x)
+  implicit none
+  double precision :: x
+  f_fit = 0.d0
+  return
+  if (x < 0.d0) then
+    f_fit = 0.d0
+  else if (x < 1.d0) then
+    f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2))
+  else
+    f_fit = 1.d0
+  endif
+end

 double precision function get_dij_index(II, i, s, Nint)
   integer, intent(in) :: II, i, s, Nint
   double precision, external :: get_dij
-  double precision :: HIi, phase
+  double precision :: HIi, phase, c, a, b, d
+
+  call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
+  call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
+
+  a = lambda_pert(s,i)
+!  b = lambda_mrcc(s,i)
+!  c = f_fit(a/b)
+
+  d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s)
+
+  c = f_fit(a*HIi/d)
+
+!  get_dij_index = HIi * a * c  + (1.d0 - c) * d
+  get_dij_index = d
+  return

   if(lambda_type == 0) then
     call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f
index c4147ebc..95c993f0 100644
--- a/plugins/Psiref_Utils/psi_ref_utils.irp.f
+++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f
@@ -98,8 +98,7 @@ END_PROVIDER
   enddo
   N_det_non_ref = i_non_ref
   if (N_det_non_ref < 1) then
-    print *,  'Error : All determinants are in the reference'
-    stop -1
+    print *,  'Warning : All determinants are in the reference'
   endif
 END_PROVIDER

diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f
index 724aac08..04b0cc52 100644
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@@ -144,13 +144,13 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
 end subroutine


-subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
+subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st)
   use f77_zmq
   implicit none

   integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
-  integer, intent(in)           :: n, n_st_8
-  double precision, intent(in)  :: ut(n_st_8,n)
+  integer, intent(in)           :: n0,n, n_st
+  double precision, intent(in)  :: u(n0,n_st)

   integer :: i,k

@@ -164,8 +164,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
     enddo
   enddo
   do i=1,n
-    do k=1,N_states_diag
-      dav_ut(k,i) = ut(k,i)
+    do k=1,n_st
+      dav_ut(k,i) = u(i,k)
     enddo
   enddo

@@ -285,6 +285,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
     call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
     call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
   end do
+  deallocate(idx, vt, st)
 end subroutine


@@ -411,8 +412,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
   allocate(v0t(N_states_diag, dav_size))
   allocate(s0t(N_states_diag, dav_size))

-  v0t = 00.d0
-  s0t = 00.d0
+  v0t = 0.d0
+  s0t = 0.d0

   more = 1

@@ -456,23 +457,12 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
   double precision   , intent(inout)  :: s0(LDA, N_states_diag)


-  PROVIDE nproc
-
-  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
-  i = omp_get_thread_num()
-  if (i == 0 ) then
-    zmq_collector = new_zmq_to_qp_run_socket()
-    zmq_socket_pull = new_zmq_pull_socket()
-    call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
-    call end_zmq_to_qp_run_socket(zmq_collector)
-    call end_zmq_pull_socket(zmq_socket_pull)
-    call davidson_miniserver_end()
-  else if (i == 1 ) then
-    call davidson_miniserver_run ()
-  else
-    call davidson_slave_inproc(i)
-  endif
-  !$OMP END PARALLEL
+  zmq_collector = new_zmq_to_qp_run_socket()
+  zmq_socket_pull = new_zmq_pull_socket()
+  call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
+  call end_zmq_to_qp_run_socket(zmq_collector)
+  call end_zmq_pull_socket(zmq_socket_pull)
+  call davidson_miniserver_end()

 end subroutine
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index 1901525b..4b36e030 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -122,6 +122,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
     stop -1
   endif

+  integer, external :: align_double
+  sze_8 = align_double(sze)
+  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
+
   PROVIDE nuclear_repulsion expected_s2

   call write_time(iunit)
@@ -134,6 +138,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   call write_int(iunit,N_st,'Number of states')
   call write_int(iunit,N_st_diag,'Number of states in diagonalization')
   call write_int(iunit,sze,'Number of determinants')
+  r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+      + 4.d0*(N_st_diag*itermax))/(1024.d0**3))
+  call write_double(iunit, r1, 'Memory(Gb)')
   write(iunit,'(A)') ''
   write_buffer = '===== '
   do i=1,N_st
@@ -151,14 +158,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   enddo
   write(iunit,'(A)') trim(write_buffer)

-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
-  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
+
   allocate(                                                          &
+      ! Large
       W(sze_8,N_st_diag*itermax),                                    &
       U(sze_8,N_st_diag*itermax),                                    &
       S(sze_8,N_st_diag*itermax),                                    &
+
+      ! Small
       h(N_st_diag*itermax,N_st_diag*itermax),                        &
       y(N_st_diag*itermax,N_st_diag*itermax),                        &
       s_(N_st_diag*itermax,N_st_diag*itermax),                       &
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index 026921d0..42e61b3a 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -57,8 +57,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
-  double precision :: ave_workload, workload, target_workload_inv

   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st

@@ -286,19 +284,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
   double precision, intent(in)   :: H_jj(n), S2_jj(n)
   integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
   double precision               :: hij,s2
-  double precision, allocatable  :: ut(:,:)
   integer                        :: i,j,k,l, jj,ii
   integer                        :: i0, j0, ithread

-  integer, allocatable           :: shortcut(:,:), sort_idx(:)
-  integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
   integer(bit_kind)              :: sorted_i(Nint)

   integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
+  integer :: blockb2, istep
   double precision :: ave_workload, workload, target_workload_inv

   integer(ZMQ_PTR) :: handler
@@ -311,61 +306,52 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
   ASSERT (n>0)
   PROVIDE ref_bitmask_energy

-  allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
-  allocate(ut(N_st_8,n))
-
   v_0 = 0.d0
   s_0 = 0.d0

-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(i,istate)
-    enddo
-  enddo
-
-  call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
-
-  blockb = shortcut(0,1)
-  call davidson_init(handler,n,N_st_8,ut)
+  call davidson_init(handler,u_0,size(u_0,1),n,N_st)
+
+  ave_workload = 0.d0
+  do sh=1,shortcut_(0,1)
+    ave_workload += shortcut_(0,1)
+    ave_workload += (shortcut_(sh+1,1) - shortcut_(sh,1))**2
+    do i=sh, shortcut_(0,2), shortcut_(0,1)
+      do j=i, min(i, shortcut_(0,2))
+        ave_workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
+      end do
+    end do
+  enddo
+  ave_workload = ave_workload/dble(shortcut_(0,1))
+  target_workload_inv = 0.01d0/ave_workload

-  PROVIDE nproc
-
-  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
+
+  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j,        &
+      !$OMP workload,istep,blockb2)
   ithread = omp_get_thread_num()
   if (ithread == 0 ) then
-
-    call zmq_set_running(handler)
-    ave_workload = 0.d0
-    do sh=1,shortcut(0,1)
-      ave_workload += shortcut(0,1)
-      ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
-      do i=sh, shortcut(0,2), shortcut(0,1)
-        do j=i, min(i, shortcut(0,2))
-          ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
-        end do
-      end do
-    enddo
-    ave_workload = ave_workload/dble(shortcut(0,1))
-    target_workload_inv = 0.01d0/ave_workload
-
-
-    do sh=1,shortcut(0,1),1
-      workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
-      do i=sh, shortcut(0,2), shortcut(0,1)
-        do j=i, min(i, shortcut(0,2))
-          workload += (shortcut(j+1,2) - shortcut(j, 2))**2
+    do sh=1,shortcut_(0,1),1
+      workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2
+      do i=sh, shortcut_(0,2), shortcut_(0,1)
+        do j=i, min(i, shortcut_(0,2))
+          workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
         end do
       end do
       istep = 1+ int(workload*target_workload_inv)
       do blockb2=0, istep-1
         call davidson_add_task(handler, sh, blockb2, istep)
       enddo
+      if (sh == shortcut_(0,1)/10 + 1) then
+        !$OMP BARRIER
+      endif
     enddo
+    call zmq_set_running(handler)
     call davidson_run(handler, v_0, s_0, size(v_0,1))
   else if (ithread == 1 ) then
+    !$OMP BARRIER
     call davidson_miniserver_run ()
   else
+    !$OMP BARRIER
     call davidson_slave_inproc(ithread)
   endif
   !$OMP END PARALLEL
@@ -378,8 +364,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
       s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
     enddo
   enddo
-  deallocate(shortcut, sort_idx, sorted, version)
-  deallocate(ut)
 end


@@ -414,8 +398,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
-  double precision :: ave_workload, workload, target_workload_inv

   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
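
Note on the new Memory(Gb) printout in davidson_diag_hjj_sjj: the dominant contribution is the three large work arrays W, U and S allocated just after it, each sze_8 x (N_st_diag*itermax) double precision words. The following standalone sketch (not part of the patch; the sze_8, n_st_diag and itermax values are invented for illustration) reproduces only that dominant term of the estimate.

! Minimal standalone sketch (not part of the patch): rough size of the three
! large Davidson work arrays W, U and S, each sze_8 x (N_st_diag*itermax)
! double precision words.  The values below are purely illustrative.
program davidson_memory_estimate
  implicit none
  integer          :: sze_8, n_st_diag, itermax
  double precision :: r1
  sze_8     = 1000000    ! hypothetical aligned number of determinants
  n_st_diag = 4          ! hypothetical number of states in the diagonalization
  itermax   = 8          ! hypothetical maximum number of Davidson iterations
  ! 3 arrays x 8 bytes per double precision word, converted to GiB
  r1 = 3.d0 * 8.d0 * dble(sze_8) * dble(n_st_diag*itermax) / (1024.d0**3)
  print '(A,F8.2,A)', 'Davidson work arrays: ', r1, ' GiB'
end program davidson_memory_estimate

With these illustrative values the dominant term is about 0.72 GiB; the formula added in the patch also folds in the smaller (N_st_diag*itermax)**2 matrices (h, y, s_, ...) before calling write_double.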