10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-10 13:08:23 +01:00

Memory in Davidson

This commit is contained in:
Anthony Scemama 2017-02-27 10:43:57 +01:00
parent da50bc6f72
commit 8b1d083de9
5 changed files with 134 additions and 98 deletions

View File

@ -8,6 +8,7 @@ END_PROVIDER
BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ] BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ]
&BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ]
&BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ] &BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ]
&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ]
implicit none implicit none
BEGIN_DOC BEGIN_DOC
! cm/<Psi_0|H|D_m> or perturbative 1/Delta_E(m) ! cm/<Psi_0|H|D_m> or perturbative 1/Delta_E(m)
@ -15,7 +16,7 @@ END_PROVIDER
integer :: i,k integer :: i,k
double precision :: ihpsi_current(N_states) double precision :: ihpsi_current(N_states)
integer :: i_pert_count integer :: i_pert_count
double precision :: hii, lambda_pert double precision :: hii, E2(N_states), E2var(N_states)
integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3 integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3
i_pert_count = 0 i_pert_count = 0
@ -25,6 +26,8 @@ END_PROVIDER
lambda_mrcc_pt2(0) = 0 lambda_mrcc_pt2(0) = 0
lambda_mrcc_kept(0) = 0 lambda_mrcc_kept(0) = 0
E2 = 0.d0
E2var = 0.d0
do i=1,N_det_non_ref do i=1,N_det_non_ref
call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,& call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
size(psi_ref_coef,1), N_states,ihpsi_current) size(psi_ref_coef,1), N_states,ihpsi_current)
@ -33,24 +36,51 @@ END_PROVIDER
if (ihpsi_current(k) == 0.d0) then if (ihpsi_current(k) == 0.d0) then
ihpsi_current(k) = 1.d-32 ihpsi_current(k) = 1.d-32
endif endif
! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii)
if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k)
! Ignore lamdba enddo
i_pert_count += 1 enddo
lambda_mrcc(k,i) = 0.d0
if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then do i=1,N_det_non_ref
N_lambda_mrcc_pt2 += 1 call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i size(psi_ref_coef,1), N_states,ihpsi_current)
endif call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii)
else do k=1,N_states
! Keep lamdba if (ihpsi_current(k) == 0.d0) then
if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then ihpsi_current(k) = 1.d-32
N_lambda_mrcc_pt3 += 1
lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
endif
endif endif
lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k)
! lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
! if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
! print *, ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k)
! if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then
! print *, 'lambda mrcc', lambda_mrcc(k,i)
! print *, 'lambda pert', lambda_pert
! print *, 'coef: ', psi_non_ref_coef(i,k)
! call debug_det(psi_non_ref(1,1,i), N_int)
! call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii)
! print *, hii
! call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii)
! print *, hii
! print *, '---'
! Ignore lamdba
! i_pert_count += 1
! lambda_mrcc(k,i) = 0.d0
! lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k)
! if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
! N_lambda_mrcc_pt2 += 1
! lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
! endif
! else
! ! Keep lamdba
! if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
! N_lambda_mrcc_pt3 += 1
! lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
! endif
! endif
enddo enddo
enddo enddo
lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2 lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2
@ -784,8 +814,8 @@ END_PROVIDER
f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
! Avoid numerical instabilities ! Avoid numerical instabilities
f = min(f,2.d0) ! f = min(f,2.d0)
f = max(f,-2.d0) ! f = max(f,-2.d0)
endif endif
norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
@ -845,11 +875,39 @@ END_PROVIDER
double precision function f_fit(x)
implicit none
double precision :: x
f_fit = 0.d0
return
if (x < 0.d0) then
f_fit = 0.d0
else if (x < 1.d0) then
f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2))
else
f_fit = 1.d0
endif
end
double precision function get_dij_index(II, i, s, Nint) double precision function get_dij_index(II, i, s, Nint)
integer, intent(in) :: II, i, s, Nint integer, intent(in) :: II, i, s, Nint
double precision, external :: get_dij double precision, external :: get_dij
double precision :: HIi, phase double precision :: HIi, phase, c, a, b, d
call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
a = lambda_pert(s,i)
! b = lambda_mrcc(s,i)
! c = f_fit(a/b)
d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s)
c = f_fit(a*HIi/d)
! get_dij_index = HIi * a * c + (1.d0 - c) * d
get_dij_index = d
return
if(lambda_type == 0) then if(lambda_type == 0) then
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)

View File

@ -98,8 +98,7 @@ END_PROVIDER
enddo enddo
N_det_non_ref = i_non_ref N_det_non_ref = i_non_ref
if (N_det_non_ref < 1) then if (N_det_non_ref < 1) then
print *, 'Error : All determinants are in the reference' print *, 'Warning : All determinants are in the reference'
stop -1
endif endif
END_PROVIDER END_PROVIDER

View File

@ -144,13 +144,13 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
end subroutine end subroutine
subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut) subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
integer, intent(in) :: n, n_st_8 integer, intent(in) :: n0,n, n_st
double precision, intent(in) :: ut(n_st_8,n) double precision, intent(in) :: u(n0,n_st)
integer :: i,k integer :: i,k
@ -164,8 +164,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
enddo enddo
enddo enddo
do i=1,n do i=1,n
do k=1,N_states_diag do k=1,n_st
dav_ut(k,i) = ut(k,i) dav_ut(k,i) = u(i,k)
enddo enddo
enddo enddo
@ -285,6 +285,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id) call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
end do end do
deallocate(idx, vt, st)
end subroutine end subroutine
@ -411,8 +412,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
allocate(v0t(N_states_diag, dav_size)) allocate(v0t(N_states_diag, dav_size))
allocate(s0t(N_states_diag, dav_size)) allocate(s0t(N_states_diag, dav_size))
v0t = 00.d0 v0t = 0.d0
s0t = 00.d0 s0t = 0.d0
more = 1 more = 1
@ -456,23 +457,12 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
double precision , intent(inout) :: s0(LDA, N_states_diag) double precision , intent(inout) :: s0(LDA, N_states_diag)
PROVIDE nproc zmq_collector = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i) call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
i = omp_get_thread_num() call end_zmq_to_qp_run_socket(zmq_collector)
if (i == 0 ) then call end_zmq_pull_socket(zmq_socket_pull)
zmq_collector = new_zmq_to_qp_run_socket() call davidson_miniserver_end()
zmq_socket_pull = new_zmq_pull_socket()
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
call end_zmq_to_qp_run_socket(zmq_collector)
call end_zmq_pull_socket(zmq_socket_pull)
call davidson_miniserver_end()
else if (i == 1 ) then
call davidson_miniserver_run ()
else
call davidson_slave_inproc(i)
endif
!$OMP END PARALLEL
end subroutine end subroutine

View File

@ -122,6 +122,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
stop -1 stop -1
endif endif
integer, external :: align_double
sze_8 = align_double(sze)
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
PROVIDE nuclear_repulsion expected_s2 PROVIDE nuclear_repulsion expected_s2
call write_time(iunit) call write_time(iunit)
@ -134,6 +138,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
call write_int(iunit,N_st,'Number of states') call write_int(iunit,N_st,'Number of states')
call write_int(iunit,N_st_diag,'Number of states in diagonalization') call write_int(iunit,N_st_diag,'Number of states in diagonalization')
call write_int(iunit,sze,'Number of determinants') call write_int(iunit,sze,'Number of determinants')
r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+ 4.d0*(N_st_diag*itermax))/(1024.d0**3))
call write_double(iunit, r1, 'Memory(Gb)')
write(iunit,'(A)') '' write(iunit,'(A)') ''
write_buffer = '===== ' write_buffer = '===== '
do i=1,N_st do i=1,N_st
@ -151,14 +158,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
enddo enddo
write(iunit,'(A)') trim(write_buffer) write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double
sze_8 = align_double(sze)
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
allocate( & allocate( &
! Large
W(sze_8,N_st_diag*itermax), & W(sze_8,N_st_diag*itermax), &
U(sze_8,N_st_diag*itermax), & U(sze_8,N_st_diag*itermax), &
S(sze_8,N_st_diag*itermax), & S(sze_8,N_st_diag*itermax), &
! Small
h(N_st_diag*itermax,N_st_diag*itermax), & h(N_st_diag*itermax,N_st_diag*itermax), &
y(N_st_diag*itermax,N_st_diag*itermax), & y(N_st_diag*itermax,N_st_diag*itermax), &
s_(N_st_diag*itermax,N_st_diag*itermax), & s_(N_st_diag*itermax,N_st_diag*itermax), &

View File

@ -57,8 +57,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
integer :: blockb, blockb2, istep
double precision :: ave_workload, workload, target_workload_inv
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
@ -286,19 +284,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
double precision, intent(in) :: H_jj(n), S2_jj(n) double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2 double precision :: hij,s2
double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0, ithread integer :: i0, j0, ithread
integer, allocatable :: shortcut(:,:), sort_idx(:)
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
integer(bit_kind) :: sorted_i(Nint) integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
integer :: blockb, blockb2, istep integer :: blockb2, istep
double precision :: ave_workload, workload, target_workload_inv double precision :: ave_workload, workload, target_workload_inv
integer(ZMQ_PTR) :: handler integer(ZMQ_PTR) :: handler
@ -311,61 +306,52 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
ASSERT (n>0) ASSERT (n>0)
PROVIDE ref_bitmask_energy PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
allocate(ut(N_st_8,n))
v_0 = 0.d0 v_0 = 0.d0
s_0 = 0.d0 s_0 = 0.d0
do i=1,n call davidson_init(handler,u_0,size(u_0,1),n,N_st)
do istate=1,N_st
ut(istate,i) = u_0(i,istate) ave_workload = 0.d0
enddo do sh=1,shortcut_(0,1)
ave_workload += shortcut_(0,1)
ave_workload += (shortcut_(sh+1,1) - shortcut_(sh,1))**2
do i=sh, shortcut_(0,2), shortcut_(0,1)
do j=i, min(i, shortcut_(0,2))
ave_workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
end do
end do
enddo enddo
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) ave_workload = ave_workload/dble(shortcut_(0,1))
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) target_workload_inv = 0.01d0/ave_workload
blockb = shortcut(0,1)
call davidson_init(handler,n,N_st_8,ut)
PROVIDE nproc PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread) !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, &
!$OMP workload,istep,blockb2)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
if (ithread == 0 ) then if (ithread == 0 ) then
do sh=1,shortcut_(0,1),1
call zmq_set_running(handler) workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2
ave_workload = 0.d0 do i=sh, shortcut_(0,2), shortcut_(0,1)
do sh=1,shortcut(0,1) do j=i, min(i, shortcut_(0,2))
ave_workload += shortcut(0,1) workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
enddo
ave_workload = ave_workload/dble(shortcut(0,1))
target_workload_inv = 0.01d0/ave_workload
do sh=1,shortcut(0,1),1
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do end do
end do end do
istep = 1+ int(workload*target_workload_inv) istep = 1+ int(workload*target_workload_inv)
do blockb2=0, istep-1 do blockb2=0, istep-1
call davidson_add_task(handler, sh, blockb2, istep) call davidson_add_task(handler, sh, blockb2, istep)
enddo enddo
if (sh == shortcut_(0,1)/10 + 1) then
!$OMP BARRIER
endif
enddo enddo
call zmq_set_running(handler)
call davidson_run(handler, v_0, s_0, size(v_0,1)) call davidson_run(handler, v_0, s_0, size(v_0,1))
else if (ithread == 1 ) then else if (ithread == 1 ) then
!$OMP BARRIER
call davidson_miniserver_run () call davidson_miniserver_run ()
else else
!$OMP BARRIER
call davidson_slave_inproc(ithread) call davidson_slave_inproc(ithread)
endif endif
!$OMP END PARALLEL !$OMP END PARALLEL
@ -378,8 +364,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate) s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo enddo
enddo enddo
deallocate(shortcut, sort_idx, sorted, version)
deallocate(ut)
end end
@ -414,8 +398,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
integer :: blockb, blockb2, istep
double precision :: ave_workload, workload, target_workload_inv
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st