mirror of
https://github.com/LCPQ/quantum_package
synced 2024-12-23 04:43:50 +01:00
Memory in Davidson
This commit is contained in:
parent
da50bc6f72
commit
8b1d083de9
@ -8,6 +8,7 @@ END_PROVIDER
|
|||||||
BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ]
|
BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ]
|
||||||
&BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ]
|
&BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ]
|
||||||
&BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ]
|
&BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ]
|
||||||
|
&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ]
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! cm/<Psi_0|H|D_m> or perturbative 1/Delta_E(m)
|
! cm/<Psi_0|H|D_m> or perturbative 1/Delta_E(m)
|
||||||
@ -15,7 +16,7 @@ END_PROVIDER
|
|||||||
integer :: i,k
|
integer :: i,k
|
||||||
double precision :: ihpsi_current(N_states)
|
double precision :: ihpsi_current(N_states)
|
||||||
integer :: i_pert_count
|
integer :: i_pert_count
|
||||||
double precision :: hii, lambda_pert
|
double precision :: hii, E2(N_states), E2var(N_states)
|
||||||
integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3
|
integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3
|
||||||
|
|
||||||
i_pert_count = 0
|
i_pert_count = 0
|
||||||
@ -25,6 +26,8 @@ END_PROVIDER
|
|||||||
lambda_mrcc_pt2(0) = 0
|
lambda_mrcc_pt2(0) = 0
|
||||||
lambda_mrcc_kept(0) = 0
|
lambda_mrcc_kept(0) = 0
|
||||||
|
|
||||||
|
E2 = 0.d0
|
||||||
|
E2var = 0.d0
|
||||||
do i=1,N_det_non_ref
|
do i=1,N_det_non_ref
|
||||||
call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
|
call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
|
||||||
size(psi_ref_coef,1), N_states,ihpsi_current)
|
size(psi_ref_coef,1), N_states,ihpsi_current)
|
||||||
@ -33,24 +36,51 @@ END_PROVIDER
|
|||||||
if (ihpsi_current(k) == 0.d0) then
|
if (ihpsi_current(k) == 0.d0) then
|
||||||
ihpsi_current(k) = 1.d-32
|
ihpsi_current(k) = 1.d-32
|
||||||
endif
|
endif
|
||||||
! lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
|
lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
|
||||||
lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
|
lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
|
||||||
lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
|
E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii)
|
||||||
if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
|
E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do i=1,N_det_non_ref
|
||||||
|
call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
|
||||||
|
size(psi_ref_coef,1), N_states,ihpsi_current)
|
||||||
|
call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii)
|
||||||
|
do k=1,N_states
|
||||||
|
if (ihpsi_current(k) == 0.d0) then
|
||||||
|
ihpsi_current(k) = 1.d-32
|
||||||
|
endif
|
||||||
|
lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
|
||||||
|
lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k)
|
||||||
|
! lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
|
||||||
|
! if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
|
||||||
|
! print *, ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k)
|
||||||
|
! if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then
|
||||||
|
! print *, 'lambda mrcc', lambda_mrcc(k,i)
|
||||||
|
! print *, 'lambda pert', lambda_pert
|
||||||
|
! print *, 'coef: ', psi_non_ref_coef(i,k)
|
||||||
|
! call debug_det(psi_non_ref(1,1,i), N_int)
|
||||||
|
! call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii)
|
||||||
|
! print *, hii
|
||||||
|
! call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii)
|
||||||
|
! print *, hii
|
||||||
|
! print *, '---'
|
||||||
! Ignore lamdba
|
! Ignore lamdba
|
||||||
i_pert_count += 1
|
! i_pert_count += 1
|
||||||
lambda_mrcc(k,i) = 0.d0
|
! lambda_mrcc(k,i) = 0.d0
|
||||||
if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
|
! lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k)
|
||||||
N_lambda_mrcc_pt2 += 1
|
! if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
|
||||||
lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
|
! N_lambda_mrcc_pt2 += 1
|
||||||
endif
|
! lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
|
||||||
else
|
! endif
|
||||||
! Keep lamdba
|
! else
|
||||||
if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
|
! ! Keep lamdba
|
||||||
N_lambda_mrcc_pt3 += 1
|
! if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
|
||||||
lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
|
! N_lambda_mrcc_pt3 += 1
|
||||||
endif
|
! lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
|
||||||
endif
|
! endif
|
||||||
|
! endif
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2
|
lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2
|
||||||
@ -784,8 +814,8 @@ END_PROVIDER
|
|||||||
f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
|
f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
|
||||||
|
|
||||||
! Avoid numerical instabilities
|
! Avoid numerical instabilities
|
||||||
f = min(f,2.d0)
|
! f = min(f,2.d0)
|
||||||
f = max(f,-2.d0)
|
! f = max(f,-2.d0)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
|
norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
|
||||||
@ -845,11 +875,39 @@ END_PROVIDER
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
double precision function f_fit(x)
|
||||||
|
implicit none
|
||||||
|
double precision :: x
|
||||||
|
f_fit = 0.d0
|
||||||
|
return
|
||||||
|
if (x < 0.d0) then
|
||||||
|
f_fit = 0.d0
|
||||||
|
else if (x < 1.d0) then
|
||||||
|
f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2))
|
||||||
|
else
|
||||||
|
f_fit = 1.d0
|
||||||
|
endif
|
||||||
|
end
|
||||||
|
|
||||||
double precision function get_dij_index(II, i, s, Nint)
|
double precision function get_dij_index(II, i, s, Nint)
|
||||||
integer, intent(in) :: II, i, s, Nint
|
integer, intent(in) :: II, i, s, Nint
|
||||||
double precision, external :: get_dij
|
double precision, external :: get_dij
|
||||||
double precision :: HIi, phase
|
double precision :: HIi, phase, c, a, b, d
|
||||||
|
|
||||||
|
call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
|
||||||
|
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
|
||||||
|
|
||||||
|
a = lambda_pert(s,i)
|
||||||
|
! b = lambda_mrcc(s,i)
|
||||||
|
! c = f_fit(a/b)
|
||||||
|
|
||||||
|
d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s)
|
||||||
|
|
||||||
|
c = f_fit(a*HIi/d)
|
||||||
|
|
||||||
|
! get_dij_index = HIi * a * c + (1.d0 - c) * d
|
||||||
|
get_dij_index = d
|
||||||
|
return
|
||||||
|
|
||||||
if(lambda_type == 0) then
|
if(lambda_type == 0) then
|
||||||
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
|
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
|
||||||
|
@ -98,8 +98,7 @@ END_PROVIDER
|
|||||||
enddo
|
enddo
|
||||||
N_det_non_ref = i_non_ref
|
N_det_non_ref = i_non_ref
|
||||||
if (N_det_non_ref < 1) then
|
if (N_det_non_ref < 1) then
|
||||||
print *, 'Error : All determinants are in the reference'
|
print *, 'Warning : All determinants are in the reference'
|
||||||
stop -1
|
|
||||||
endif
|
endif
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
@ -144,13 +144,13 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
|
|||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
|
subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st)
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
|
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
|
||||||
integer, intent(in) :: n, n_st_8
|
integer, intent(in) :: n0,n, n_st
|
||||||
double precision, intent(in) :: ut(n_st_8,n)
|
double precision, intent(in) :: u(n0,n_st)
|
||||||
integer :: i,k
|
integer :: i,k
|
||||||
|
|
||||||
|
|
||||||
@ -164,8 +164,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
|
|||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
do i=1,n
|
do i=1,n
|
||||||
do k=1,N_states_diag
|
do k=1,n_st
|
||||||
dav_ut(k,i) = ut(k,i)
|
dav_ut(k,i) = u(i,k)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
@ -285,6 +285,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
|||||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||||
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
|
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
|
||||||
end do
|
end do
|
||||||
|
deallocate(idx, vt, st)
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
@ -411,8 +412,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
|
|||||||
allocate(v0t(N_states_diag, dav_size))
|
allocate(v0t(N_states_diag, dav_size))
|
||||||
allocate(s0t(N_states_diag, dav_size))
|
allocate(s0t(N_states_diag, dav_size))
|
||||||
|
|
||||||
v0t = 00.d0
|
v0t = 0.d0
|
||||||
s0t = 00.d0
|
s0t = 0.d0
|
||||||
|
|
||||||
more = 1
|
more = 1
|
||||||
|
|
||||||
@ -456,23 +457,12 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
|
|||||||
double precision , intent(inout) :: s0(LDA, N_states_diag)
|
double precision , intent(inout) :: s0(LDA, N_states_diag)
|
||||||
|
|
||||||
|
|
||||||
PROVIDE nproc
|
|
||||||
|
|
||||||
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
|
|
||||||
i = omp_get_thread_num()
|
|
||||||
if (i == 0 ) then
|
|
||||||
zmq_collector = new_zmq_to_qp_run_socket()
|
zmq_collector = new_zmq_to_qp_run_socket()
|
||||||
zmq_socket_pull = new_zmq_pull_socket()
|
zmq_socket_pull = new_zmq_pull_socket()
|
||||||
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
|
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
|
||||||
call end_zmq_to_qp_run_socket(zmq_collector)
|
call end_zmq_to_qp_run_socket(zmq_collector)
|
||||||
call end_zmq_pull_socket(zmq_socket_pull)
|
call end_zmq_pull_socket(zmq_socket_pull)
|
||||||
call davidson_miniserver_end()
|
call davidson_miniserver_end()
|
||||||
else if (i == 1 ) then
|
|
||||||
call davidson_miniserver_run ()
|
|
||||||
else
|
|
||||||
call davidson_slave_inproc(i)
|
|
||||||
endif
|
|
||||||
!$OMP END PARALLEL
|
|
||||||
|
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
@ -122,6 +122,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
|||||||
stop -1
|
stop -1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
integer, external :: align_double
|
||||||
|
sze_8 = align_double(sze)
|
||||||
|
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
||||||
|
|
||||||
PROVIDE nuclear_repulsion expected_s2
|
PROVIDE nuclear_repulsion expected_s2
|
||||||
|
|
||||||
call write_time(iunit)
|
call write_time(iunit)
|
||||||
@ -134,6 +138,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
|||||||
call write_int(iunit,N_st,'Number of states')
|
call write_int(iunit,N_st,'Number of states')
|
||||||
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
|
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
|
||||||
call write_int(iunit,sze,'Number of determinants')
|
call write_int(iunit,sze,'Number of determinants')
|
||||||
|
r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
||||||
|
+ 4.d0*(N_st_diag*itermax))/(1024.d0**3))
|
||||||
|
call write_double(iunit, r1, 'Memory(Gb)')
|
||||||
write(iunit,'(A)') ''
|
write(iunit,'(A)') ''
|
||||||
write_buffer = '===== '
|
write_buffer = '===== '
|
||||||
do i=1,N_st
|
do i=1,N_st
|
||||||
@ -151,14 +158,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
|||||||
enddo
|
enddo
|
||||||
write(iunit,'(A)') trim(write_buffer)
|
write(iunit,'(A)') trim(write_buffer)
|
||||||
|
|
||||||
integer, external :: align_double
|
|
||||||
sze_8 = align_double(sze)
|
|
||||||
|
|
||||||
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
|
||||||
allocate( &
|
allocate( &
|
||||||
|
! Large
|
||||||
W(sze_8,N_st_diag*itermax), &
|
W(sze_8,N_st_diag*itermax), &
|
||||||
U(sze_8,N_st_diag*itermax), &
|
U(sze_8,N_st_diag*itermax), &
|
||||||
S(sze_8,N_st_diag*itermax), &
|
S(sze_8,N_st_diag*itermax), &
|
||||||
|
|
||||||
|
! Small
|
||||||
h(N_st_diag*itermax,N_st_diag*itermax), &
|
h(N_st_diag*itermax,N_st_diag*itermax), &
|
||||||
y(N_st_diag*itermax,N_st_diag*itermax), &
|
y(N_st_diag*itermax,N_st_diag*itermax), &
|
||||||
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
||||||
|
@ -57,8 +57,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
|
|||||||
integer :: N_st_8
|
integer :: N_st_8
|
||||||
|
|
||||||
integer, external :: align_double
|
integer, external :: align_double
|
||||||
integer :: blockb, blockb2, istep
|
|
||||||
double precision :: ave_workload, workload, target_workload_inv
|
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
||||||
|
|
||||||
@ -286,19 +284,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
|
|||||||
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
||||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||||
double precision :: hij,s2
|
double precision :: hij,s2
|
||||||
double precision, allocatable :: ut(:,:)
|
|
||||||
integer :: i,j,k,l, jj,ii
|
integer :: i,j,k,l, jj,ii
|
||||||
integer :: i0, j0, ithread
|
integer :: i0, j0, ithread
|
||||||
|
|
||||||
integer, allocatable :: shortcut(:,:), sort_idx(:)
|
|
||||||
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
|
|
||||||
integer(bit_kind) :: sorted_i(Nint)
|
integer(bit_kind) :: sorted_i(Nint)
|
||||||
|
|
||||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||||
integer :: N_st_8
|
integer :: N_st_8
|
||||||
|
|
||||||
integer, external :: align_double
|
integer, external :: align_double
|
||||||
integer :: blockb, blockb2, istep
|
integer :: blockb2, istep
|
||||||
double precision :: ave_workload, workload, target_workload_inv
|
double precision :: ave_workload, workload, target_workload_inv
|
||||||
|
|
||||||
integer(ZMQ_PTR) :: handler
|
integer(ZMQ_PTR) :: handler
|
||||||
@ -311,61 +306,52 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
|
|||||||
ASSERT (n>0)
|
ASSERT (n>0)
|
||||||
PROVIDE ref_bitmask_energy
|
PROVIDE ref_bitmask_energy
|
||||||
|
|
||||||
allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
|
|
||||||
allocate(ut(N_st_8,n))
|
|
||||||
|
|
||||||
v_0 = 0.d0
|
v_0 = 0.d0
|
||||||
s_0 = 0.d0
|
s_0 = 0.d0
|
||||||
|
|
||||||
do i=1,n
|
call davidson_init(handler,u_0,size(u_0,1),n,N_st)
|
||||||
do istate=1,N_st
|
|
||||||
ut(istate,i) = u_0(i,istate)
|
|
||||||
enddo
|
|
||||||
enddo
|
|
||||||
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
|
|
||||||
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
|
|
||||||
|
|
||||||
blockb = shortcut(0,1)
|
|
||||||
call davidson_init(handler,n,N_st_8,ut)
|
|
||||||
|
|
||||||
|
ave_workload = 0.d0
|
||||||
|
do sh=1,shortcut_(0,1)
|
||||||
|
ave_workload += shortcut_(0,1)
|
||||||
|
ave_workload += (shortcut_(sh+1,1) - shortcut_(sh,1))**2
|
||||||
|
do i=sh, shortcut_(0,2), shortcut_(0,1)
|
||||||
|
do j=i, min(i, shortcut_(0,2))
|
||||||
|
ave_workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
|
||||||
|
end do
|
||||||
|
end do
|
||||||
|
enddo
|
||||||
|
ave_workload = ave_workload/dble(shortcut_(0,1))
|
||||||
|
target_workload_inv = 0.01d0/ave_workload
|
||||||
|
|
||||||
PROVIDE nproc
|
PROVIDE nproc
|
||||||
|
|
||||||
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
|
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j, &
|
||||||
|
!$OMP workload,istep,blockb2)
|
||||||
ithread = omp_get_thread_num()
|
ithread = omp_get_thread_num()
|
||||||
if (ithread == 0 ) then
|
if (ithread == 0 ) then
|
||||||
|
do sh=1,shortcut_(0,1),1
|
||||||
call zmq_set_running(handler)
|
workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2
|
||||||
ave_workload = 0.d0
|
do i=sh, shortcut_(0,2), shortcut_(0,1)
|
||||||
do sh=1,shortcut(0,1)
|
do j=i, min(i, shortcut_(0,2))
|
||||||
ave_workload += shortcut(0,1)
|
workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
|
||||||
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
|
||||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
|
||||||
do j=i, min(i, shortcut(0,2))
|
|
||||||
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
|
||||||
end do
|
|
||||||
end do
|
|
||||||
enddo
|
|
||||||
ave_workload = ave_workload/dble(shortcut(0,1))
|
|
||||||
target_workload_inv = 0.01d0/ave_workload
|
|
||||||
|
|
||||||
|
|
||||||
do sh=1,shortcut(0,1),1
|
|
||||||
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
|
|
||||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
|
||||||
do j=i, min(i, shortcut(0,2))
|
|
||||||
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
istep = 1+ int(workload*target_workload_inv)
|
istep = 1+ int(workload*target_workload_inv)
|
||||||
do blockb2=0, istep-1
|
do blockb2=0, istep-1
|
||||||
call davidson_add_task(handler, sh, blockb2, istep)
|
call davidson_add_task(handler, sh, blockb2, istep)
|
||||||
enddo
|
enddo
|
||||||
|
if (sh == shortcut_(0,1)/10 + 1) then
|
||||||
|
!$OMP BARRIER
|
||||||
|
endif
|
||||||
enddo
|
enddo
|
||||||
|
call zmq_set_running(handler)
|
||||||
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
||||||
else if (ithread == 1 ) then
|
else if (ithread == 1 ) then
|
||||||
|
!$OMP BARRIER
|
||||||
call davidson_miniserver_run ()
|
call davidson_miniserver_run ()
|
||||||
else
|
else
|
||||||
|
!$OMP BARRIER
|
||||||
call davidson_slave_inproc(ithread)
|
call davidson_slave_inproc(ithread)
|
||||||
endif
|
endif
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
@ -378,8 +364,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
|
|||||||
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
deallocate(shortcut, sort_idx, sorted, version)
|
|
||||||
deallocate(ut)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -414,8 +398,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
|||||||
integer :: N_st_8
|
integer :: N_st_8
|
||||||
|
|
||||||
integer, external :: align_double
|
integer, external :: align_double
|
||||||
integer :: blockb, blockb2, istep
|
|
||||||
double precision :: ave_workload, workload, target_workload_inv
|
|
||||||
|
|
||||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user