10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-11-04 21:24:02 +01:00

reduced bandwidth and checkpoint updates

This commit is contained in:
Yann Garniron 2018-03-27 16:22:14 +02:00
parent a865a842d2
commit 4394c04728
5 changed files with 154 additions and 69 deletions

View File

@ -43,14 +43,13 @@ subroutine run_wf
! Selection ! Selection
! --------- ! ---------
print *, 'dress'
call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order PROVIDE psi_bilinear_matrix_transp_order
!$OMP PARALLEL PRIVATE(i) !$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num() i = omp_get_thread_num()
call dress_slave_tcp(i+1, energy) call dress_slave_tcp(i+1, energy)

View File

@ -134,6 +134,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
implicit none implicit none
integer, parameter :: delta_loc_N = 2
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(in) :: istate integer, intent(in) :: istate
@ -144,7 +145,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta(N_states, N_det)
double precision, intent(out) :: delta_s2(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det)
double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) double precision, allocatable :: delta_loc(:,:,:,:), delta_det(:,:,:,:)
double precision, allocatable :: dress_detail(:,:) double precision, allocatable :: dress_detail(:,:)
double precision :: dress_mwen(N_states) double precision :: dress_mwen(N_states)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
@ -154,7 +155,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
integer :: more integer :: more
integer :: i, j, k, i_state, N integer :: i, j, k, i_state, N
integer :: task_id, ind integer :: task_id, ind, inds(delta_loc_N)
double precision, save :: time0 = -1.d0 double precision, save :: time0 = -1.d0
double precision :: time, timeLast, old_tooth double precision :: time, timeLast, old_tooth
double precision, external :: omp_get_wtime double precision, external :: omp_get_wtime
@ -162,12 +163,17 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
integer, allocatable :: parts_to_get(:) integer, allocatable :: parts_to_get(:)
logical, allocatable :: actually_computed(:) logical, allocatable :: actually_computed(:)
integer :: total_computed integer :: total_computed
integer :: delta_loc_cur
double precision :: fac(delta_loc_N) , wei(delta_loc_N)
logical :: ok
delta_loc_cur = 1
delta = 0d0 delta = 0d0
delta_s2 = 0d0 delta_s2 = 0d0
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det))
allocate(delta_loc(N_states, N_det, 2)) allocate(delta_loc(N_states, N_det, 2, delta_loc_N))
dress_detail = 0d0 dress_detail = 0d0
delta_det = 0d0 delta_det = 0d0
cp = 0d0 cp = 0d0
@ -196,58 +202,102 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
cur_cp = 0 cur_cp = 0
old_cur_cp = 0 old_cur_cp = 0
logical :: loop logical :: loop
integer :: felem, felem_loc
loop = .true. loop = .true.
felem = N_det+1
pullLoop : do while (loop) pullLoop : do while (loop)
call pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), task_id, felem_loc)
dress_mwen(:) = 0d0 felem = min(felem_loc, felem)
dress_mwen(:) = 0d0
!!!!! A VERIFIER !!!!!
do i_state=1,N_states
do i=1, N_det
dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(i, i_state)
end do
end do
dress_detail(:, ind) += dress_mwen(:)
do j=1,N_cp !! optimizable
if(cps(ind, j) > 0d0) then
if(tooth_of_det(ind) < cp_first_tooth(j)) stop "coef on supposedely deterministic det"
double precision :: fac
integer :: toothMwen
logical :: fracted
fac = cps(ind, j) / cps_N(j) * dress_weight_inv(ind) * comb_step
cp(1:N_states,1:N_det,j,1) += delta_loc(1:N_states,1:N_det,1) * fac
cp(1:N_states,1:N_det,j,2) += delta_loc(1:N_states,1:N_det,2) * fac
end if
end do
toothMwen = tooth_of_det(ind)
fracted = (toothMwen /= 0)
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
if(fracted) then
delta_det(1:N_states,1:N_det,toothMwen-1, 1) = delta_det(1:N_states,1:N_det,toothMwen-1, 1) + delta_loc(1:N_states,1:N_det,1) * (1d0-fractage(toothMwen))
delta_det(1:N_states,1:N_det,toothMwen-1, 2) = delta_det(1:N_states,1:N_det,toothMwen-1, 2) + delta_loc(1:N_states,1:N_det,2) * (1d0-fractage(toothMwen))
delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1) * (fractage(toothMwen))
delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2) * (fractage(toothMwen))
else
delta_det(1:N_states,1:N_det,toothMwen , 1) = delta_det(1:N_states,1:N_det,toothMwen , 1) + delta_loc(1:N_states,1:N_det,1)
delta_det(1:N_states,1:N_det,toothMwen , 2) = delta_det(1:N_states,1:N_det,toothMwen , 2) + delta_loc(1:N_states,1:N_det,2)
end if
parts_to_get(ind) -= 1
if(parts_to_get(ind) == 0) then
actually_computed(ind) = .true.
total_computed += 1
end if
integer, external :: zmq_delete_tasks integer, external :: zmq_delete_tasks
if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then
stop 'Unable to delete tasks' stop 'Unable to delete tasks'
endif endif
if(more == 0) loop = .false. if(more == 0) loop = .false.
do i_state=1,N_states
do i=1, N_det
dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state)
end do
end do
dress_detail(:, ind) += dress_mwen(:)
wei(delta_loc_cur) = dress_weight_inv(ind)
inds(delta_loc_cur) = ind
if(delta_loc_cur == delta_loc_N .or. .not. loop) then
do j=1,N_cp !! optimizable
fac = 0d0
ok = .false.
do i=1,delta_loc_cur
!fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
fac(i) = cps(inds(i), j) * wei(i) * comb_step
if(fac(i) /= 0d0) ok = .true.
end do
if(ok) then
do i=felem,N_det
cp(:,i,j,1) += delta_loc(:,i,1,1) * fac(1) &
+ delta_loc(:,i,1,2) * fac(2)
!+ delta_loc(:,i,1,3) * fac(3) &
!+ delta_loc(:,i,1,4) * fac(4) &
!+ delta_loc(:,i,1,5) * fac(5) &
!+ delta_loc(:,i,1,6) * fac(6) &
!+ delta_loc(:,i,1,7) * fac(7) &
!+ delta_loc(:,i,1,8) * fac(8)
cp(:,i,j,1) += delta_loc(:,i,2,1) * fac(1) &
+ delta_loc(:,i,2,2) * fac(2)
!+ delta_loc(:,i,2,3) * fac(3) &
!+ delta_loc(:,i,2,4) * fac(4) &
!+ delta_loc(:,i,2,5) * fac(5) &
!+ delta_loc(:,i,2,6) * fac(6) &
!+ delta_loc(:,i,2,7) * fac(7) &
!+ delta_loc(:,i,2,8) * fac(8)
end do
!cp(1:N_states,indi:N_det,j,1) += delta_loc(1:N_states,indi:N_det,1) * fac
!cp(1:N_states,indi:N_det,j,2) += delta_loc(1:N_states,indi:N_det,2) * fac
end if
end do
do i=1,delta_loc_cur
logical :: fracted
integer :: toothMwen
ind = inds(i)
toothMwen = tooth_of_det(ind)
fracted = (toothMwen /= 0)
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
if(fracted) then
delta_det(1:N_states,felem:N_det,toothMwen-1, 1) = delta_det(1:N_states,felem:N_det,toothMwen-1, 1) + delta_loc(felem:N_states,1:N_det,1,i) * (1d0-fractage(toothMwen))
delta_det(1:N_states,felem:N_det,toothMwen-1, 2) = delta_det(1:N_states,felem:N_det,toothMwen-1, 2) + delta_loc(felem:N_states,1:N_det,2,i) * (1d0-fractage(toothMwen))
delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(felem:N_states,1:N_det,1,i) * (fractage(toothMwen))
delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(felem:N_states,1:N_det,2,i) * (fractage(toothMwen))
else
delta_det(1:N_states,felem:N_det,toothMwen , 1) = delta_det(1:N_states,felem:N_det,toothMwen , 1) + delta_loc(1:N_states,felem:N_det,1,i)
delta_det(1:N_states,felem:N_det,toothMwen , 2) = delta_det(1:N_states,felem:N_det,toothMwen , 2) + delta_loc(1:N_states,felem:N_det,2,i)
end if
parts_to_get(ind) -= 1
if(parts_to_get(ind) == 0) then
actually_computed(ind) = .true.
total_computed += 1
end if
end do
felem = N_det+1
delta_loc_cur = 1
else
delta_loc_cur += 1
cycle
end if
time = omp_get_wtime() time = omp_get_wtime()
if((time - timeLast > 2d0) .or. (.not. loop)) then if((time - timeLast > 2d0) .or. (.not. loop)) then
@ -303,18 +353,22 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
endif endif
end if end if
end do pullLoop end do pullLoop
delta (1:N_states,1:N_det) = 0d0
delta_s2(1:N_states,1:N_det) = 0d0
if(total_computed == N_det_generators) then if(total_computed == N_det_generators) then
delta (1:N_states,1:N_det) = 0d0
delta_s2(1:N_states,1:N_det) = 0d0
do i=comb_teeth+1,0,-1 do i=comb_teeth+1,0,-1
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
end do end do
else else
do i=1,cur_cp
delta (1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,1) delta (1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) = cp(1:N_states,1:N_det,cur_cp,2) delta_s2(1:N_states,1:N_det) += cp(1:N_states,1:N_det,i,2)
end do
delta (1:N_states,1:N_det) = delta(1:N_states,1:N_det) / cps_N(cur_cp)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) / cps_N(cur_cp)
do i=cp_first_tooth(cur_cp)-1,0,-1 do i=cp_first_tooth(cur_cp)-1,0,-1
delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1) delta (1:N_states,1:N_det) = delta (1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,1)
delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2) delta_s2(1:N_states,1:N_det) = delta_s2(1:N_states,1:N_det) + delta_det(1:N_states,1:N_det,i,2)
@ -363,7 +417,7 @@ end function
! gen_per_cp : number of generators per checkpoint ! gen_per_cp : number of generators per checkpoint
END_DOC END_DOC
comb_teeth = 64 comb_teeth = 64
N_cps_max = 64 N_cps_max = 32
gen_per_cp = (N_det_generators / N_cps_max) + 1 gen_per_cp = (N_det_generators / N_cps_max) + 1
END_PROVIDER END_PROVIDER
@ -455,13 +509,19 @@ END_PROVIDER
end if end if
end do end do
end do end do
cps(:, N_cp) = 0d0
cp_first_tooth(N_cp) = comb_teeth+1 cp_first_tooth(N_cp) = comb_teeth+1
iorder = -1 iorder = -1
do i=1,N_cp-1 do i=1,N_cp-1
call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i))
end do end do
do i=1,N_det_generators
do j=N_cp,2,-1
cps(i,j) -= cps(i,j-1)
end do
end do
cps(:, N_cp) = 0d0
END_PROVIDER END_PROVIDER

View File

@ -101,6 +101,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ]
! errr = 1d-4 ! errr = 1d-4
! end if ! end if
relative_error = 1.d-5 relative_error = 1.d-5
call write_double(6,relative_error,"Convergence of the stochastic algorithm") call write_double(6,relative_error,"Convergence of the stochastic algorithm")
call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error))

View File

@ -74,15 +74,30 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id)
integer(ZMQ_PTR), intent(in) :: zmq_socket_push integer(ZMQ_PTR), intent(in) :: zmq_socket_push
double precision, intent(in) :: delta_loc(N_states, N_det, 2) double precision, intent(in) :: delta_loc(N_states, N_det, 2)
integer, intent(in) :: ind, task_id integer, intent(in) :: ind, task_id
integer :: rc, i integer :: rc, i, j, felem
felem = 1
dloop : do i=1, N_det
do j=1,N_states
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
felem = i
exit dloop
end if
end do
end do dloop
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push" if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE)
if(rc /= 8*N_states*(N_det+1-felem)) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE)
rc = f77_zmq_send( zmq_socket_push, delta_loc, 8*N_states*N_det*2, ZMQ_SNDMORE) if(rc /= 8*N_states*(N_det+1-felem)) stop "push"
if(rc /= 8*N_states*N_det*2) stop "push"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push" if(rc /= 4) stop "push"
@ -97,11 +112,12 @@ IRP_ENDIF
end subroutine end subroutine
subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id) subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id, felem)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
double precision, intent(inout) :: delta_loc(N_states, N_det, 2) double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
integer, intent(out) :: felem
integer, intent(out) :: ind integer, intent(out) :: ind
integer, intent(out) :: task_id integer, intent(out) :: task_id
integer :: rc, i integer :: rc, i
@ -110,8 +126,16 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, task_id)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pull" if(rc /= 4) stop "pull"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc, N_states*8*N_det*2, 0) rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0)
if(rc /= 8*N_states*N_det*2) stop "pull" if(rc /= 4) stop "pull"
delta_loc(:,:felem,:) = 0d0
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0)
if(rc /= 8*N_states*(N_det+1-felem)) stop "pull"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0)
if(rc /= 8*N_states*(N_det+1-felem)) stop "pull"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull" if(rc /= 4) stop "pull"

View File

@ -75,7 +75,7 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha)
delta_ij_loc = 0d0 delta_ij_loc = 0d0
!$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC,1) PRIVATE(i, j, iproc, n_minilist, ex) & !$OMP PARALLEL DO DEFAULT(SHARED) SCHEDULE(STATIC) PRIVATE(i, j, iproc, n_minilist, ex) &
!$OMP PRIVATE(det_minilist, minilist, haa, contrib) & !$OMP PRIVATE(det_minilist, minilist, haa, contrib) &
!$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok) !$OMP PRIVATE(exc, h1, h2, p1, p2, s1, s2, phase, degree, ok)
do i=n_alpha,1,-1 do i=n_alpha,1,-1
@ -115,10 +115,11 @@ subroutine undress_with_alpha(old_generators, old_det_gen, alpha, n_alpha)
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
do i=1,Nproc do i=2,Nproc
delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,i) delta_ij_loc(:,:,:,1) += delta_ij_loc(:,:,:,i)
!print *, "DELTA_IJ_LOC", delta_ij_loc(:,2:5,2,i)
end do end do
delta_ij_tmp(:,:,:) -= delta_ij_loc(:,:,:,1)
end subroutine end subroutine