10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-11-04 21:24:02 +01:00

real(4) dressing restored

This commit is contained in:
Yann Garniron 2018-05-02 14:32:41 +02:00
parent 727c9a84cd
commit d5f66787fe
5 changed files with 88 additions and 36 deletions

View File

@ -99,10 +99,10 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index
allocate (indices(N_det), & allocate (indices(N_det), &
exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) exc_degree(max(N_det_alpha_unique,N_det_beta_unique)))
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique !PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns !PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order !PROVIDE psi_bilinear_matrix_transp_order
k=1 k=1
do i=1,N_det_alpha_unique do i=1,N_det_alpha_unique

View File

@ -46,14 +46,11 @@ subroutine run_wf
! Selection ! Selection
! --------- ! ---------
call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order PROVIDE psi_bilinear_matrix_transp_order
!!$OMP PARALLEL PRIVATE(i) !!$OMP PARALLEL PRIVATE(i)
!i = omp_get_thread_num() !i = omp_get_thread_num()
! call dress_slave_tcp(i+1, energy) ! call dress_slave_tcp(i+1, energy)

View File

@ -227,8 +227,13 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
if(floop) then if(floop) then
call wall_time(time) call wall_time(time)
print *, "FIRST PULL", time-time0 print *, "FIRST PULL", time-time0
time0 = time
floop = .false. floop = .false.
end if end if
if(cur_cp == -1 .and. ind == N_det_generators) then
call wall_time(time)
print *, "FINISHED_CPL", N_cp-1, time-time0
end if
if(cur_cp == -1) then if(cur_cp == -1) then
@ -260,7 +265,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
end if end if
if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle
print *, "FINISHED CP", cur_cp call wall_time(time)
print *, "FINISHED_CP", cur_cp, time-time0
double precision :: su, su2, eqt, avg, E0, val double precision :: su, su2, eqt, avg, E0, val
integer, external :: zmq_abort integer, external :: zmq_abort
@ -282,10 +289,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp)))
end if end if
call wall_time(time)
print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, ''
if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp-4) then if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then
! Termination ! Termination
print *, "TERMINATE" print *, "TERMINATE"
if (zmq_abort(zmq_to_qp_run_socket) == -1) then if (zmq_abort(zmq_to_qp_run_socket) == -1) then
@ -347,7 +353,7 @@ end function
! gen_per_cp : number of generators per checkpoint ! gen_per_cp : number of generators per checkpoint
END_DOC END_DOC
comb_teeth = 64 comb_teeth = 64
N_cps_max = 32 N_cps_max = 16
gen_per_cp = (N_det_generators / N_cps_max) + 1 gen_per_cp = (N_det_generators / N_cps_max) + 1
END_PROVIDER END_PROVIDER
@ -374,7 +380,6 @@ END_PROVIDER
integer :: nfiller, lfiller, cfiller integer :: nfiller, lfiller, cfiller
logical :: fracted logical :: fracted
integer :: first_suspect integer :: first_suspect
first_suspect = 1 first_suspect = 1
@ -394,11 +399,13 @@ END_PROVIDER
tooth_reduce = 0 tooth_reduce = 0
integer :: fragsize integer :: fragsize
fragsize = N_det_generators / ((N_cps_max+1)*(N_cps_max+2)/2) fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2)
print *, "FRAGSIZE", fragsize
do i=1,N_cps_max do i=1,N_cps_max
cp_limit(i) = fragsize * i * (i+1) / 2 cp_limit(i) = fragsize * i * (i+1) / 2
end do end do
cp_limit(N_cps_max) = N_det*2
print *, "CP_LIMIT", cp_limit print *, "CP_LIMIT", cp_limit
N_dress_jobs = first_det_of_comb - 1 N_dress_jobs = first_det_of_comb - 1
@ -413,12 +420,14 @@ END_PROVIDER
lfiller = 1 lfiller = 1
nfiller = 1 nfiller = 1
do i=1,N_det_generators do i=1,N_det_generators
!print *, i, N_dress_jobs
comb(i) = comb(i) * comb_step comb(i) = comb(i) * comb_step
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs)
!if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then
if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then
print *, "END CUR_CP", cur_cp, N_dress_jobs
first_cp(cur_cp+1) = N_dress_jobs first_cp(cur_cp+1) = N_dress_jobs
done_cp_at(N_dress_jobs) = cur_cp done_cp_at(N_dress_jobs) = cur_cp
cps_N(cur_cp) = dfloat(i) cps_N(cur_cp) = dfloat(i)
@ -427,16 +436,35 @@ END_PROVIDER
cur_cp += 1 cur_cp += 1
end if end if
if (N_dress_jobs == N_det_generators) exit if (N_dress_jobs == N_det_generators) then
exit
end if
end if end if
!!!!!!!!!!!!!!!!!!!!!!!!
if(.FALSE.) then
do l=first_suspect,N_det_generators
if((.not. computed(l))) then
N_dress_jobs+=1
dress_jobs(N_dress_jobs) = l
computed(l) = .true.
first_suspect = l
exit
end if
end do
if (N_dress_jobs == N_det_generators) exit
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ELSE
!!!!!!!!!!!!!!!!!!!!!!!!!!!!
do l=first_suspect,N_det_generators do l=first_suspect,N_det_generators
if((.not. computed(l)) .and. (.not. comp_filler(l))) exit if((.not. computed(l)) .and. (.not. comp_filler(l))) exit
end do end do
first_suspect = l first_suspect = l
if(l > N_det_generators) exit if(l > N_det_generators) cycle
cfiller = tooth_of_det(l) cfiller = tooth_of_det(l)-1
if(cfiller > lfiller) then if(cfiller > lfiller) then
do j=1,nfiller-1 do j=1,nfiller-1
if(.not. computed(filler(j))) then if(.not. computed(filler(j))) then
@ -454,6 +482,8 @@ END_PROVIDER
nfiller += 1 nfiller += 1
end if end if
comp_filler(l) = .True. comp_filler(l) = .True.
end if
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
enddo enddo
@ -463,9 +493,10 @@ END_PROVIDER
dress_jobs(k) = filler(j) dress_jobs(k) = filler(j)
N_dress_jobs = k N_dress_jobs = k
end if end if
computed(filler(j)) = .true. computed(filler(j)) = .true.
end do end do
N_cp = cur_cp N_cp = cur_cp
if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then

View File

@ -48,7 +48,6 @@ subroutine run_dress_slave(thread,iproce,energy)
double precision :: fac double precision :: fac
if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
@ -81,6 +80,9 @@ subroutine run_dress_slave(thread,iproce,energy)
send = .false. send = .false.
done_for = 0 done_for = 0
double precision :: hij, sij
call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij)
print *, E0_denominator(1)
!$OMP PARALLEL DEFAULT(SHARED) & !$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) &
!$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) &
@ -209,10 +211,12 @@ subroutine run_dress_slave(thread,iproce,energy)
do i=0,comb_teeth+1 do i=0,comb_teeth+1
call omp_destroy_lock(lck_det(i)) call omp_destroy_lock(lck_det(i))
end do end do
stop
end subroutine end subroutine
subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
use f77_zmq use f77_zmq
implicit none implicit none
@ -228,10 +232,9 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf,
integer, intent(in) :: ind, cur_cp, task_id integer, intent(in) :: ind, cur_cp, task_id
integer :: rc, i, j, k, l integer :: rc, i, j, k, l
double precision :: contrib(N_states) double precision :: contrib(N_states)
real(4), allocatable :: r4buf(:,:,:)
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push" if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE)
@ -239,14 +242,22 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf,
if(cur_cp /= -1) then if(cur_cp /= -1) then
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE) allocate(r4buf(N_states, N_det, 2))
if(rc /= 8*N_states*N_det) stop "push" do i=1,2
do j=1,N_det
do k=1,N_states
r4buf(k,j,i) = real(delta_loc(k,j,i), 4)
end do
end do
end do
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 8*N_states*N_det) stop "push" if(rc /= 4*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE)
if(rc /= 4*N_states*N_det) stop "push"
else else
contrib = 0d0 contrib = 0d0
do i=1,N_det do i=1,N_det
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
end do end do
@ -255,7 +266,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf,
if(rc /= 8*N_states) stop "push" if(rc /= 8*N_states) stop "push"
N_buf = N_bufi N_buf = N_bufi
!N_buf = (/0,1,0/) N_buf = (/0,1,0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5" if(rc /= 4*3) stop "push5"
@ -294,6 +305,11 @@ IRP_ENDIF
end subroutine end subroutine
BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ]
END_PROVIDER
subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib)
use f77_zmq use f77_zmq
implicit none implicit none
@ -308,8 +324,6 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf,
integer :: rc, i, j, k integer :: rc, i, j, k
integer, intent(out) :: N_buf(3) integer, intent(out) :: N_buf(3)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pulla" if(rc /= 4) stop "pulla"
@ -320,11 +334,21 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf,
if(cur_cp /= -1) then if(cur_cp /= -1) then
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0)
if(rc /= 8*N_states*N_det) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0) rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*4*N_det, 0)
if(rc /= 8*N_states*N_det) stop "pulld" if(rc /= 4*N_states*N_det) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*4*N_det, 0)
if(rc /= 4*N_states*N_det) stop "pulld"
do i=1,2
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k)
do j=1,N_det
do k=1,N_states
delta_loc(k,j,i) = real(real4buf(k,j,i), 8)
end do
end do
end do
else else
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
if(rc /= 8*N_states) stop "pullc" if(rc /= 8*N_states) stop "pullc"

View File

@ -283,7 +283,7 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili
haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int)
call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc)