From d5f66787fe4305af227bea09e6e57b7ece0ddf3f Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 2 May 2018 14:32:41 +0200 Subject: [PATCH] real(4) dressing restored --- plugins/dress_zmq/alpha_factory.irp.f | 8 +-- plugins/dress_zmq/dress_slave.irp.f | 3 - plugins/dress_zmq/dress_stoch_routines.irp.f | 51 +++++++++++++---- plugins/dress_zmq/run_dress_slave.irp.f | 60 ++++++++++++++------ plugins/shiftedbk/shifted_bk_routines.irp.f | 2 +- 5 files changed, 88 insertions(+), 36 deletions(-) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index ccbf177a..1cb286fc 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -99,10 +99,10 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate (indices(N_det), & exc_degree(max(N_det_alpha_unique,N_det_beta_unique))) - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique - PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order - PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns - PROVIDE psi_bilinear_matrix_transp_order + !PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + !PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + !PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + !PROVIDE psi_bilinear_matrix_transp_order k=1 do i=1,N_det_alpha_unique diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 10453d2a..b752507b 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -46,14 +46,11 @@ subroutine run_wf ! Selection ! --------- - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) - PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - !!$OMP PARALLEL PRIVATE(i) !i = omp_get_thread_num() ! call dress_slave_tcp(i+1, energy) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 25bec079..a3c59976 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -227,8 +227,13 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, if(floop) then call wall_time(time) print *, "FIRST PULL", time-time0 + time0 = time floop = .false. end if + if(cur_cp == -1 .and. ind == N_det_generators) then + call wall_time(time) + print *, "FINISHED_CPL", N_cp-1, time-time0 + end if if(cur_cp == -1) then @@ -260,7 +265,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end if if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle - print *, "FINISHED CP", cur_cp + call wall_time(time) + + print *, "FINISHED_CP", cur_cp, time-time0 double precision :: su, su2, eqt, avg, E0, val integer, external :: zmq_abort @@ -282,10 +289,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) end if - call wall_time(time) print '(2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp-4) then + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. cur_cp == N_cp) then ! Termination print *, "TERMINATE" if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -347,7 +353,7 @@ end function ! gen_per_cp : number of generators per checkpoint END_DOC comb_teeth = 64 - N_cps_max = 32 + N_cps_max = 16 gen_per_cp = (N_det_generators / N_cps_max) + 1 END_PROVIDER @@ -373,7 +379,6 @@ END_PROVIDER integer, allocatable :: filler(:) integer :: nfiller, lfiller, cfiller logical :: fracted - integer :: first_suspect first_suspect = 1 @@ -394,11 +399,13 @@ END_PROVIDER tooth_reduce = 0 integer :: fragsize - fragsize = N_det_generators / ((N_cps_max+1)*(N_cps_max+2)/2) + fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) + print *, "FRAGSIZE", fragsize do i=1,N_cps_max cp_limit(i) = fragsize * i * (i+1) / 2 end do + cp_limit(N_cps_max) = N_det*2 print *, "CP_LIMIT", cp_limit N_dress_jobs = first_det_of_comb - 1 @@ -413,12 +420,14 @@ END_PROVIDER lfiller = 1 nfiller = 1 do i=1,N_det_generators + !print *, i, N_dress_jobs comb(i) = comb(i) * comb_step !DIR$ FORCEINLINE call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then + print *, "END CUR_CP", cur_cp, N_dress_jobs first_cp(cur_cp+1) = N_dress_jobs done_cp_at(N_dress_jobs) = cur_cp cps_N(cur_cp) = dfloat(i) @@ -427,16 +436,35 @@ END_PROVIDER cur_cp += 1 end if - if (N_dress_jobs == N_det_generators) exit + if (N_dress_jobs == N_det_generators) then + exit + end if end if + + !!!!!!!!!!!!!!!!!!!!!!!! + if(.FALSE.) then + do l=first_suspect,N_det_generators + if((.not. computed(l))) then + N_dress_jobs+=1 + dress_jobs(N_dress_jobs) = l + computed(l) = .true. + first_suspect = l + exit + end if + end do + if (N_dress_jobs == N_det_generators) exit + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ELSE + !!!!!!!!!!!!!!!!!!!!!!!!!!!! do l=first_suspect,N_det_generators if((.not. computed(l)) .and. (.not. comp_filler(l))) exit end do first_suspect = l - if(l > N_det_generators) exit + if(l > N_det_generators) cycle - cfiller = tooth_of_det(l) + cfiller = tooth_of_det(l)-1 if(cfiller > lfiller) then do j=1,nfiller-1 if(.not. computed(filler(j))) then @@ -454,6 +482,8 @@ END_PROVIDER nfiller += 1 end if comp_filler(l) = .True. + end if + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! enddo @@ -463,9 +493,10 @@ END_PROVIDER dress_jobs(k) = filler(j) N_dress_jobs = k end if - computed(filler(j)) = .true. + computed(filler(j)) = .true. end do + N_cp = cur_cp if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index c38b2c90..339f78b7 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -46,8 +46,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: toothMwen logical :: fracted double precision :: fac - - + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" @@ -81,6 +80,9 @@ subroutine run_dress_slave(thread,iproce,energy) send = .false. done_for = 0 + double precision :: hij, sij + call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) + print *, E0_denominator(1) !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & @@ -208,11 +210,13 @@ subroutine run_dress_slave(thread,iproce,energy) end do do i=0,comb_teeth+1 call omp_destroy_lock(lck_det(i)) - end do + end do + stop end subroutine + subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) use f77_zmq implicit none @@ -228,10 +232,9 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, integer, intent(in) :: ind, cur_cp, task_id integer :: rc, i, j, k, l double precision :: contrib(N_states) + real(4), allocatable :: r4buf(:,:,:) - - - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push" rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) @@ -239,14 +242,22 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(cur_cp /= -1) then - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det) stop "push" + allocate(r4buf(N_states, N_det, 2)) + do i=1,2 + do j=1,N_det + do k=1,N_states + r4buf(k,j,i) = real(delta_loc(k,j,i), 4) + end do + end do + end do - rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det, ZMQ_SNDMORE) - if(rc /= 8*N_states*N_det) stop "push" + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" + + rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), 4*N_states*N_det, ZMQ_SNDMORE) + if(rc /= 4*N_states*N_det) stop "push" else contrib = 0d0 - do i=1,N_det contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) end do @@ -255,7 +266,7 @@ subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, if(rc /= 8*N_states) stop "push" N_buf = N_bufi - !N_buf = (/0,1,0/) + N_buf = (/0,1,0/) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) if(rc /= 4*3) stop "push5" @@ -294,6 +305,11 @@ IRP_ENDIF end subroutine +BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] + +END_PROVIDER + + subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) use f77_zmq implicit none @@ -308,8 +324,6 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, integer :: rc, i, j, k integer, intent(out) :: N_buf(3) - - rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) if(rc /= 4) stop "pulla" @@ -320,11 +334,21 @@ subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, if(cur_cp /= -1) then - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det, 0) - if(rc /= 8*N_states*N_det) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*4*N_det, 0) + if(rc /= 4*N_states*N_det) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det, 0) - if(rc /= 8*N_states*N_det) stop "pulld" + rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*4*N_det, 0) + if(rc /= 4*N_states*N_det) stop "pulld" + + do i=1,2 + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) + do j=1,N_det + do k=1,N_states + delta_loc(k,j,i) = real(real4buf(k,j,i), 8) + end do + end do + end do else rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) if(rc /= 8*N_states) stop "pullc" diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 7213e831..56c86a91 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -283,7 +283,7 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) + haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc)