From 2e3c54e278e949fd430fa71698e5e9104d6a5adf Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Apr 2018 11:32:27 +0200 Subject: [PATCH] fixed custom buffers --- plugins/dress_zmq/dress_general.irp.f | 2 +- plugins/dress_zmq/dress_slave.irp.f | 6 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- plugins/dress_zmq/dress_zmq_routines.irp.f | 2 + plugins/dress_zmq/dressing.irp.f | 4 +- plugins/dress_zmq/run_dress_slave.irp.f | 123 ++++++++++--------- plugins/shiftedbk/shifted_bk.irp.f | 2 +- plugins/shiftedbk/shifted_bk_routines.irp.f | 99 +++++++++------ plugins/shiftedbk/shifted_bk_slave.irp.f | 2 +- 9 files changed, 142 insertions(+), 100 deletions(-) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index e31f1742..96bba521 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -45,7 +45,7 @@ subroutine run_dressing(N_st,energy) do i=1,N_st if(.true.) call write_double(6,ci_energy_dressed(i),"Energy") enddo - call diagonalize_ci_dressed + if(.true.) call diagonalize_ci_dressed E_new = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) delta_E = (E_new - E_old)/dble(N_states) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index feea575e..ff003a21 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -6,6 +6,10 @@ subroutine dress_slave read_wf = .False. distributed_davidson = .False. SOFT_TOUCH read_wf distributed_davidson + + threshold_selectors = 1.d0 + threshold_generators = 1d0 + call provide_everything call switch_qp_run_to_master call run_wf @@ -67,6 +71,6 @@ subroutine dress_slave_tcp(i,energy) integer, intent(in) :: i logical :: lstop lstop = .False. - call run_dress_slave(0,i,energy,lstop) + call run_dress_slave(0,i,energy) end diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6262920a..80c93e84 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -211,7 +211,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, felem = N_det+1 pullLoop : do while (loop) call pull_dress_results(zmq_socket_pull, ind, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) - call dress_pulled(int_buf, double_buf, det_buf, N_buf) + call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) felem = min(felem_loc, felem) dress_mwen(:) = 0d0 diff --git a/plugins/dress_zmq/dress_zmq_routines.irp.f b/plugins/dress_zmq/dress_zmq_routines.irp.f index bde2c6d8..dc47eb20 100644 --- a/plugins/dress_zmq/dress_zmq_routines.irp.f +++ b/plugins/dress_zmq/dress_zmq_routines.irp.f @@ -2,6 +2,8 @@ subroutine dress_zmq() implicit none double precision, allocatable :: energy(:) allocate (energy(N_states)) + threshold_selectors = 1.d0 + threshold_generators = 1d0 read_wf = .True. SOFT_TOUCH read_wf diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 0a640d49..85279029 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -93,8 +93,8 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] delta_ij_tmp = 0d0 E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion - threshold_selectors = 1.d0 - threshold_generators = 1d0 + !threshold_selectors = 1.d0 + !:threshold_generators = 1d0 ! if(errr /= 0d0) then ! errr = errr / 2d0 ! else diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 375829df..c7e0ed0c 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -36,6 +36,15 @@ subroutine run_dress_slave(thread,iproc,energy) integer :: h,p,n,i_state logical :: ok + integer, allocatable :: int_buf(:) + double precision, allocatable :: double_buf(:) + integer(bit_kind), allocatable :: det_buf(:,:,:) + integer :: N_buf(3) + + + allocate(int_buf(N_dress_int_buffer)) + allocate(double_buf(N_dress_double_buffer)) + allocate(det_buf(N_int, 2, N_dress_det_buffer)) allocate(delta_ij_loc(N_states,N_det,2)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -55,10 +64,11 @@ subroutine run_dress_slave(thread,iproc,energy) if(task_id /= 0) then read (task,*) subset, i_generator delta_ij_loc = 0d0 + call generator_start(i_generator, iproc) call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator) + call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, task_id) + call push_dress_results(zmq_socket_push, i_generator, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) else exit end if @@ -69,23 +79,28 @@ subroutine run_dress_slave(thread,iproc,energy) end subroutine - BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] -&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] -&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] - implicit none - - dress_int_buffer = 0 - dress_double_buffer = 0d0 - dress_det_buffer = 0_bit_kind -END_PROVIDER +! BEGIN_PROVIDER [ integer, dress_int_buffer, (N_dress_int_buffer) ] +!&BEGIN_PROVIDER [ double precision, dress_double_buffer, (N_dress_double_buffer) ] +!&BEGIN_PROVIDER [ integer(bit_kind), dress_det_buffer, (N_int, 2, N_dress_det_buffer) ] +! implicit none +! +! dress_int_buffer = 0 +! dress_double_buffer = 0d0 + ! dress_det_buffer = 0_bit_kind +!END_PROVIDER -subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) +!subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) +subroutine push_dress_results(zmq_socket_push, ind, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id) use f77_zmq implicit none integer(ZMQ_PTR), intent(in) :: zmq_socket_push double precision, intent(in) :: delta_loc(N_states, N_det, 2) + double precision, intent(in) :: double_buf(*) + integer, intent(in) :: int_buf(*) + integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) + integer, intent(in) :: N_buf(3) integer, intent(in) :: ind, task_id integer :: rc, i, j, felem @@ -115,28 +130,31 @@ subroutine push_dress_results(zmq_socket_push, ind, delta_loc, task_id) - rc = f77_zmq_send( zmq_socket_push, N_dress_int_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) + if(rc /= 4*3) stop "push5" - rc = f77_zmq_send( zmq_socket_push, dress_int_buffer, 4*N_dress_int_buffer, ZMQ_SNDMORE) - if(rc /= 4*N_dress_int_buffer) stop "push" + if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" + if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - rc = f77_zmq_send( zmq_socket_push, N_dress_double_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, dress_double_buffer, 8*N_dress_double_buffer, ZMQ_SNDMORE) - if(rc /= 8*N_dress_double_buffer) stop "push" - - rc = f77_zmq_send( zmq_socket_push, N_dress_det_buffer, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" + if(N_buf(1) > 0) then + rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) + if(rc /= 4*N_buf(1)) stop "push6" + end if - rc = f77_zmq_send( zmq_socket_push, dress_det_buffer, 2*N_int*bit_kind*N_dress_det_buffer, ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_dress_det_buffer) stop "push" - + if(N_buf(2) > 0) then + rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) + if(rc /= 8*N_buf(2)) stop "push8" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" + end if rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push" + if(rc /= 4) stop "push11" ! Activate is zmq_socket_push is a REQ IRP_IF ZMQ_PUSH @@ -164,49 +182,44 @@ subroutine pull_dress_results(zmq_socket_pull, ind, delta_loc, int_buf, double_b rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pulla" rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pullb" delta_loc(:,:felem,:) = 0d0 rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + if(rc /= 8*N_states*(N_det+1-felem)) stop "pullc" rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0) - if(rc /= 8*N_states*(N_det+1-felem)) stop "pull" + if(rc /= 8*N_states*(N_det+1-felem)) stop "pulld" - rc = f77_zmq_recv( zmq_socket_pull, N_buf(1), 4, 0) - if(rc /= 4) stop "pull" + rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) + if(rc /= 4*3) stop "pull" if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - - - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" - - - rc = f77_zmq_recv( zmq_socket_pull, N_buf(2), 4, 0) - if(rc /= 4) stop "pull" if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - - - - - rc = f77_zmq_recv( zmq_socket_pull, N_buf(3), 4, 0) - if(rc /= 4) stop "pull" if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - + if(N_buf(1) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) + if(rc /= 4*N_buf(1)) stop "pull1" + end if + + if(N_buf(2) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) + if(rc /= 8*N_buf(2)) stop "pull2" + end if + + if(N_buf(3) > 0) then + rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) + if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" + end if rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull" + if(rc /= 4) stop "pull4" ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH diff --git a/plugins/shiftedbk/shifted_bk.irp.f b/plugins/shiftedbk/shifted_bk.irp.f index 270eec17..897f39f0 100644 --- a/plugins/shiftedbk/shifted_bk.irp.f +++ b/plugins/shiftedbk/shifted_bk.irp.f @@ -11,7 +11,7 @@ program shifted_bk PROVIDE psi_bilinear_matrix_transp_order - call diagonalize_CI() + !call diagonalize_CI() call dress_zmq() end diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 9574584b..67f8424b 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -1,30 +1,21 @@ use selection_types - - BEGIN_PROVIDER [ integer, N_dress_int_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] -&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] - implicit none - N_dress_int_buffer = 1 - N_dress_double_buffer = 1 - N_dress_det_buffer = 1 -END_PROVIDER - - BEGIN_PROVIDER [ double precision, fock_diag_tmp_, (2,mo_tot_num+1,Nproc) ] -&BEGIN_PROVIDER [ integer, current_generator_, (Nproc) ] +&BEGIN_PROVIDER [ integer, n_det_add ] &BEGIN_PROVIDER [ double precision, a_h_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ double precision, a_s2_i, (N_det, Nproc) ] &BEGIN_PROVIDER [ type(selection_buffer), sb, (Nproc) ] +&BEGIN_PROVIDER [ type(selection_buffer), global_sb ] +&BEGIN_PROVIDER [ type(selection_buffer), mini_sb ] &BEGIN_PROVIDER [ double precision, N_det_increase_factor ] implicit none integer :: i - integer :: n_det_add N_det_increase_factor = 1d0 - current_generator_(:) = 0 n_det_add = max(1, int(float(N_det) * N_det_increase_factor)) + call create_selection_buffer(n_det_add, n_det_add*2, global_sb) + call create_selection_buffer(n_det_add, n_det_add*2, mini_sb) do i=1,Nproc call create_selection_buffer(n_det_add, n_det_add*2, sb(i)) end do @@ -32,46 +23,82 @@ END_PROVIDER a_s2_i = 0d0 END_PROVIDER -subroutine generator_done(i_gen) + + BEGIN_PROVIDER [ integer, N_dress_int_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_double_buffer ] +&BEGIN_PROVIDER [ integer, N_dress_det_buffer ] implicit none - integer, intent(in) :: i_gen + N_dress_int_buffer = 1 + N_dress_double_buffer = n_det_add + N_dress_det_buffer = n_det_add +END_PROVIDER + + +subroutine generator_done(i_gen, int_buf, double_buf, det_buf, N_buf, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer, intent(out) :: int_buf(N_dress_int_buffer), N_buf(3) + double precision, intent(out) :: double_buf(N_dress_double_buffer) + integer(bit_kind), intent(out) :: det_buf(N_int, 2, N_dress_det_buffer) + integer :: i - !dress_int_buffer = ... + call sort_selection_buffer(sb(iproc)) + det_buf(:,:,:sb(iproc)%cur) = sb(iproc)%det(:,:,:sb(iproc)%cur) + double_buf(:sb(iproc)%cur) = sb(iproc)%val(:sb(iproc)%cur) + if(sb(iproc)%cur > 0) then + !$OMP CRITICAL + call merge_selection_buffers(sb(iproc), mini_sb) + call sort_selection_buffer(mini_sb) + do i=1,Nproc + sb(i)%mini = min(sb(i)%mini, mini_sb%mini) + end do + !$OMP END CRITICAL + end if + N_buf(1) = 1 + N_buf(2) = sb(iproc)%cur + N_buf(3) = sb(iproc)%cur + sb(iproc)%cur = 0 end subroutine -subroutine dress_pulled(int_buf, double_buf, det_buf, N_buf) +subroutine generator_start(i_gen, iproc) + implicit none + integer, intent(in) :: i_gen, iproc + integer :: i + + call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) +end subroutine + + +subroutine dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) use bitmasks implicit none - integer, intent(in) :: N_buf(3) + integer, intent(in) :: ind, N_buf(3) integer, intent(in) :: int_buf(*) double precision, intent(in) :: double_buf(*) integer(bit_kind), intent(in) :: det_buf(N_int,2,*) - + integer :: i + + do i=1,N_buf(2) + call add_to_selection_buffer(global_sb, det_buf(1,1,i), double_buf(i)) + end do end subroutine subroutine delta_ij_done() use bitmasks implicit none - integer :: i, n_det_add, old_det_gen + integer :: i, old_det_gen integer(bit_kind), allocatable :: old_generators(:,:,:) allocate(old_generators(N_int, 2, N_det_generators)) old_generators(:,:,:) = psi_det_generators(:,:,:N_det_generators) old_det_gen = N_det_generators - call sort_selection_buffer(sb(1)) - do i=2,Nproc - call sort_selection_buffer(sb(i)) - call merge_selection_buffers(sb(i), sb(1)) - end do - - call sort_selection_buffer(sb(1)) - - call fill_H_apply_buffer_no_selection(sb(1)%cur,sb(1)%det,N_int,0) + call sort_selection_buffer(global_sb) + call fill_H_apply_buffer_no_selection(global_sb%cur,global_sb%det,N_int,0) call copy_H_apply_buffer_to_wf() if (s2_eig.or.(N_states > 1) ) then @@ -226,17 +253,13 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili - if(current_generator_(iproc) /= i_gen) then - current_generator_(iproc) = i_gen - call build_fock_tmp(fock_diag_tmp_(1,1,iproc),psi_det_generators(1,1,i_gen),N_int) - end if - haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, iproc) - - call add_to_selection_buffer(sb(iproc), alpha, contrib) - + + if(contrib < sb(iproc)%mini) then + call add_to_selection_buffer(sb(iproc), alpha, contrib) + end if end subroutine diff --git a/plugins/shiftedbk/shifted_bk_slave.irp.f b/plugins/shiftedbk/shifted_bk_slave.irp.f index d7812b97..db943a85 100644 --- a/plugins/shiftedbk/shifted_bk_slave.irp.f +++ b/plugins/shiftedbk/shifted_bk_slave.irp.f @@ -10,7 +10,7 @@ program shifted_bk PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_order - call diagonalize_CI() + !call diagonalize_CI() call dress_slave() end