From 3abccca5e35948e54a659cacccea42fbfcf4c296 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 3 Aug 2018 23:44:05 +0200 Subject: [PATCH 01/35] phasemask_bit --- src/Determinants/slater_rules.irp.f | 134 +++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 2 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index ee597720..5e3b41ae 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -167,8 +167,7 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) end select end - -subroutine get_double_excitation(det1,det2,exc,phase,Nint) +subroutine get_double_excitation_ref(det1,det2,exc,phase,Nint) use bitmasks implicit none BEGIN_DOC @@ -312,6 +311,137 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) end +subroutine get_phasemask_bit(det1, pm, Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(out) :: pm(Nint,2) + integer(bit_kind) :: tmp + integer :: ispin, i + do ispin=1,2 + tmp = 0_8 + do i=1,Nint + pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + pm(i,ispin) = xor(pm(i,ispin), tmp) + if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + end do + end do +end subroutine + + +subroutine get_double_excitation(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the two excitation operators between two doubly excited determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(in) :: det2(Nint,2) + integer, intent(out) :: exc(0:2,2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp, pm(Nint,2) + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + double precision :: refaz + logical :: ok + + ASSERT (Nint > 0) + + !do ispin=1,2 + !tmp = 0_8 + !do i=1,Nint + ! pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + ! pm(i,ispin) = xor(pm(i,ispin), tmp) + ! if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + !end do + !end do + call get_phasemask_bit(det1, pm, Nint) + nperm = 0 + exc(0,1,1) = 0 + exc(0,2,1) = 0 + exc(0,1,2) = 0 + exc(0,2,2) = 0 + do ispin = 1,2 + idx_particle = 0 + idx_hole = 0 + ishift = 1-bit_kind_size + !par = 0 + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l,ispin) == det2(l,ispin)) then + cycle + endif + tmp = xor( det1(l,ispin), det2(l,ispin) ) + particle = iand(tmp, det2(l,ispin)) + hole = iand(tmp, det1(l,ispin)) + do while (particle /= 0_bit_kind) + tz = trailz(particle) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_particle = idx_particle + 1 + exc(0,2,ispin) = exc(0,2,ispin) + 1 + exc(idx_particle,2,ispin) = tz+ishift + particle = iand(particle,particle-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)==2 + exit + endif + do while (hole /= 0_bit_kind) + tz = trailz(hole) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_hole = idx_hole + 1 + exc(0,1,ispin) = exc(0,1,ispin) + 1 + exc(idx_hole,1,ispin) = tz+ishift + hole = iand(hole,hole-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin) + exit + endif + enddo + + select case (exc(0,1,ispin)) + case(0) + cycle + + case(1) + if(exc(1,1,ispin) < exc(1,2,ispin)) nperm = nperm+1 + + case (2) + a = exc(1,1,ispin) + b = exc(1,2,ispin) + c = exc(2,1,ispin) + d = exc(2,2,ispin) + + if(min(a,c) > max(b,d) .or. min(b,d) > max(a,c) .or. (a-b)*(c-d)<0) then + nperm = nperm + 1 + end if + exit + end select + + enddo + phase = phase_dble(iand(nperm,1)) + call get_double_excitation_ref(det1,det2,exc,refaz,Nint) + if(phase /= refaz) then + print *, "phase", phase, refaz, n, exc(0,1,1) + end if +end + subroutine get_mono_excitation(det1,det2,exc,phase,Nint) use bitmasks implicit none From 4b9b54e19ac7459589681e5ff7aa358dde9f5fd5 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 28 Aug 2018 10:24:38 +0200 Subject: [PATCH 02/35] removed test for phase_mask_bit --- src/Determinants/slater_rules.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 5e3b41ae..7df79f42 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -436,10 +436,10 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) enddo phase = phase_dble(iand(nperm,1)) - call get_double_excitation_ref(det1,det2,exc,refaz,Nint) - if(phase /= refaz) then - print *, "phase", phase, refaz, n, exc(0,1,1) - end if + !call get_double_excitation_ref(det1,det2,exc,refaz,Nint) + !if(phase == refaz) then + ! print *, "phase", phase, refaz, n, exc(0,1,1) + !end if end subroutine get_mono_excitation(det1,det2,exc,phase,Nint) From d78f64732a5493d7f10c7c80b564005e63a133fc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 29 Aug 2018 11:30:19 +0200 Subject: [PATCH 03/35] pt2_stoch re-implemented --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 659 ++++++------------ plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 20 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 14 +- 4 files changed, 234 insertions(+), 461 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 73d71365..0b26ab33 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,8 +1,31 @@ -BEGIN_PROVIDER [ integer, fragment_first ] - implicit none - fragment_first = first_det_of_teeth(1) +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + pt2_F(:) = 1 + pt2_F(:N_det_generators/100 + 1) = 1 + pt2_n_tasks_max = N_det_generators/100 + 1 + + if(N_det_generators < 256) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = 5 + pt2_N_teeth = 16 + end if +END_PROVIDER + + subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) use f77_zmq use selection_types @@ -11,22 +34,15 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(in) :: relative_error, absolute_error, E(N_states) double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, allocatable :: pt2_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, k, Ncomb, i_generator_end - integer, external :: pt2_find + integer :: i, j, k - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, external :: omp_get_wtime double precision :: state_average_weight_save(N_states), w(N_states) - double precision :: time integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket if (N_det < max(10,N_states)) then @@ -41,26 +57,9 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) state_average_weight(:) = 0.d0 state_average_weight(pt2_stoch_istate) = 1.d0 TOUCH state_average_weight - - allocate(pt2_detail(N_states,N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) - sumabove = 0d0 - sum2above = 0d0 - Nabove = 0d0 - - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight psi_selectors - - computed = .false. - - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - Ncomb=size(comb) - call get_carlo_workbatch(computed, comb, Ncomb, tbc) - pt2_detail = 0d0 + provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors + print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' @@ -83,41 +82,18 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then stop 'Unable to put energy on ZMQ server' endif - call create_selection_buffer(1, 1*2, b) - integer :: ipos - ipos=1 - integer, external :: add_task_to_taskserver - do i=1,tbc(0) - if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 + + do i=1,N_det_generators + do j=1,pt2_F(i) !!!!!!!!!!!! + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if + end do end do - if (ipos > 1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - endif integer, external :: zmq_set_running if (zmq_set_running(zmq_to_qp_run_socket) == -1) then @@ -129,18 +105,16 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull,E(pt2_stoch_istate), b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, w, error) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, absolute_error, w, error) pt2(pt2_stoch_istate) = w(pt2_stoch_istate) else call pt2_slave_inproc(i) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - call delete_selection_buffer(b) print *, '========== ================= ================= =================' - deallocate(pt2_detail, comb, computed, tbc) enddo FREE pt2_stoch_istate state_average_weight(:) = state_average_weight_save(:) @@ -153,34 +127,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) end subroutine -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:size_tbc), Ncomb - logical, intent(in) :: computed(N_det_generators) - double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states,N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 - - mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets, comb_teeth) - do j=1,comb_teeth - if(.not.(computed(dets(j)))) then - exit mainLoop - end if - end do - - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - myVal += pt2_detail(pt2_stoch_istate,dets(j)) * pt2_weight_inv(dets(j)) * comb_step - sumabove(j) += myVal - sum2above(j) += myVal*myVal - Nabove(j) += 1 - end do - end do mainLoop -end subroutine - - subroutine pt2_slave_inproc(i) implicit none integer, intent(in) :: i @@ -188,197 +134,118 @@ subroutine pt2_slave_inproc(i) call run_pt2_slave(1,i,pt2_e0_denominator) end -subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, pt2,error) + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2, error) use f77_zmq use selection_types use bitmasks implicit none - integer, intent(in) :: Ncomb integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) - double precision, intent(in) :: comb(Ncomb), relative_error, absolute_error, E - logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - double precision, intent(out) :: pt2(N_states),error(N_states) + double precision, intent(in) :: relative_error, absolute_error, E + double precision, intent(out) :: pt2(N_states), error(N_states) - type(selection_buffer), intent(inout) :: b - double precision, allocatable :: pt2_mwen(:,:) + double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks + integer, external :: pt2_find_sample - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, n_tasks - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: time0 - double precision :: time, timeLast, Nabove_old + double precision, external :: omp_get_wtime - integer :: tooth, firstTBDcomb, orgTBDcomb, n_tasks_max - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - double precision :: eqt - character*(512) :: task - Nabove_old = -1.d0 - n_tasks_max = N_det_generators/100+1 + double precision :: v, x, avg, eqt, E0 + double precision :: time, time0 - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & - pt2_mwen(N_states, n_tasks_max) ) - - pt2_mwen(1:N_states, 1:n_tasks_max) = 0.d0 - do i=1,N_det_generators - actually_computed(i) = computed(i) - enddo - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - do i=1,tbc(0) - actually_computed(tbc(i)) = .false. - end do - - orgTBDcomb = int(Nabove(1)) - firstTBDcomb = 1 + integer, allocatable :: f(:) + logical, allocatable :: d(:) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(n_tasks_max), index(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + + S(:) = 0d0 + S2(:) = 0d0 + n = 1 + t = 0 + U = 0 + eI(:,:) = 0d0 + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E more = 1 - call wall_time(time0) - timeLast = time0 + time0 = omp_get_wtime() - call get_first_tooth(actually_computed, tooth) - Nabove_old = Nabove(tooth) - - logical :: loop - loop = .True. - pullLoop : do while (loop) - - call pull_pt2_results(zmq_socket_pull, index, pt2_mwen, task_id, n_tasks) - do i=1,n_tasks - pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i) - parts_to_get(index(i)) -= 1 - if(parts_to_get(index(i)) < 0) then - print *, i, index(i), parts_to_get(index(i)) - print *, parts_to_get - stop "PARTS ??" - end if - if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. - enddo - - integer, external :: zmq_delete_tasks - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then - stop 'Unable to delete tasks' - endif - if (more == 0) then - loop = .False. - endif - - time = omp_get_wtime() - - if(time - timeLast > 10d0 .or. (.not.loop)) then - timeLast = time - do i=1, first_det_of_teeth(1)-1 - if(.not.(actually_computed(i))) then - cycle pullLoop + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = E + do i=pt2_n_0(t),1,-1 + E0 += eI(pt2_stoch_istate, i) + end do + else + exit end if end do - - integer, external :: zmq_abort - double precision :: E0, avg, prop - - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - call get_first_tooth(actually_computed, tooth) - - if (firstTBDcomb > Ncomb) then - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (1)' - endif - endif - exit pullLoop - endif - - !if(Nabove(1) < 5d0) cycle - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - if (tooth <= comb_teeth) then - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - else - eqt = 0.d0 - endif - call wall_time(time) - if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 30) then - ! Termination - pt2(pt2_stoch_istate) = avg - error(pt2_stoch_istate) = eqt - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif - endif - else - if (Nabove(tooth) > Nabove_old) then - print *, loop - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - Nabove_old = Nabove(tooth) - endif - endif - end if - end do pullLoop -!<<<<<<< HEAD - if(tooth == comb_teeth+1) then - pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) - error(pt2_stoch_istate) = 0d0 - else - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) - error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - end if - -!======= -! -! E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) -! prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) -! prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) -! E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop -! pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) -! -!>>>>>>> master + c = pt2_R(n) + if(c /= 0) then + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample(v, pt2_cW) + x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + pt2(pt2_stoch_istate) = E0-E+avg + error(pt2_stoch_istate) = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, eI_task, task_id, n_tasks) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' + endif + do i=1,n_tasks + eI(:, index(i)) += eI_task(:, i) + f(index(i)) -= 1 + end do + end if + end do + print *, "TOTAL", sum(eI) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call sort_selection_buffer(b) end subroutine -integer function pt2_find(v, w, sze, imin, imax) + +integer function pt2_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) + double precision, intent(in) :: v, w(0:N_det_generators) integer :: i,l,h integer, parameter :: block=64 - l = imin - h = imax-1 + l = 0 + h = N_det_generators do while(h-l >= block) i = ishft(h+l,-1) @@ -389,217 +256,131 @@ integer function pt2_find(v, w, sze, imin, imax) end if end do !DIR$ LOOP COUNT (64) - do pt2_find=l,h - if(w(pt2_find) >= v) then + do pt2_find_sample=l,h + if(w(pt2_find_sample) >= v) then exit end if end do end function -BEGIN_PROVIDER [ integer, comb_teeth ] + + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] implicit none - comb_teeth = 100 + integer :: N_c, N_j, U, t, i + double precision :: v + logical, allocatable :: d(:) + integer, external :: pt2_find_sample + + allocate(d(N_det_generators)) + + pt2_R(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + + do i=1,N_j + d(i) = .true. + pt2_J(i) = i + end do + + call RANDOM_NUMBER(pt2_u) + + U = 0 + + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = pt2_find_sample(v, pt2_cW) + if(.not. d(i)) then + N_j += 1 + pt2_J(N_j) = i + d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J(N_j) = U + d(U) = .true. + exit; + end if + end do + enddo + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if END_PROVIDER - -subroutine get_first_tooth(computed, first_teeth) + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i, first_det + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample - first_det = N_det_generators+1+1 - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(.not.(computed(i))) then - first_det = i + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - end do - - do i=comb_teeth+1, 1, -1 - if(first_det_of_teeth(i) < first_det) then - first_teeth = i - exit + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" end if end do - -end subroutine - - -BEGIN_PROVIDER [ integer*8, size_tbc ] - implicit none - BEGIN_DOC -! Size of the tbc array - END_DOC - size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first -END_PROVIDER - -subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) - implicit none - integer, intent(inout) :: Ncomb - double precision, intent(out) :: comb(Ncomb) - integer, intent(inout) :: tbc(0:size_tbc) - logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - integer :: icount, n - integer :: k, l - l=first_det_of_comb - call RANDOM_NUMBER(comb) - do i=1,size(comb) - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - Ncomb = i - if (tbc(0) == N_det_generators) return - do while (computed(l)) - l=l+1 - enddo - k=tbc(0)+1 - tbc(k) = l - computed(l) = .True. - tbc(0) = k - enddo - -end subroutine - - - -subroutine get_comb(stato, dets, ct) - implicit none - integer, intent(in) :: ct - double precision, intent(in) :: stato - integer, intent(out) :: dets(ct) - double precision :: curs - integer :: j - integer, external :: pt2_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) end do -end subroutine - - -subroutine add_comb(comb, computed, tbc, stbc, ct) - implicit none - integer*8, intent(in) :: stbc - integer, intent(in) :: ct - double precision, intent(in) :: comb - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(0:stbc) - integer :: i, k, l, dets(ct) - - !DIR$ FORCEINLINE - call get_comb(comb, dets, ct) - - k=tbc(0)+1 - do i = 1, ct - l = dets(i) - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - tbc(0) = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, pt2_stoch_istate ] - implicit none - BEGIN_DOC - ! State for stochatsic PT2 - END_DOC - pt2_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: pt2_find - - pt2_weight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 - pt2_cweight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 + pt2_cW(0) = 0d0 do i=1,N_det_generators - pt2_weight(i) = psi_coef_generators(i,pt2_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - pt2_cweight(N_det_generators) = pt2_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - pt2_cweight(i) = pt2_weight(i) + pt2_cweight(i+1) + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do - - do i=1,N_det_generators - pt2_weight(i) = pt2_weight(i) / pt2_cweight(1) - pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(1) - enddo - - do i=1,N_det_generators-1 - pt2_cweight(i) = 1.d0 - pt2_cweight(i+1) - end do - pt2_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - first_det_of_comb = 1 - do i=1,N_det_generators - if(pt2_weight(i)/norm_left < .5d0*comb_step) then - first_det_of_comb = i - exit - end if - norm_left -= pt2_weight(i) - end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - stato -= comb_step - end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ] - implicit none - BEGIN_DOC -! Inverse of pt2_weight array - END_DOC - integer :: i - do i=1,N_det_generators - pt2_weight_inv(i) = 1.d0/pt2_weight(i) - enddo - + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 88c8aacb..6808e553 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -22,12 +22,11 @@ subroutine run_pt2_slave(thread,iproc,energy) logical :: done double precision,allocatable :: pt2(:,:) - integer :: n_tasks, k, n_tasks_max + integer :: n_tasks, k integer, allocatable :: i_generator(:), subset(:) - n_tasks_max = N_det_generators/100+1 - allocate(task_id(n_tasks_max), task(n_tasks_max)) - allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -46,7 +45,7 @@ subroutine run_pt2_slave(thread,iproc,energy) done = .False. do while (.not.done) - n_tasks = min(n_tasks+1,n_tasks_max) + n_tasks = min(n_tasks+1,pt2_n_tasks_max) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -63,7 +62,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 - call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) + call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) enddo integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -190,12 +189,5 @@ IRP_ENDIF end subroutine - -BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ] - integer :: i - do i=1,N_det_generators - pt2_workload(i) = dfloat(N_det_generators - i + 1)**2 - end do - pt2_workload = pt2_workload / sum(pt2_workload) -END_PROVIDER + diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 464f0a9f..c6f0fbd3 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -60,7 +60,7 @@ subroutine run_selection_slave(thread,iproc,energy) else ASSERT (N == buf%N) end if - call select_connected(i_generator,energy,pt2,buf,0) + call select_connected(i_generator,energy,pt2,buf,1,1) endif integer, external :: task_done_to_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 2463b762..79ed1746 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -46,11 +46,11 @@ subroutine get_mask_phase(det, phasemask) end subroutine -subroutine select_connected(i_generator,E0,pt2,b,subset) +subroutine select_connected(i_generator,E0,pt2,b,subset,csubset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b double precision, intent(inout) :: pt2(N_states) integer :: k,l @@ -71,7 +71,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset,csubset) enddo deallocate(fock_diag_tmp) end subroutine @@ -254,7 +254,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -262,7 +262,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d ! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -286,7 +286,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), allocatable:: preinteresting_det(:,:,:) allocate (preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count + !PROVIDE fragment_count monoAdo = .true. monoBdo = .true. @@ -559,7 +559,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle From ad69f39f99d0b0dd73f556fb13d1d55337c5b066 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 29 Aug 2018 20:54:58 +0200 Subject: [PATCH 04/35] dress_zmq re-implemented --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- plugins/dress_zmq/alpha_factory.irp.f | 13 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 887 +++++++------------ plugins/dress_zmq/run_dress_slave.irp.f | 418 ++++----- plugins/shiftedbk/NEEDED_CHILDREN_MODULES | 2 +- plugins/shiftedbk/shifted_bk_routines.irp.f | 3 +- 6 files changed, 506 insertions(+), 819 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 0b26ab33..31598181 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -233,7 +233,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2 end do end if end do - print *, "TOTAL", sum(eI) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index 261966be..f2902afb 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -2,10 +2,10 @@ use bitmasks -subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) +subroutine alpha_callback(delta_ij_loc, i_generator, subset, csubset, iproc) use bitmasks implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) integer, intent(in) :: iproc @@ -15,7 +15,7 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) do l=1,N_generators_bitmask - call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset,iproc) + call generate_singles_and_doubles(delta_ij_loc,i_generator,l,subset,csubset,iproc) enddo end subroutine @@ -34,7 +34,7 @@ BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ] END_PROVIDER -subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, iproc) +subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, csubset, iproc) use bitmasks implicit none BEGIN_DOC @@ -42,7 +42,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index END_DOC double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) - integer, intent(in) :: i_generator, subset, bitmask_index + integer, intent(in) :: i_generator, subset, csubset, bitmask_index integer, intent(in) :: iproc @@ -69,7 +69,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(abuf(N_det*6), labuf(N_det)) allocate(preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count monoAdo = .true. @@ -345,7 +344,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 8844e064..9f112b75 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -1,17 +1,182 @@ -BEGIN_PROVIDER [ integer, fragment_first ] +BEGIN_PROVIDER [ integer, dress_stoch_istate ] implicit none - fragment_first = first_det_of_teeth(1) + dress_stoch_istate = 1 +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + pt2_F(:) = 1 + pt2_F(:N_det_generators/100 + 1) = 1 + pt2_n_tasks_max = N_det_generators/100 + 1 + + if(N_det_generators < 256) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = 5 + pt2_N_teeth = 16 + end if END_PROVIDER -subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) + +BEGIN_PROVIDER [ integer, dress_N_cp_max ] + dress_N_cp_max = 100 +END_PROVIDER + +BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] + implicit none + integer :: i + + do i=1,dress_N_cp_max-1 + dress_M_m(i) = N_det_generators * i / (dress_N_cp_max+1) + end do + dress_M_m(1) = 1 + dress_M_m(dress_N_cp_max) = N_det_generators+1 +END_PROVIDER + + + + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, dress_R, (0:N_det_generators)] +&BEGIN_PROVIDER[ integer, dress_R1, (0:N_det_generators)] +&BEGIN_PROVIDER[ double precision, dress_M_mi, (dress_N_cp_max, N_det_generators+1)] +&BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, dress_T, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, dress_N_cp ] + implicit none + integer :: N_c, N_j, U, t, i, m + double precision :: v + double precision, allocatable :: tilde_M(:) + logical, allocatable :: d(:) + integer, external :: dress_find_sample + + allocate(d(N_det_generators), tilde_M(N_det_generators)) + + dress_M_mi = 0d0 + tilde_M = 0d0 + dress_R(:) = 0 + dress_R1(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + + do i=1,N_j + d(i) = .true. + pt2_J(i) = i + end do + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) + call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + U = 0 + + m = 1 + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = dress_find_sample(v, pt2_cW) + tilde_M(i) += 1d0 + if(.not. d(i)) then + N_j += 1 + pt2_J(N_j) = i + d(i) = .true. + end if + end do + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J(N_j) = U + d(U) = .true. + exit; + end if + end do + + if(N_c == dress_M_m(m)) then + dress_R1(m) = N_j + dress_R(N_j) = N_c + dress_M_mi(m, :N_det_generators) = tilde_M(:) + m += 1 + end if + enddo + + dress_N_cp = m-1 + dress_R1(dress_N_cp) = N_j + + !!!!!!!!!!!!!! + do m=1,dress_N_cp + do i=dress_R1(m-1)+1, dress_R1(m) + dress_P(pt2_J(i)) = m + end do + end do + + do i=1, pt2_n_0(1) + dress_T(i) = 0 + end do + + do t=2,pt2_N_teeth+1 + do i=pt2_n_0(t-1)+1, pt2_n_0(t) + dress_T(i) = t-1 + end do + end do + !!!!!!!!!!!!! + +END_PROVIDER + + +! BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] +!&BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] +!&BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] +! implicit none +! dress_e(:,:) = 1d0 +! dress_dot_t(:) = 0 +! dress_dot_n_0(:) = 0 +! +! integer :: U, m, t, i +! +! U = pt2_n_0(1)+1 + ! +! do m=1,dress_N_cp +! do while(dress_M_mi(m, U) /= 0d0) +! U = U+1 +! end do + ! dress_dot_t(m) = pt2_N_teeth + 1 + ! dress_dot_n_0(m) = N_det_generators + !! + ! do t = 2, pt2_N_teeth+1 + ! if(U <= pt2_n_0(t)) then + ! dress_dot_t(m) = t-1 +! dress_dot_n_0(m) = pt2_n_0(t-1) +! exit +! end if +! end do +! do t=dress_dot_t(m), pt2_N_teeth +! do i=pt2_n_0(t)+1, pt2_n_0(t+1) +! dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) + ! end do + ! end do + ! end do +! do m=dress_N_cp, 2, -1 +! dress_e(:,m) -= dress_e(:,m-1) +! end do +!END_PROVIDER + + +subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) use f77_zmq implicit none - integer, intent(in) :: lndet character(len=64000) :: task - character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -24,12 +189,9 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer :: i, j, k, Ncp - double precision, external :: omp_get_wtime - double precision :: time integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) task(:) = CHAR(0) - temp(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states @@ -39,7 +201,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) TOUCH state_average_weight !provide psi_coef_generators - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors + provide nproc mo_bielec_integrals_in_map mo_mono_elec_integral psi_selectors !print *, dress_e0_denominator print *, '========== ================= ================= =================' @@ -75,59 +237,15 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos, sz - integer :: block(1), block_i, cur_tooth_reduce, ntas - logical :: flushme - block = 0 - block_i = 0 - cur_tooth_reduce = 0 - ipos=1 - ntas = 0 - do i=1,N_dress_jobs+1 - flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce ) - if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) - - if(flushme .and. block_i > 0) then - if(block(1) > fragment_first) then - ntas += 1 - write(temp, '(I9,1X,60(I9,1X))') 0, block(:block_i) - sz = len(trim(temp))+1 - temp(sz:sz) = '|' - !write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) - write(task(ipos:ipos+sz), *) temp(:sz) - !ipos += 20 - ipos += sz+1 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - - ipos=1 - endif - else - if(block_i /= 1) stop "reduced fragmented dets" - do j=1,fragment_count - ntas += 1 - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, block(1) - ipos += 20 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - ntas += 1 - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if - block_i = 0 - block = 0 - end if - - if(i /= N_dress_jobs+1) then - cur_tooth_reduce = tooth_reduce(dress_jobs(i)) - block_i += 1 - block(block_i) = dress_jobs(i) - end if + + + do i=1,N_det_generators + do j=1,pt2_F(i) !!!!!!!!!!!! + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' + endif + end do end do if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' @@ -164,6 +282,54 @@ subroutine dress_slave_inproc(i) call run_dress_slave(1,i,dress_e0_denominator) end + BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_F, (dress_N_cp)] + implicit none + + logical, allocatable :: d(:) + integer :: U, m, t, i + + allocate(d(N_det_generators+1)) + + dress_e(:,:) = 1d0 + dress_dot_t(:) = 0 + dress_dot_n_0(:) = 0 + dress_dot_F = 0 + d(:) = .false. + U=0 + + do m=1,dress_N_cp + do i=dress_R1(m-1)+1,dress_R1(m) + dress_dot_F(m) += pt2_F(pt2_J(i)) + d(pt2_J(i)) = .true. + end do + + do while(d(U+1)) + U += 1 + end do + + dress_dot_t(m) = pt2_N_teeth + 1 + dress_dot_n_0(m) = N_det_generators + + do t = 2, pt2_N_teeth+1 + if(U < pt2_n_0(t)) then + dress_dot_t(m) = t-1 + dress_dot_n_0(m) = pt2_n_0(t-1) + exit + end if + end do + do t=dress_dot_t(m), pt2_N_teeth + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) + end do + end do + end do + do m=dress_N_cp, 2, -1 + dress_e(:,m) -= dress_e(:,m-1) + end do +END_PROVIDER subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dress, istate) @@ -181,147 +347,102 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:) - double precision, allocatable :: dress_detail(:,:) - double precision :: dress_mwen(N_states) + double precision, allocatable :: breve_delta_m(:,:,:), S(:), S2(:) + double precision, allocatable :: edI(:,:), edI_task(:,:) + integer, allocatable :: edI_index(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer :: more - integer :: i, j, k, i_state, N - integer :: task_id, ind - double precision, save :: time0 = -1.d0 - double precision :: time + integer :: i, c, j, k, f, t, m, p, m_task + integer :: task_id, n_tasks + double precision :: E0, error, x, v, time, time0 + double precision :: avg, eqt double precision, external :: omp_get_wtime - integer :: cur_cp, last_cp - integer :: delta_loc_cur, is, N_buf(3) - integer, allocatable :: int_buf(:), agreg_for_cp(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer, external :: zmq_delete_tasks - last_cp = 10000000 - allocate(agreg_for_cp(N_cp)) - agreg_for_cp = 0 - allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) - delta_loc_cur = 1 + integer, allocatable :: dot_f(:) + integer, external :: zmq_delete_tasks, dress_find_sample delta = 0d0 delta_s2 = 0d0 - allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2)) - dress_detail = -1000d0 + allocate(cp(N_states, N_det, dress_N_cp, 2), edI(N_states, N_det)) + allocate(edI_task(N_states, N_det), edI_index(N_det)) + allocate(breve_delta_m(N_states, N_det, 2)) + allocate(dot_f(dress_N_cp)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + edI = -100000d0 + cp = 0d0 - character*(512) :: task + dot_f(:) = dress_dot_F(:) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() more = 1 - if (time0 < 0.d0) then - call wall_time(time0) - endif - logical :: loop, floop - - floop = .true. - loop = .true. - - pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) - !print *, cur_cp, ind - if(floop) then - call wall_time(time) - print *, "first_pull", time-time0 - time0 = time - floop = .false. - end if - if(cur_cp == -1 .and. ind == N_det_generators) then - call wall_time(time) - end if - - - if(cur_cp == -1) then - call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - stop 'Unable to delete tasks' - endif - if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! - dress_detail(:, ind) = dress_mwen(:) - !print *, "DETAIL", ind, dress_mwen - else if(cur_cp > 0) then - if(ind == 0) cycle - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,1) += delta_loc(:,i,1) + m = 1 + c = 0 + S(:) = 0d0 + S2(:) = 0d0 + time0 = omp_get_wtime() + do while (m <= dress_N_cp) + if(dot_f(m) == 0) then + E0 = 0 + do i=dress_dot_n_0(m),1,-1 + E0 += edI(dress_stoch_istate, i) end do - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,2) += delta_loc(:,i,2) + do while(c < dress_M_m(m)) + c = c+1 + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = dress_find_sample(v, pt2_cW) + x += edI(dress_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do end do - !$OMP END PARALLEL DO - agreg_for_cp(cur_cp) += ind - !print *, agreg_for_cp(cur_cp), ind, needed_by_cp(cur_cp), cur_cp - if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then - stop "too much results..." - end if - if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle - - call wall_time(time) + t = dress_dot_t(m) + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + error = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' + !end do + m += 1 + else + task_id = 0 + do + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(task_id == 0) exit + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' + endif + end do + do i=1,n_tasks + edI(:, edI_index(i)) = edI_task(:, i) !!!!!!!!!!!!!!! += !!!!! + end do + cp(:,:,m_task,1) += breve_delta_m(:,:,1) + cp(:,:,m_task,2) += breve_delta_m(:,:,2) - last_cp = cur_cp - double precision :: su, su2, eqt, avg, E0, val - integer, external :: zmq_abort - - su = 0d0 - su2 = 0d0 - !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, & - !$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2) - do i=1, int(cps_N(cur_cp)) - call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) - su += val - su2 += val*val - end do - !$OMP END PARALLEL DO - - avg = su / cps_N(cur_cp) - eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) - E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) - if(cp_first_tooth(cur_cp) <= comb_teeth) then - E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) - end if - - !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' - if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30)) then - ! Termination - print *, "TERMINATE" - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif - endif - endif + dot_f(m_task) -= f end if - end do pullLoop + end do - delta(:,:) = cp(:,:,last_cp,1) - delta_s2(:,:) = cp(:,:,last_cp,2) + delta(:,:) = cp(:,:,m-1,1) + delta_s2(:,:) = cp(:,:,m-1,2) dress(istate) = E(istate)+E0+avg call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine -integer function dress_find(v, w, sze, imin, imax) +integer function dress_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) + double precision, intent(in) :: v, w(0:N_det_generators) integer :: i,l,h integer, parameter :: block=64 - l = imin - h = imax-1 + l = 0 + h = N_det_generators do while(h-l >= block) i = ishft(h+l,-1) @@ -332,401 +453,73 @@ integer function dress_find(v, w, sze, imin, imax) end if end do !DIR$ LOOP COUNT (64) - do dress_find=l,h - if(w(dress_find) >= v) then + do dress_find_sample=l,h + if(w(dress_find_sample) >= v) then exit end if end do end function - BEGIN_PROVIDER [ integer, gen_per_cp ] -&BEGIN_PROVIDER [ integer, comb_teeth ] -&BEGIN_PROVIDER [ integer, N_cps_max ] + + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - BEGIN_DOC -! N_cps_max : max number of checkpoints -! -! gen_per_cp : number of generators per checkpoint - END_DOC - comb_teeth = 64 - N_cps_max = 16 - gen_per_cp = (N_det_generators / N_cps_max) + 1 -END_PROVIDER + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: dress_find_sample - - BEGIN_PROVIDER [ integer, N_cp ] -&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ] -&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] -&BEGIN_PROVIDER [ integer, N_dress_jobs ] -&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, tooth_reduce, (N_det_generators) ] - implicit none - logical, allocatable :: computed(:), comp_filler(:) - integer :: i, j, last_full, dets(comb_teeth) + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) - integer :: k, l, cur_cp, under_det(comb_teeth+1) - integer :: cp_limit(N_cps_max) - integer, allocatable :: iorder(:), first_cp(:) - integer, allocatable :: filler(:) - integer :: nfiller, lfiller, cfiller - logical :: fracted - - integer :: first_suspect - provide psi_coef_generators - first_suspect = 1 - - allocate(filler(n_det_generators)) - allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) - allocate(computed(N_det_generators)) - allocate(comp_filler(N_det_generators)) - first_cp = 1 - cps = 0d0 - cur_cp = 1 - done_cp_at = 0 - done_cp_at_det = 0 - needed_by_cp = 0 - comp_filler = .false. - computed = .false. - cps_N = 1d0 - tooth_reduce = 0 + tilde_cW(0) = 0d0 - integer :: fragsize - fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) - - do i=1,N_cps_max - cp_limit(i) = fragsize * i * (i+1) / 2 - end do - cp_limit(N_cps_max) = N_det*2 - - N_dress_jobs = first_det_of_comb - 1 - do i=1, N_dress_jobs - dress_jobs(i) = i - computed(i) = .true. - end do - - l=first_det_of_comb - call random_seed(put=(/321,654,65,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) - call RANDOM_NUMBER(comb) - lfiller = 1 - nfiller = 1 do i=1,N_det_generators - !print *, i, N_dress_jobs - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) - - !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then - if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then - first_cp(cur_cp+1) = N_dress_jobs - done_cp_at(N_dress_jobs) = cur_cp - cps_N(cur_cp) = dfloat(i) - if(N_dress_jobs /= N_det_generators) then - cps(:, cur_cp+1) = cps(:, cur_cp) - cur_cp += 1 - end if - - if (N_dress_jobs == N_det_generators) then - exit - end if - end if - - !!!!!!!!!!!!!!!!!!!!!!!! - if(.TRUE.) then - do l=first_suspect,N_det_generators - if((.not. computed(l))) then - N_dress_jobs+=1 - dress_jobs(N_dress_jobs) = l - computed(l) = .true. - first_suspect = l - exit - end if - end do - - if (N_dress_jobs == N_det_generators) exit - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ELSE - !!!!!!!!!!!!!!!!!!!!!!!!!!!! - do l=first_suspect,N_det_generators - if((.not. computed(l)) .and. (.not. comp_filler(l))) exit - end do - first_suspect = l - if(l > N_det_generators) cycle - - cfiller = tooth_of_det(l)-1 - if(cfiller > lfiller) then - do j=1,nfiller-1 - if(.not. computed(filler(j))) then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - nfiller = 2 - filler(1) = l - lfiller = cfiller - else - filler(nfiller) = l - nfiller += 1 - end if - comp_filler(l) = .True. - end if - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) enddo - - do j=1,nfiller-1 - if(.not. computed(filler(j)))then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - - - N_cp = cur_cp - - if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then - print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max - stop "error in jobs creation" - end if - - cur_cp = 0 - do i=1,N_dress_jobs - if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) - done_cp_at(i) = cur_cp - done_cp_at_det(dress_jobs(i)) = cur_cp - needed_by_cp(cur_cp) += 1 - end do - - - under_det = 0 - cp_first_tooth = 0 - do i=1,N_dress_jobs - do j=comb_teeth+1,1,-1 - if(dress_jobs(i) <= first_det_of_teeth(j)) then - under_det(j) = under_det(j) + 1 - if(under_det(j) == first_det_of_teeth(j))then - do l=done_cp_at(i)+1, N_cp - cps(:first_det_of_teeth(j)-1, l) = 0d0 - cp_first_tooth(l) = j - end do - cps(first_det_of_teeth(j), done_cp_at(i)+1) = & - cps(first_det_of_teeth(j), done_cp_at(i)+1) * fractage(j) - end if - else - exit - end if - end do - end do - cp_first_tooth(N_cp) = comb_teeth+1 - - do i=1,N_det_generators - do j=N_cp,2,-1 - cps(i,j) -= cps(i,j-1) - end do - end do - - iorder = -1 - - cps(:, N_cp) = 0d0 - - iloop : do i=fragment_first+1,N_det_generators - k = tooth_of_det(i) - if(k == 0) cycle - if (i == first_det_of_teeth(k)) cycle - - do j=1,N_cp - if(cps(i, j) /= 0d0) cycle iloop - end do - - tooth_reduce(i) = k - end do iloop - - do i=1,N_det_generators - if(tooth_reduce(dress_jobs(i)) == 0) dress_jobs(i) = dress_jobs(i)+N_det*2 - end do - - do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1) - end do - - do i=1,N_det_generators - if(dress_jobs(i) > N_det) dress_jobs(i) = dress_jobs(i) - N_det*2 - end do -END_PROVIDER - - -subroutine get_comb_val(stato, detail, cur_cp, val, istate) - implicit none - integer, intent(in) :: cur_cp, istate - integer :: first - double precision, intent(in) :: stato, detail(N_states, N_det_generators) - double precision, intent(out) :: val - double precision :: curs - integer :: j, k - integer, external :: dress_find - - curs = 1d0 - stato - val = 0d0 - first = cp_first_tooth(cur_cp) - - do j = comb_teeth, first, -1 - !DIR$ FORCEINLINE - k = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - if(k == first_det_of_teeth(first)) then - val += detail(istate, k) * dress_weight_inv(k) * comb_step * fractage(first) - else - val += detail(istate, k) * dress_weight_inv(k) * comb_step - end if - - curs -= comb_step - end do -end subroutine - - -subroutine get_comb(stato, dets) - implicit none - double precision, intent(in) :: stato - integer, intent(out) :: dets(comb_teeth) - double precision :: curs - integer :: j - integer, external :: dress_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step - end do -end subroutine - - -subroutine add_comb(com, computed, cp, N, tbc) - implicit none - double precision, intent(in) :: com - integer, intent(inout) :: N - double precision, intent(inout) :: cp(N_det) - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(N_det_generators) - integer :: i, k, l, dets(comb_teeth) - - !DIR$ FORCEINLINE - call get_comb(com, dets) - k=N+1 - do i = 1, comb_teeth - l = dets(i) - cp(l) += 1d0 - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if - end do - N = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, dress_stoch_istate ] - implicit none - dress_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, dress_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_weight_inv, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, fractage, (comb_teeth) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] -&BEGIN_PROVIDER [ integer, tooth_of_det, (N_det_generators) ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: dress_find - - dress_weight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - dress_cweight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - - do i=1,N_det_generators - dress_weight(i) = psi_coef_generators(i,dress_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - dress_cweight(N_det_generators) = dress_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - dress_cweight(i) = dress_weight(i) + dress_cweight(i+1) - end do - - do i=1,N_det_generators - dress_weight(i) = dress_weight(i) / dress_cweight(1) - dress_cweight(i) = dress_cweight(i) / dress_cweight(1) - enddo - - do i=1,N_det_generators-1 - dress_cweight(i) = 1.d0 - dress_cweight(i+1) - end do - dress_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - !print *, "comb_step", comb_step - first_det_of_comb = 1 - do i=1,N_det_generators ! min(100,N_det_generators) - first_det_of_comb = i - if(dress_weight(i)/norm_left < comb_step) then + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - norm_left -= dress_weight(i) + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" + end if end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - - comb_step = (1d0 - dress_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = dress_find(stato, dress_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - fractage(i) = (dress_cweight(iloc) - stato) / dress_weight(iloc) - stato -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = dress_find_sample(r, tilde_cW) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - - - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif + pt2_cW(0) = 0d0 do i=1,N_det_generators - dress_weight_inv(i) = 1.d0/dress_weight(i) - enddo - - tooth_of_det(:first_det_of_teeth(1)-1) = 0 - do i=1,comb_teeth - tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - - diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index b61a4d5a..513cdbda 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -1,13 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, fragment_count ] - implicit none - BEGIN_DOC - ! Number of fragments for the deterministic part - END_DOC - fragment_count = 1 -END_PROVIDER - subroutine run_dress_slave(thread,iproce,energy) use f77_zmq @@ -18,7 +10,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer, intent(in) :: thread, iproce integer :: rc, i, subset, i_generator - integer :: worker_id, task_id, ctask, ltask + integer :: worker_id, ctask, ltask character*(5120) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -26,69 +18,60 @@ subroutine run_dress_slave(thread,iproce,energy) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - - logical :: done - - double precision,allocatable :: dress_detail(:) - integer :: ind - double precision,allocatable :: delta_ij_loc(:,:,:) - integer :: h,p,n,i_state - logical :: ok - - integer, allocatable :: int_buf(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer :: N_buf(3) - logical :: last + double precision,allocatable :: breve_delta_m(:,:,:) + integer :: i_state,m,l,t,p,sum_f !integer, external :: omp_get_thread_num - double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) - integer :: toothMwen - logical :: fracted + double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:), edI(:) + double precision, allocatable :: edI_task(:) + integer, allocatable :: edI_index(:), edI_taskID(:) + integer :: n_tasks + + integer :: iproc + integer, allocatable :: f(:) + integer :: cp_sent, cp_done + integer :: cp_max(Nproc) + integer :: will_send, task_id + integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending double precision :: fac - + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" - allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) - allocate(cp(N_states, N_det, N_cp, 2)) + allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) + allocate(cp(N_states, N_det, dress_N_cp, 2)) + allocate(edI(N_det_generators), f(N_det_generators)) + allocate(edI_index(N_det_generators), edI_task(N_det_generators)) + + edI = 0d0 + f = 0 delta_det = 0d9 cp = 0d0 - task(:) = CHAR(0) - - - integer :: iproc, cur_cp, done_for(0:N_cp) - integer, allocatable :: tasks(:) - integer :: lastCp(Nproc) - integer :: lastSent, lastSendable - logical :: send - integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1) - integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1) - - do i=0,N_cp+1 + call omp_init_lock(sending) + do i=0,dress_N_cp+1 call omp_init_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_init_lock(lck_det(i)) end do - lastCp = 0 - lastSent = 0 - send = .false. - done_for = 0 + cp_done = 0 + cp_sent = 0 + will_send = 0 - double precision :: hij, sij + double precision :: hij, sij, tmp !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & - !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & - !$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) & + !$OMP PRIVATE(breve_delta_m, task, task_id) & + !$OMP PRIVATE(fac,m) & + !$OMP PRIVATE(i, will_send, i_generator, subset, iproc) & !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -102,92 +85,101 @@ subroutine run_dress_slave(thread,iproce,energy) iproc = omp_get_thread_num()+1 - allocate(int_buf(N_dress_int_buffer)) - allocate(double_buf(N_dress_double_buffer)) - allocate(det_buf(N_int, 2, N_dress_det_buffer)) - allocate(delta_ij_loc(N_states,N_det,2)) - do + allocate(breve_delta_m(N_states,N_det,2)) + + + do while(m /= dress_N_cp+1) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) task = task//" 0" - if(task_id == 0) exit - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if(task_id /= 0) then read (task,*) subset, i_generator - - !$OMP ATOMIC - done_for(done_cp_at_det(i_generator)) += 1 - ! print *, "IGEN", i_generator, done_cp_at_det(i_generator) - delta_ij_loc(:,:,:) = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - - do i=1,N_cp - fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step - if(fac == 0d0) cycle - call omp_set_lock(lck_sto(i)) - cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) - cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) - call omp_unset_lock(lck_sto(i)) - end do - - - toothMwen = tooth_of_det(i_generator) - fracted = (toothMwen /= 0) - if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) - if(fracted) then - call omp_set_lock(lck_det(toothMwen)) - call omp_set_lock(lck_det(toothMwen-1)) - delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) - call omp_unset_lock(lck_det(toothMwen)) - call omp_unset_lock(lck_det(toothMwen-1)) - else - call omp_set_lock(lck_det(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - call omp_unset_lock(lck_det(toothMwen)) - end if - call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - lastCp(iproc) = done_cp_at_det(i_generator) + m = dress_P(i_generator) + else + m = dress_N_cp + 1 end if - + + will_send = 0 + !$OMP CRITICAL - send = .false. - lastSendable = N_cp*2 - do i=1,Nproc - lastSendable = min(lastCp(i), lastSendable) - end do - lastSendable -= 1 - if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then - lastSent = lastSendable - cur_cp = lastSent - send = .true. + cp_max(iproc) = m + cp_done = minval(cp_max)-1 + if(cp_done > cp_sent) then + will_send = cp_sent + 1 + cp_sent = will_send end if !$OMP END CRITICAL - if(send) then - N_buf = (/0,1,0/) - - delta_ij_loc = 0d0 - if(cur_cp < 1) stop "cur_cp < 1" - do i=1,cur_cp - delta_ij_loc(:,:,1) += cp(:,:,i,1) - delta_ij_loc(:,:,2) += cp(:,:,i,2) + if(will_send /= 0) then + breve_delta_m = 0d0 + + do l=1, will_send + breve_delta_m(:,:,1) += cp(:,:,l,1) + breve_delta_m(:,:,2) += cp(:,:,l,2) end do - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) - do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1) - delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2) + breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) !/ cps_N(cur_cp) + + do t=dress_dot_t(will_send)-1,0,-1 + breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) + breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) end do - call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + + + + call omp_set_lock(sending) + n_tasks = 0 + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) == will_send .and. f(i) /= 0) then + n_tasks += 1 + edI_task(n_tasks) = edI(i) + edI_index(n_tasks) = i + sum_f += f(i) + end if + end do +!!!!call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) + call omp_unset_lock(sending) end if + + if(m /= dress_N_cp+1) then + !UPDATE i_generator - if(task_id == 0) exit + + breve_delta_m(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + + + call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator)*0 + 1, iproc) + + t = dress_T(i_generator) + + call omp_set_lock(lck_det(t)) + delta_det(:,:,t, 1) += breve_delta_m(:,:,1) + delta_det(:,:,t, 2) += breve_delta_m(:,:,2) + call omp_unset_lock(lck_det(t)) + + do p=1,dress_N_cp ! m, dress_N_cp + if(dress_e(i_generator, p) /= 0) then + fac = dress_e(i_generator, p) * pt2_W_T / pt2_w(i_generator) + call omp_set_lock(lck_sto(p)) + cp(:,:,p,1) += breve_delta_m(:,:,1) * fac + cp(:,:,p,2) += breve_delta_m(:,:,2) * fac + call omp_unset_lock(lck_sto(p)) + end if + end do + + tmp = 0d0 + do i=1,N_det + tmp += psi_coef(i, dress_stoch_istate)*breve_delta_m(dress_stoch_istate, i, 1) + end do + !$OMP ATOMIC + edI(i_generator) += tmp! dot_product(psi_det_coef(:, dress_stoch_istate), breve_delta_m(dress_stoch_istate, :, 1)) + !$OMP ATOMIC + f(i_generator) += 1 + !push bidon + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) + end if end do call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) @@ -195,180 +187,84 @@ subroutine run_dress_slave(thread,iproce,energy) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL - do i=0,N_cp+1 + do i=0,dress_N_cp+1 call omp_destroy_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_destroy_lock(lck_det(i)) end do end subroutine - -subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) +subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - real(kind=4), allocatable :: delta_loc4(:,:,:) - double precision, intent(in) :: double_buf(*) - integer, intent(in) :: int_buf(*) - integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) - integer, intent(in) :: N_bufi(3) - integer :: N_buf(3) - integer, intent(in) :: ind, cur_cp, task_id - integer :: rc, i, j, k, l - double precision :: contrib(N_states) - real(sendt), allocatable :: r4buf(:,:,:) - - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - - if(cur_cp /= -1) then - allocate(r4buf(N_states, N_det, 2)) - do i=1,2 - do j=1,N_det - do k=1,N_states - r4buf(k,j,i) = real(delta_loc(k,j,i), sendt) - end do - end do - end do - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - else - contrib = 0d0 - do i=1,N_det - contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) - end do - - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" + integer, intent(in) :: m_task, f, edI_index(n_tasks) + double precision, intent(in) :: breve_delta_m(N_states, N_det, 2), edI_task(n_tasks) + integer, intent(in) :: task_id, n_tasks + integer :: rc, i, j, k + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" - N_buf = N_bufi - !N_buf = (/0,1,0/) - - rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) - if(rc /= 4*3) stop "push5" + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push2" - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" + rc = f77_zmq_send( zmq_socket_push, m_task, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push3" - - if(N_buf(1) > 0) then - rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) - if(rc /= 4*N_buf(1)) stop "push6" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) - if(rc /= 8*N_buf(2)) stop "push8" - end if + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" - if(N_buf(3) > 0) then - rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" - end if + rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) + if(rc /= 8*n_tasks) stop "push5" - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push11" - end if + rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, ZMQ_SNDMORE) + if(rc /= 4*n_tasks) stop "push6" -! Activate is zmq_socket_push is a REQ + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "push6" +! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE character*(2) :: ok rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) IRP_ENDIF - end subroutine -BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] - -END_PROVIDER -subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) +subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - integer, intent(out) :: cur_cp - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - double precision, intent(out) :: double_buf(*), contrib(N_states) - integer, intent(out) :: int_buf(*) - integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) - integer, intent(out) :: ind - integer, intent(out) :: task_id + integer, intent(out) :: m_task, f, edI_index(N_det_generators) + double precision, intent(out) :: breve_delta_m(N_states, N_det, 2), edI_task(N_det_generators) + integer, intent(out) :: task_id, n_tasks integer :: rc, i, j, k - integer, intent(out) :: N_buf(3) - rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pulla" - - rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0) - if(rc /= 4) stop "pulla" - - - - - if(cur_cp /= -1) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pulld" - - do i=1,2 - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) - do j=1,N_det - do k=1,N_states - delta_loc(k,j,i) = real(real4buf(k,j,i), 8) - end do - end do - end do - else - rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) - if(rc /= 8*N_states) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) - if(rc /= 4*3) stop "pull" - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - end if - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "pull4" - end if + + rc = f77_zmq_recv( zmq_socket_pull, m_task, 4, 0) + if(rc /= 4) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) + if(rc /= 8*n_tasks) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "pullc" ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE diff --git a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES index 0a06e986..4f09bfc8 100644 --- a/plugins/shiftedbk/NEEDED_CHILDREN_MODULES +++ b/plugins/shiftedbk/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -dress_zmq DavidsonDressed Selectors_full Generators_CAS +dress_zmq DavidsonDressed Selectors_full Generators_full diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index 6a481389..65188e45 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -294,9 +294,8 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) - slave_sum_alpha2(:,iproc) += c_alpha(:)**2 if(contrib < sb(iproc)%mini) then call add_to_selection_buffer(sb(iproc), alpha, contrib) From aae9d203ecbf8d53accc6bc35c2ab56aeeae78a6 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 10:07:02 +0200 Subject: [PATCH 05/35] potential fragmentation bug --- plugins/dress_zmq/dress_stoch_routines.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 9f112b75..56a35fee 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -370,7 +370,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, allocate(breve_delta_m(N_states, N_det, 2)) allocate(dot_f(dress_N_cp)) allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) - edI = -100000d0 + edI = 0d0 cp = 0d0 dot_f(:) = dress_dot_F(:) @@ -418,7 +418,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, endif end do do i=1,n_tasks - edI(:, edI_index(i)) = edI_task(:, i) !!!!!!!!!!!!!!! += !!!!! + edI(:, edI_index(i)) += edI_task(:, i) end do cp(:,:,m_task,1) += breve_delta_m(:,:,1) cp(:,:,m_task,2) += breve_delta_m(:,:,2) From bac039bdf1bd00673c0795b1cb23adb593e9d426 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 10:58:17 +0200 Subject: [PATCH 06/35] relative error 1d-5 --- plugins/dress_zmq/dress_stoch_routines.irp.f | 19 +++++++++++++------ plugins/dress_zmq/dressing.irp.f | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 56a35fee..a9577e72 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -353,7 +353,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR), external :: new_zmq_pull_socket, zmq_abort integer :: more integer :: i, c, j, k, f, t, m, p, m_task integer :: task_id, n_tasks @@ -362,7 +362,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, external :: omp_get_wtime integer, allocatable :: dot_f(:) integer, external :: zmq_delete_tasks, dress_find_sample - + delta = 0d0 delta_s2 = 0d0 allocate(cp(N_states, N_det, dress_N_cp, 2), edI(N_states, N_det)) @@ -382,7 +382,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, S(:) = 0d0 S2(:) = 0d0 time0 = omp_get_wtime() - do while (m <= dress_N_cp) + more = 1 + do while (m <= dress_N_cp .and. more == 1) if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -406,8 +407,15 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, error = eqt time = omp_get_wtime() print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' - !end do - m += 1 + m += 1 + if(eqt < relative_error) then + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + end if else task_id = 0 do @@ -429,7 +437,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, delta(:,:) = cp(:,:,m-1,1) delta_s2(:,:) = cp(:,:,m-1,2) - dress(istate) = E(istate)+E0+avg call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 0e95ef56..7005cd86 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] ! else ! errr = 1d-4 ! end if - relative_error = 0d0 ! 5.d-5 + relative_error = 1.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") From 2a6c1941d45be78083ec8c1dccb453f51a12c338 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 11:43:11 +0200 Subject: [PATCH 07/35] corrected when relative_error=0d0 --- plugins/dress_zmq/dress_stoch_routines.irp.f | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index a9577e72..767c4598 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -383,7 +383,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, S2(:) = 0d0 time0 = omp_get_wtime() more = 1 - do while (m <= dress_N_cp .and. more == 1) + do while (m <= dress_N_cp) + if(more == 0 .and. dot_f(m) /= 0) exit if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -408,7 +409,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, time = omp_get_wtime() print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' m += 1 - if(eqt < relative_error) then + if(eqt <= relative_error) then + print *, "ABORT" if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) if (zmq_abort(zmq_to_qp_run_socket) == -1) then From a254fdd7cffd8348846c910f240965104c969a09 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 15:24:07 +0200 Subject: [PATCH 08/35] parallel bug --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 1 - plugins/dress_zmq/dress_stoch_routines.irp.f | 57 +++++++++----------- plugins/dress_zmq/run_dress_slave.irp.f | 24 ++++----- 3 files changed, 33 insertions(+), 49 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 31598181..02058dfb 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -233,7 +233,6 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2 end do end if end do - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 767c4598..faf26c8a 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -27,22 +27,10 @@ BEGIN_PROVIDER [ integer, dress_N_cp_max ] dress_N_cp_max = 100 END_PROVIDER -BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] - implicit none - integer :: i - - do i=1,dress_N_cp_max-1 - dress_M_m(i) = N_det_generators * i / (dress_N_cp_max+1) - end do - dress_M_m(1) = 1 - dress_M_m(dress_N_cp_max) = N_det_generators+1 -END_PROVIDER - - - BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] + BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] +&BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] &BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] -&BEGIN_PROVIDER[ integer, dress_R, (0:N_det_generators)] &BEGIN_PROVIDER[ integer, dress_R1, (0:N_det_generators)] &BEGIN_PROVIDER[ double precision, dress_M_mi, (dress_N_cp_max, N_det_generators+1)] &BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] @@ -59,12 +47,17 @@ END_PROVIDER dress_M_mi = 0d0 tilde_M = 0d0 - dress_R(:) = 0 dress_R1(:) = 0 N_c = 0 N_j = pt2_n_0(1) d(:) = .false. + do i=1,dress_N_cp_max-1 + dress_M_m(i) = N_det_generators * i / (dress_N_cp_max+1) + end do + dress_M_m(1) = 1 + dress_M_m(dress_N_cp_max) = N_det_generators+1 + do i=1,N_j d(i) = .true. pt2_J(i) = i @@ -103,7 +96,6 @@ END_PROVIDER if(N_c == dress_M_m(m)) then dress_R1(m) = N_j - dress_R(N_j) = N_c dress_M_mi(m, :N_det_generators) = tilde_M(:) m += 1 end if @@ -111,7 +103,7 @@ END_PROVIDER dress_N_cp = m-1 dress_R1(dress_N_cp) = N_j - + dress_M_m(dress_N_cp) = N_c !!!!!!!!!!!!!! do m=1,dress_N_cp do i=dress_R1(m-1)+1, dress_R1(m) @@ -263,6 +255,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) !!$OMP END PARALLEL delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' @@ -401,25 +394,23 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, S2(p) += x**2 end do end do - t = dress_dot_t(m) - avg = S(t) / dble(c) - eqt = (S2(t) / c) - (S(t)/c)**2 - eqt = sqrt(eqt / dble(c-1)) - error = eqt - time = omp_get_wtime() - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' - m += 1 - if(eqt <= relative_error) then - print *, "ABORT" + t = dress_dot_t(m) + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + error = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + m += 1 + if(eqt <= 0d0*relative_error) then + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif + print *, irp_here, ': Error in sending abort signal (2)' endif - end if + endif + end if else - task_id = 0 do call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) if(task_id == 0) exit diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 513cdbda..6941b7b2 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -46,7 +46,7 @@ subroutine run_dress_slave(thread,iproce,energy) edI = 0d0 f = 0 - delta_det = 0d9 + delta_det = 0d0 cp = 0d0 task(:) = CHAR(0) @@ -64,14 +64,13 @@ subroutine run_dress_slave(thread,iproce,energy) will_send = 0 double precision :: hij, sij, tmp - !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(breve_delta_m, task, task_id) & - !$OMP PRIVATE(fac,m) & - !$OMP PRIVATE(i, will_send, i_generator, subset, iproc) & + !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & + !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -117,15 +116,13 @@ subroutine run_dress_slave(thread,iproce,energy) breve_delta_m(:,:,2) += cp(:,:,l,2) end do - breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) !/ cps_N(cur_cp) + breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) do t=dress_dot_t(will_send)-1,0,-1 breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) end do - - call omp_set_lock(sending) n_tasks = 0 sum_f = 0 @@ -137,7 +134,6 @@ subroutine run_dress_slave(thread,iproce,energy) sum_f += f(i) end if end do -!!!!call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) call omp_unset_lock(sending) end if @@ -145,11 +141,9 @@ subroutine run_dress_slave(thread,iproce,energy) if(m /= dress_N_cp+1) then !UPDATE i_generator - breve_delta_m(:,:,:) = 0d0 call generator_start(i_generator, iproc) - call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator)*0 + 1, iproc) t = dress_T(i_generator) @@ -159,9 +153,9 @@ subroutine run_dress_slave(thread,iproce,energy) delta_det(:,:,t, 2) += breve_delta_m(:,:,2) call omp_unset_lock(lck_det(t)) - do p=1,dress_N_cp ! m, dress_N_cp - if(dress_e(i_generator, p) /= 0) then - fac = dress_e(i_generator, p) * pt2_W_T / pt2_w(i_generator) + do p=1,dress_N_cp + if(dress_e(i_generator, p) /= 0d0) then + fac = dress_e(i_generator, p) call omp_set_lock(lck_sto(p)) cp(:,:,p,1) += breve_delta_m(:,:,1) * fac cp(:,:,p,2) += breve_delta_m(:,:,2) * fac @@ -170,11 +164,11 @@ subroutine run_dress_slave(thread,iproce,energy) end do tmp = 0d0 - do i=1,N_det + do i=N_det,1,-1 tmp += psi_coef(i, dress_stoch_istate)*breve_delta_m(dress_stoch_istate, i, 1) end do !$OMP ATOMIC - edI(i_generator) += tmp! dot_product(psi_det_coef(:, dress_stoch_istate), breve_delta_m(dress_stoch_istate, :, 1)) + edI(i_generator) += tmp !$OMP ATOMIC f(i_generator) += 1 !push bidon From 0609e8c627ea7bf88d617e45dffeb64c9aea37b6 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 20:52:05 +0200 Subject: [PATCH 09/35] debugging --- plugins/dress_zmq/dress_stoch_routines.irp.f | 59 +++++++++++++------- plugins/dress_zmq/run_dress_slave.irp.f | 59 ++++++++++++++------ 2 files changed, 79 insertions(+), 39 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index faf26c8a..025600d6 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -183,6 +183,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) + print *, "ZMQ_dress" task(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) @@ -232,7 +233,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) do i=1,N_det_generators - do j=1,pt2_F(i) !!!!!!!!!!!! + do j=1,pt2_F(i) write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then stop 'Unable to add task to task server' @@ -286,7 +287,7 @@ end allocate(d(N_det_generators+1)) - dress_e(:,:) = 1d0 + dress_e(:,:) = 0d0 dress_dot_t(:) = 0 dress_dot_n_0(:) = 0 dress_dot_F = 0 @@ -319,6 +320,7 @@ end end do end do end do + do m=dress_N_cp, 2, -1 dress_e(:,m) -= dress_e(:,m-1) end do @@ -355,19 +357,20 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, external :: omp_get_wtime integer, allocatable :: dot_f(:) integer, external :: zmq_delete_tasks, dress_find_sample - + logical :: found + found = .false. delta = 0d0 delta_s2 = 0d0 allocate(cp(N_states, N_det, dress_N_cp, 2), edI(N_states, N_det)) allocate(edI_task(N_states, N_det), edI_index(N_det)) allocate(breve_delta_m(N_states, N_det, 2)) - allocate(dot_f(dress_N_cp)) + allocate(dot_f(dress_N_cp+1)) allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) edI = 0d0 cp = 0d0 - dot_f(:) = dress_dot_F(:) - + dot_f(:dress_N_cp) = dress_dot_F(:) + dot_f(dress_N_cp+1) = 1 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() more = 1 m = 1 @@ -376,8 +379,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, S2(:) = 0d0 time0 = omp_get_wtime() more = 1 - do while (m <= dress_N_cp) - if(more == 0 .and. dot_f(m) /= 0) exit + do while (.not. found) !(m <= dress_N_cp) + !if(more == 0 .and. dot_f(m) /= 0) exit if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -400,23 +403,22 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, eqt = sqrt(eqt / dble(c-1)) error = eqt time = omp_get_wtime() - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' m += 1 - if(eqt <= 0d0*relative_error) then - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif - endif + if(eqt <= 1d0*relative_error) then + found = .true. end if else do call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) - if(task_id == 0) exit - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then - stop 'Unable to delete tasks' - endif + if(m_task == 0) then + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' + endif + else + i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + exit + end if end do do i=1,n_tasks edI(:, edI_index(i)) += edI_task(:, i) @@ -427,7 +429,22 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dot_f(m_task) -= f end if end do - + + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + + do while(more /= 0) + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(m_task == 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) + else + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + end if + end do delta(:,:) = cp(:,:,m-1,1) delta_s2(:,:) = cp(:,:,m-1,2) dress(istate) = E(istate)+E0+avg diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 6941b7b2..9a090b36 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -31,11 +31,10 @@ subroutine run_dress_slave(thread,iproce,energy) integer, allocatable :: f(:) integer :: cp_sent, cp_done integer :: cp_max(Nproc) - integer :: will_send, task_id + integer :: will_send, task_id, purge_task_id(dress_N_cp+1) integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending double precision :: fac - if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" @@ -43,7 +42,6 @@ subroutine run_dress_slave(thread,iproce,energy) allocate(cp(N_states, N_det, dress_N_cp, 2)) allocate(edI(N_det_generators), f(N_det_generators)) allocate(edI_index(N_det_generators), edI_task(N_det_generators)) - edI = 0d0 f = 0 delta_det = 0d0 @@ -64,7 +62,8 @@ subroutine run_dress_slave(thread,iproce,energy) will_send = 0 double precision :: hij, sij, tmp - + logical :: purge + purge_task_id = 0 hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL !$OMP PARALLEL DEFAULT(SHARED) & @@ -72,7 +71,6 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -82,12 +80,11 @@ subroutine run_dress_slave(thread,iproce,energy) stop "WORKER -1" end if - iproc = omp_get_thread_num()+1 allocate(breve_delta_m(N_states,N_det,2)) - do while(m /= dress_N_cp+1) + do while(cp_done > cp_sent .or. m /= dress_N_cp+1) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) task = task//" 0" if(task_id /= 0) then @@ -106,12 +103,16 @@ subroutine run_dress_slave(thread,iproce,energy) will_send = cp_sent + 1 cp_sent = will_send end if + if(purge_task_id(m) == 0) then + purge_task_id(m) = task_id + task_id = 0 + end if !$OMP END CRITICAL if(will_send /= 0) then breve_delta_m = 0d0 - do l=1, will_send + do l=will_send, 1,-1 breve_delta_m(:,:,1) += cp(:,:,l,1) breve_delta_m(:,:,2) += cp(:,:,l,2) end do @@ -134,7 +135,10 @@ subroutine run_dress_slave(thread,iproce,energy) sum_f += f(i) end if end do - call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) + if(purge_task_id(will_send) /= 0) then + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id(will_send), n_tasks) + end if + purge_task_id(will_send) = 0 call omp_unset_lock(sending) end if @@ -172,10 +176,20 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP ATOMIC f(i_generator) += 1 !push bidon - call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) + if(task_id /= 0) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) + end if end if end do - + !$OMP BARRIER + !$OMP SINGLE + do m=1,dress_N_cp + if(purge_task_id(m) /= 0) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, purge_task_id(m), 1) + end if + end do + !$OMP END SINGLE + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) @@ -210,15 +224,20 @@ subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, b rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push4" - + rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) if(rc /= 8*n_tasks) stop "push5" rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, ZMQ_SNDMORE) if(rc /= 4*n_tasks) stop "push6" - - rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, 0) - if(rc /= 8*N_det*N_states*2) stop "push6" + + if(m_task /= 0) then + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "push6" + else + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8, 0) + if(rc /= 8) stop "push6" + end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE @@ -256,9 +275,13 @@ subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, b rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) - if(rc /= 8*N_det*N_states*2) stop "pullc" + if(m_task /= 0) then + rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "pullc" + else + rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8, 0) + if(rc /= 8) stop "pullc" + end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE From bb6e073cf10039f8a333d120b88dda109169528b Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Thu, 30 Aug 2018 21:24:45 +0200 Subject: [PATCH 10/35] ungodly hack to prevent double providing --- plugins/dress_zmq/dressing.irp.f | 7 ++++++- plugins/dress_zmq/run_dress_slave.irp.f | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 7005cd86..efd73874 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -82,11 +82,16 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) double precision :: E_CI_before(N_states), relative_error + integer :: cnt = 0 ! prevents re-providing if delta_ij_tmp is ! just being copied - if(N_det_delta_ij /= N_det) return + !if(N_det_delta_ij /= N_det) return + + cnt += 1 + if(mod(cnt,2) == 0) return + if(.true.) then allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 9a090b36..01df767e 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -137,6 +137,7 @@ subroutine run_dress_slave(thread,iproce,energy) end do if(purge_task_id(will_send) /= 0) then call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id(will_send), n_tasks) + !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(will_send)) end if purge_task_id(will_send) = 0 call omp_unset_lock(sending) @@ -177,6 +178,7 @@ subroutine run_dress_slave(thread,iproce,energy) f(i_generator) += 1 !push bidon if(task_id /= 0) then + !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) end if end if @@ -185,6 +187,7 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP SINGLE do m=1,dress_N_cp if(purge_task_id(m) /= 0) then + !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(m)) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, purge_task_id(m), 1) end if end do From 02893a419de07c922708fe8ee2b828d9ebfd2e9b Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 31 Aug 2018 15:52:16 +0200 Subject: [PATCH 11/35] bug in blocked search - replaced with thesis version --- plugins/dress_zmq/dress_stoch_routines.irp.f | 23 ++++++++------------ 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 025600d6..395f945d 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -455,26 +455,21 @@ end subroutine integer function dress_find_sample(v, w) implicit none double precision, intent(in) :: v, w(0:N_det_generators) - integer :: i,l,h - integer, parameter :: block=64 + integer :: i,l,r l = 0 - h = N_det_generators + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 - end if - end do - !DIR$ LOOP COUNT (64) - do dress_find_sample=l,h - if(w(dress_find_sample) >= v) then - exit + r = i end if end do + + dress_find_sample = r end function From fee31d4e3e70c2edf1aeda202d3f067eddc17532 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 31 Aug 2018 18:56:23 +0200 Subject: [PATCH 12/35] dress fragmentation --- plugins/dress_zmq/dress_stoch_routines.irp.f | 56 ++++---------------- 1 file changed, 9 insertions(+), 47 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 395f945d..d899d041 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -9,7 +9,7 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] implicit none pt2_F(:) = 1 - pt2_F(:N_det_generators/100 + 1) = 1 + pt2_F(:N_det_generators/100+1) = 5 pt2_n_tasks_max = N_det_generators/100 + 1 if(N_det_generators < 256) then @@ -125,44 +125,6 @@ END_PROVIDER END_PROVIDER -! BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] -!&BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] -!&BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] -! implicit none -! dress_e(:,:) = 1d0 -! dress_dot_t(:) = 0 -! dress_dot_n_0(:) = 0 -! -! integer :: U, m, t, i -! -! U = pt2_n_0(1)+1 - ! -! do m=1,dress_N_cp -! do while(dress_M_mi(m, U) /= 0d0) -! U = U+1 -! end do - ! dress_dot_t(m) = pt2_N_teeth + 1 - ! dress_dot_n_0(m) = N_det_generators - !! - ! do t = 2, pt2_N_teeth+1 - ! if(U <= pt2_n_0(t)) then - ! dress_dot_t(m) = t-1 -! dress_dot_n_0(m) = pt2_n_0(t-1) -! exit -! end if -! end do -! do t=dress_dot_t(m), pt2_N_teeth -! do i=pt2_n_0(t)+1, pt2_n_0(t+1) -! dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) - ! end do - ! end do - ! end do -! do m=dress_N_cp, 2, -1 -! dress_e(:,m) -= dress_e(:,m-1) -! end do -!END_PROVIDER - - subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) use f77_zmq @@ -233,7 +195,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) do i=1,N_det_generators - do j=1,pt2_F(i) + do j=1,pt2_F(pt2_J(i)) write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then stop 'Unable to add task to task server' @@ -314,10 +276,8 @@ end exit end if end do - do t=dress_dot_t(m), pt2_N_teeth - do i=pt2_n_0(t)+1, pt2_n_0(t+1) - dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) - end do + do i=dress_dot_n_0(m)+1, N_det_generators !pt2_n_0(t+1) + dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) end do end do @@ -379,8 +339,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, S2(:) = 0d0 time0 = omp_get_wtime() more = 1 - do while (.not. found) !(m <= dress_N_cp) - !if(more == 0 .and. dot_f(m) /= 0) exit + + do while (.not. found) if(dot_f(m) == 0) then E0 = 0 do i=dress_dot_n_0(m),1,-1 @@ -425,7 +385,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end do cp(:,:,m_task,1) += breve_delta_m(:,:,1) cp(:,:,m_task,2) += breve_delta_m(:,:,2) - + if(m_task == 1) then + print *, "M1", f + end if dot_f(m_task) -= f end if end do From de4a0d0caf0e1d441e0244a9ba7c65ae535d58b1 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 31 Aug 2018 18:57:03 +0200 Subject: [PATCH 13/35] removed print --- plugins/dress_zmq/dress_stoch_routines.irp.f | 3 --- 1 file changed, 3 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index d899d041..af3f8a27 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -385,9 +385,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end do cp(:,:,m_task,1) += breve_delta_m(:,:,1) cp(:,:,m_task,2) += breve_delta_m(:,:,2) - if(m_task == 1) then - print *, "M1", f - end if dot_f(m_task) -= f end if end do From 168ca2f2e29b902ab37afc58672b678998f0212a Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 31 Aug 2018 21:07:01 +0200 Subject: [PATCH 14/35] task list optimized --- plugins/dress_zmq/dress_stoch_routines.irp.f | 85 +++++++++++++++----- 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index af3f8a27..015c03e1 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -23,17 +23,45 @@ END_PROVIDER -BEGIN_PROVIDER [ integer, dress_N_cp_max ] +BEGIN_PROVIDER[ integer, dress_N_cp_max ] dress_N_cp_max = 100 END_PROVIDER + BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER [integer, dress_R1, (0:N_det_generators) ] + implicit none + integer :: m,j + integer :: l,nmov + integer, allocatable :: iorder(:) + allocate(iorder(N_det_generators)) + + pt2_J = pt2_J_ + dress_R1 = dress_R1_ + + do m=1,dress_N_cp + nmov = 0 + l=dress_R1(m-1)+1 + do j=l, dress_R1(m) + if(dress_M_mi(m, pt2_J(j)) == 0 .and. pt2_J(j) > dress_dot_n_0(m)) then + pt2_J(j) += N_det_generators**2 + nmov += 1 + end if + end do + if(dress_R1(m)-dress_R1(m-1) > 0) then + call isort(pt2_J(l), iorder, dress_R1(m)-dress_R1(m-1)) + end if + dress_R1(m) -= nmov + do j=dress_R1(m)+1, dress_R1(m) + nmov + pt2_J(j) -= N_det_generators**2 + end do + end do +END_PROVIDER BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] -&BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_J_, (N_det_generators)] &BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] -&BEGIN_PROVIDER[ integer, dress_R1, (0:N_det_generators)] +&BEGIN_PROVIDER[ integer, dress_R1_, (0:N_det_generators)] &BEGIN_PROVIDER[ double precision, dress_M_mi, (dress_N_cp_max, N_det_generators+1)] -&BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] &BEGIN_PROVIDER [ integer, dress_T, (N_det_generators) ] &BEGIN_PROVIDER [ integer, dress_N_cp ] implicit none @@ -47,7 +75,7 @@ END_PROVIDER dress_M_mi = 0d0 tilde_M = 0d0 - dress_R1(:) = 0 + dress_R1_(:) = 0 N_c = 0 N_j = pt2_n_0(1) d(:) = .false. @@ -60,7 +88,7 @@ END_PROVIDER do i=1,N_j d(i) = .true. - pt2_J(i) = i + pt2_J_(i) = i end do call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) call RANDOM_NUMBER(pt2_u) @@ -78,7 +106,7 @@ END_PROVIDER tilde_M(i) += 1d0 if(.not. d(i)) then N_j += 1 - pt2_J(N_j) = i + pt2_J_(N_j) = i d(i) = .true. end if end do @@ -88,29 +116,24 @@ END_PROVIDER U += 1 if(.not. d(U)) then N_j += 1 - pt2_J(N_j) = U + pt2_J_(N_j) = U d(U) = .true. exit; end if end do if(N_c == dress_M_m(m)) then - dress_R1(m) = N_j + dress_R1_(m) = N_j dress_M_mi(m, :N_det_generators) = tilde_M(:) m += 1 end if enddo dress_N_cp = m-1 - dress_R1(dress_N_cp) = N_j + dress_R1_(dress_N_cp) = N_j dress_M_m(dress_N_cp) = N_c !!!!!!!!!!!!!! - do m=1,dress_N_cp - do i=dress_R1(m-1)+1, dress_R1(m) - dress_P(pt2_J(i)) = m - end do - end do - + do i=1, pt2_n_0(1) dress_T(i) = 0 end do @@ -238,10 +261,29 @@ subroutine dress_slave_inproc(i) call run_dress_slave(1,i,dress_e0_denominator) end - BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] + BEGIN_PROVIDER [integer, dress_dot_F, (dress_N_cp)] +&BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] + implicit none + integer :: m,i + + do m=1,dress_N_cp + do i=dress_R1(m-1)+1, dress_R1(m) + dress_P(pt2_J(i)) = m + end do + end do + + dress_dot_F = 0 + do m=1,dress_N_cp + do i=dress_R1(m-1)+1,dress_R1(m) + dress_dot_F(m) += pt2_F(pt2_J(i)) + end do + end do + +END_PROVIDER + +BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] &BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] &BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] -&BEGIN_PROVIDER [integer, dress_dot_F, (dress_N_cp)] implicit none logical, allocatable :: d(:) @@ -252,14 +294,13 @@ end dress_e(:,:) = 0d0 dress_dot_t(:) = 0 dress_dot_n_0(:) = 0 - dress_dot_F = 0 d(:) = .false. U=0 do m=1,dress_N_cp - do i=dress_R1(m-1)+1,dress_R1(m) - dress_dot_F(m) += pt2_F(pt2_J(i)) - d(pt2_J(i)) = .true. + do i=dress_R1_(m-1)+1,dress_R1_(m) + !dress_dot_F(m) += pt2_F(pt2_J_(i)) + d(pt2_J_(i)) = .true. end do do while(d(U+1)) From 6a7f04cb79ec921d5829a011aa8d8b2ea224760d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sat, 1 Sep 2018 16:58:07 +0200 Subject: [PATCH 15/35] simpler purge --- plugins/dress_zmq/dress_stoch_routines.irp.f | 6 ++++-- plugins/dress_zmq/run_dress_slave.irp.f | 19 ++++++++----------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 015c03e1..ef0835dc 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -406,7 +406,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, time = omp_get_wtime() print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' m += 1 - if(eqt <= 1d0*relative_error) then + if(eqt <= 0d0*relative_error) then found = .true. end if else @@ -417,7 +417,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, stop 'Unable to delete tasks' endif else - i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + if(task_id /= 0) stop "TASKID" + !i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) exit end if end do @@ -439,6 +440,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do while(more /= 0) call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(task_id == 0) cycle if(m_task == 0) then i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) else diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 01df767e..e7f2fec4 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -31,7 +31,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer, allocatable :: f(:) integer :: cp_sent, cp_done integer :: cp_max(Nproc) - integer :: will_send, task_id, purge_task_id(dress_N_cp+1) + integer :: will_send, task_id, purge_task_id integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending double precision :: fac @@ -103,8 +103,8 @@ subroutine run_dress_slave(thread,iproce,energy) will_send = cp_sent + 1 cp_sent = will_send end if - if(purge_task_id(m) == 0) then - purge_task_id(m) = task_id + if(purge_task_id == 0) then + purge_task_id = task_id task_id = 0 end if !$OMP END CRITICAL @@ -135,11 +135,8 @@ subroutine run_dress_slave(thread,iproce,energy) sum_f += f(i) end if end do - if(purge_task_id(will_send) /= 0) then - call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id(will_send), n_tasks) + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(will_send)) - end if - purge_task_id(will_send) = 0 call omp_unset_lock(sending) end if @@ -185,12 +182,12 @@ subroutine run_dress_slave(thread,iproce,energy) end do !$OMP BARRIER !$OMP SINGLE - do m=1,dress_N_cp - if(purge_task_id(m) /= 0) then + !do m=1,dress_N_cp + if(purge_task_id /= 0) then !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(m)) - call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, purge_task_id(m), 1) + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, purge_task_id, 1) end if - end do + !end do !$OMP END SINGLE call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) From a4a6a69459321b1b210c2f51ffbef4f792943919 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sat, 1 Sep 2018 17:01:56 +0200 Subject: [PATCH 16/35] cumulative dot_F --- plugins/dress_zmq/dress_stoch_routines.irp.f | 6 ++++-- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index ef0835dc..b93e7c3b 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -278,7 +278,9 @@ end dress_dot_F(m) += pt2_F(pt2_J(i)) end do end do - + do m=2,dress_N_cp + dress_dot_F(m) += dress_dot_F(m-1) + end do END_PROVIDER BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] @@ -406,7 +408,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, time = omp_get_wtime() print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' m += 1 - if(eqt <= 0d0*relative_error) then + if(eqt <= 1d0*relative_error) then found = .true. end if else diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index e7f2fec4..99e67e67 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -128,11 +128,11 @@ subroutine run_dress_slave(thread,iproce,energy) n_tasks = 0 sum_f = 0 do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) if(dress_P(i) == will_send .and. f(i) /= 0) then n_tasks += 1 edI_task(n_tasks) = edI(i) edI_index(n_tasks) = i - sum_f += f(i) end if end do call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) From 4ba5b79eb391819a4966c7cd19b2e2fdf2ed5129 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sun, 2 Sep 2018 15:50:14 +0200 Subject: [PATCH 17/35] dressing only sent for chosen checkpoint --- plugins/dress_zmq/dress_stoch_routines.irp.f | 46 +++++++++++------- plugins/dress_zmq/run_dress_slave.irp.f | 50 ++++++++++++++------ 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index b93e7c3b..7934beff 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -341,7 +341,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(in) :: relative_error, E(N_states) double precision, intent(out) :: dress(N_states) - double precision, allocatable :: cp(:,:,:,:) double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) @@ -361,20 +360,23 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer, allocatable :: dot_f(:) integer, external :: zmq_delete_tasks, dress_find_sample logical :: found + integer :: worker_id + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,1) + found = .false. delta = 0d0 delta_s2 = 0d0 - allocate(cp(N_states, N_det, dress_N_cp, 2), edI(N_states, N_det)) + allocate(edI(N_states, N_det)) allocate(edI_task(N_states, N_det), edI_index(N_det)) allocate(breve_delta_m(N_states, N_det, 2)) allocate(dot_f(dress_N_cp+1)) allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) edI = 0d0 - cp = 0d0 dot_f(:dress_N_cp) = dress_dot_F(:) dot_f(dress_N_cp+1) = 1 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() more = 1 m = 1 c = 0 @@ -408,7 +410,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, time = omp_get_wtime() print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' m += 1 - if(eqt <= 1d0*relative_error) then + if(eqt <= relative_error) then + integer, external :: zmq_put_dvector + i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1) found = .true. end if else @@ -427,8 +431,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do i=1,n_tasks edI(:, edI_index(i)) += edI_task(:, i) end do - cp(:,:,m_task,1) += breve_delta_m(:,:,1) - cp(:,:,m_task,2) += breve_delta_m(:,:,2) dot_f(m_task) -= f end if end do @@ -439,19 +441,31 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, print *, irp_here, ': Error in sending abort signal (2)' endif endif - + + integer :: ff + + ff = dress_dot_F(m-1) + delta= 0d0 + delta_s2 = 0d0 do while(more /= 0) call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) - if(task_id == 0) cycle - if(m_task == 0) then - i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) - else - i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) - end if + + if(task_id == 0) cycle + if(m_task == 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) + else + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + end if + + + if(m_task >= 0) cycle + ff = ff - f + delta(:,:) += breve_delta_m(:,:,1) + delta_s2(:,:) += breve_delta_m(:,:,2) end do - delta(:,:) = cp(:,:,m-1,1) - delta_s2(:,:) = cp(:,:,m-1,2) dress(istate) = E(istate)+E0+avg + if(ff /= 0) stop "WRONG NUMBER OF FRAGMENTS COLLECTED" + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 99e67e67..5f53b7fc 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -35,7 +35,9 @@ subroutine run_dress_slave(thread,iproce,energy) integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending double precision :: fac - + double precision :: ending(1) + integer, external :: zmq_get_dvector + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) @@ -45,7 +47,6 @@ subroutine run_dress_slave(thread,iproce,energy) edI = 0d0 f = 0 delta_det = 0d0 - cp = 0d0 task(:) = CHAR(0) @@ -65,7 +66,7 @@ subroutine run_dress_slave(thread,iproce,energy) logical :: purge purge_task_id = 0 hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL - + ending(1) = dble(dress_N_cp+1) !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(breve_delta_m, task, task_id) & !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & @@ -92,6 +93,7 @@ subroutine run_dress_slave(thread,iproce,energy) m = dress_P(i_generator) else m = dress_N_cp + 1 + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) end if will_send = 0 @@ -109,7 +111,7 @@ subroutine run_dress_slave(thread,iproce,energy) end if !$OMP END CRITICAL - if(will_send /= 0) then + if(will_send /= 0 .and. will_send <= int(ending(1))) then breve_delta_m = 0d0 do l=will_send, 1,-1 @@ -136,7 +138,6 @@ subroutine run_dress_slave(thread,iproce,energy) end if end do call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) - !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(will_send)) call omp_unset_lock(sending) end if @@ -175,26 +176,45 @@ subroutine run_dress_slave(thread,iproce,energy) f(i_generator) += 1 !push bidon if(task_id /= 0) then - !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) end if end if end do !$OMP BARRIER !$OMP SINGLE - !do m=1,dress_N_cp if(purge_task_id /= 0) then - !call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,purge_task_id(m)) - call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, purge_task_id, 1) - end if - !end do - !$OMP END SINGLE + do while(int(ending(1)) == dress_N_cp+1) + call sleep(1) + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + end do + will_send = int(ending(1)) + breve_delta_m = 0d0 + + do l=will_send, 1,-1 + breve_delta_m(:,:,1) += cp(:,:,l,1) + breve_delta_m(:,:,2) += cp(:,:,l,2) + end do + + breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) + + do t=dress_dot_t(will_send)-1,0,-1 + breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) + breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) + end do + + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) + end do + call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id, 1) + end if + + !$OMP END SINGLE call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL - do i=0,dress_N_cp+1 call omp_destroy_lock(lck_sto(i)) end do @@ -231,7 +251,7 @@ subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, b rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, ZMQ_SNDMORE) if(rc /= 4*n_tasks) stop "push6" - if(m_task /= 0) then + if(m_task < 0) then rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, 0) if(rc /= 8*N_det*N_states*2) stop "push6" else @@ -275,7 +295,7 @@ subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, b rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "pullc" - if(m_task /= 0) then + if(m_task < 0) then rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) if(rc /= 8*N_det*N_states*2) stop "pullc" else From 8df49f394b588ca304ce2ee7d8f8095455573784 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sun, 2 Sep 2018 15:58:48 +0200 Subject: [PATCH 18/35] removed useless computation of intermediate checkpoints --- plugins/dress_zmq/run_dress_slave.irp.f | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 5f53b7fc..b3341093 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -112,20 +112,6 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP END CRITICAL if(will_send /= 0 .and. will_send <= int(ending(1))) then - breve_delta_m = 0d0 - - do l=will_send, 1,-1 - breve_delta_m(:,:,1) += cp(:,:,l,1) - breve_delta_m(:,:,2) += cp(:,:,l,2) - end do - - breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) - - do t=dress_dot_t(will_send)-1,0,-1 - breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) - breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) - end do - call omp_set_lock(sending) n_tasks = 0 sum_f = 0 From abb3b7e08bebdcaf84208ebbe404441d3571b1f9 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sun, 2 Sep 2018 17:18:44 +0200 Subject: [PATCH 19/35] overflow of pt2_J --- plugins/dress_zmq/dress_stoch_routines.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 7934beff..e26defe4 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -43,7 +43,7 @@ END_PROVIDER l=dress_R1(m-1)+1 do j=l, dress_R1(m) if(dress_M_mi(m, pt2_J(j)) == 0 .and. pt2_J(j) > dress_dot_n_0(m)) then - pt2_J(j) += N_det_generators**2 + pt2_J(j) += N_det_generators nmov += 1 end if end do @@ -52,7 +52,7 @@ END_PROVIDER end if dress_R1(m) -= nmov do j=dress_R1(m)+1, dress_R1(m) + nmov - pt2_J(j) -= N_det_generators**2 + pt2_J(j) -= N_det_generators end do end do END_PROVIDER From 99ea7948e0d58023f7ce02386014c8c3d493deb4 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Sep 2018 12:29:12 +0200 Subject: [PATCH 20/35] unbalanced fragmentation --- plugins/dress_zmq/alpha_factory.irp.f | 2 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 20 ++++++++++++++------ plugins/dress_zmq/run_dress_slave.irp.f | 15 +++++++++------ 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index f2902afb..bc15d788 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -70,6 +70,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(preinteresting_det(N_int,2,N_det)) + maskInd = -1 monoAdo = .true. monoBdo = .true. @@ -192,7 +193,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(counted(mo_tot_num, mo_tot_num), countedOrb(mo_tot_num, 2)) allocate (indexes(0:mo_tot_num, 0:mo_tot_num)) allocate (indexes_end(0:mo_tot_num, 0:mo_tot_num)) - maskInd = -1 integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index e26defe4..9abe9095 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -9,14 +9,14 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] implicit none pt2_F(:) = 1 - pt2_F(:N_det_generators/100+1) = 5 + !pt2_F(:N_det_generators/1000*0+50) = 1 pt2_n_tasks_max = N_det_generators/100 + 1 if(N_det_generators < 256) then pt2_minDetInFirstTeeth = 1 pt2_N_teeth = 1 else - pt2_minDetInFirstTeeth = 5 + pt2_minDetInFirstTeeth = min(5, N_det_generators) pt2_N_teeth = 16 end if END_PROVIDER @@ -24,7 +24,7 @@ END_PROVIDER BEGIN_PROVIDER[ integer, dress_N_cp_max ] - dress_N_cp_max = 100 + dress_N_cp_max = 32 END_PROVIDER BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] @@ -79,10 +79,19 @@ END_PROVIDER N_c = 0 N_j = pt2_n_0(1) d(:) = .false. - + + U = min(1, N_det_generators/(dress_N_cp_max**2/2)) do i=1,dress_N_cp_max-1 - dress_M_m(i) = N_det_generators * i / (dress_N_cp_max+1) + dress_M_m(i) = U * ((i**2-i)/2)! / (dress_N_cp_max+1) end do + + + + U = N_det_generators/((dress_N_cp_max**2+dress_N_cp_max)/2)+1 + do i=1, dress_N_cp_max + dress_M_m(i) = U * (((i*i)+i)/2) + end do + dress_M_m(1) = 1 dress_M_m(dress_N_cp_max) = N_det_generators+1 @@ -144,7 +153,6 @@ END_PROVIDER end do end do !!!!!!!!!!!!! - END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index b3341093..899cc3cf 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -37,7 +37,9 @@ subroutine run_dress_slave(thread,iproce,energy) double precision :: fac double precision :: ending(1) integer, external :: zmq_get_dvector - +! double precision, external :: omp_get_wtime +double precision :: time, time0 + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) @@ -71,7 +73,8 @@ subroutine run_dress_slave(thread,iproce,energy) !$OMP PRIVATE(breve_delta_m, task, task_id) & !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & - !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) + !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) & + !$OMP PRIVATE(time, time0) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -80,7 +83,6 @@ subroutine run_dress_slave(thread,iproce,energy) call end_zmq_push_socket(zmq_socket_push,thread) stop "WORKER -1" end if - iproc = omp_get_thread_num()+1 allocate(breve_delta_m(N_states,N_det,2)) @@ -132,9 +134,10 @@ subroutine run_dress_slave(thread,iproce,energy) breve_delta_m(:,:,:) = 0d0 call generator_start(i_generator, iproc) - - call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator)*0 + 1, iproc) - + time0 = omp_get_wtime() + call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc) + time = omp_get_wtime() + !print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 t = dress_T(i_generator) call omp_set_lock(lck_det(t)) From 997a5a1265951394753d3154a6136ca40177ba8d Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Sep 2018 14:18:04 +0200 Subject: [PATCH 21/35] buffered task_id send --- plugins/dress_zmq/dress_stoch_routines.irp.f | 17 ++- plugins/dress_zmq/run_dress_slave.irp.f | 110 +++++++++++-------- 2 files changed, 73 insertions(+), 54 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 9abe9095..fc8f031d 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -10,7 +10,7 @@ END_PROVIDER implicit none pt2_F(:) = 1 !pt2_F(:N_det_generators/1000*0+50) = 1 - pt2_n_tasks_max = N_det_generators/100 + 1 + pt2_n_tasks_max = 16 ! N_det_generators/100 + 1 if(N_det_generators < 256) then pt2_minDetInFirstTeeth = 1 @@ -176,7 +176,6 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) - print *, "ZMQ_dress" task(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) @@ -359,9 +358,9 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_pull_socket, zmq_abort - integer :: more + integer, allocatable :: task_id(:) integer :: i, c, j, k, f, t, m, p, m_task - integer :: task_id, n_tasks + integer :: more, n_tasks double precision :: E0, error, x, v, time, time0 double precision :: avg, eqt double precision, external :: omp_get_wtime @@ -376,6 +375,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, found = .false. delta = 0d0 delta_s2 = 0d0 + allocate(task_id(pt2_n_tasks_max)) allocate(edI(N_states, N_det)) allocate(edI_task(N_states, N_det), edI_index(N_det)) allocate(breve_delta_m(N_states, N_det, 2)) @@ -431,7 +431,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, stop 'Unable to delete tasks' endif else - if(task_id /= 0) stop "TASKID" + !if(task_id(1) /= 0) stop "TASKID" !i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) exit end if @@ -442,7 +442,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dot_f(m_task) -= f end if end do - if (zmq_abort(zmq_to_qp_run_socket) == -1) then call sleep(1) if (zmq_abort(zmq_to_qp_run_socket) == -1) then @@ -458,10 +457,10 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, do while(more /= 0) call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) - if(task_id == 0) cycle - if(m_task == 0) then + !if(task_id(0) == 0) cycle + if(m_task == 0) then i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) - else + else if(m_task < 0) then i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) end if diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 899cc3cf..95db9d92 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -31,7 +31,8 @@ subroutine run_dress_slave(thread,iproce,energy) integer, allocatable :: f(:) integer :: cp_sent, cp_done integer :: cp_max(Nproc) - integer :: will_send, task_id, purge_task_id + integer :: will_send, task_id, purge_task_id, ntask_buf + integer, allocatable :: task_buf(:) integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending double precision :: fac @@ -74,7 +75,7 @@ double precision :: time, time0 !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) & - !$OMP PRIVATE(time, time0) + !$OMP PRIVATE(task_buf, ntask_buf,time, time0) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -85,8 +86,9 @@ double precision :: time, time0 end if iproc = omp_get_thread_num()+1 allocate(breve_delta_m(N_states,N_det,2)) - - + allocate(task_buf(pt2_n_tasks_max)) + ntask_buf = 0 + do while(cp_done > cp_sent .or. m /= dress_N_cp+1) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) task = task//" 0" @@ -110,6 +112,9 @@ double precision :: time, time0 if(purge_task_id == 0) then purge_task_id = task_id task_id = 0 + else if(task_id /= 0) then + ntask_buf += 1 + task_buf(ntask_buf) = task_id end if !$OMP END CRITICAL @@ -164,12 +169,17 @@ double precision :: time, time0 !$OMP ATOMIC f(i_generator) += 1 !push bidon - if(task_id /= 0) then - call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_id, 1) + if(ntask_buf == size(task_buf)) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 end if end if end do !$OMP BARRIER + if(ntask_buf /= 0) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 + end if !$OMP SINGLE if(purge_task_id /= 0) then do while(int(ending(1)) == dress_N_cp+1) @@ -220,33 +230,35 @@ subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, b integer(ZMQ_PTR), intent(in) :: zmq_socket_push integer, intent(in) :: m_task, f, edI_index(n_tasks) double precision, intent(in) :: breve_delta_m(N_states, N_det, 2), edI_task(n_tasks) - integer, intent(in) :: task_id, n_tasks + integer, intent(in) :: task_id(pt2_n_tasks_max), n_tasks integer :: rc, i, j, k - rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push1" - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push2" - rc = f77_zmq_send( zmq_socket_push, m_task, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push3" - - rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push4" - rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) - if(rc /= 8*n_tasks) stop "push5" - - rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, ZMQ_SNDMORE) - if(rc /= 4*n_tasks) stop "push6" + if(m_task > 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" - if(m_task < 0) then - rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, 0) - if(rc /= 8*N_det*N_states*2) stop "push6" + rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) + if(rc /= 8*n_tasks) stop "push5" + + rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push6" + else if(m_task == 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + + rc = f77_zmq_send( zmq_socket_push, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push2" else - rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8, 0) - if(rc /= 8) stop "push6" - end if + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, ZMQ_SNDMORE) + if(rc /= 8*N_det*N_states*2) stop "push6" + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push6" + + end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE @@ -264,32 +276,40 @@ subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, b integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer, intent(out) :: m_task, f, edI_index(N_det_generators) double precision, intent(out) :: breve_delta_m(N_states, N_det, 2), edI_task(N_det_generators) - integer, intent(out) :: task_id, n_tasks + integer, intent(out) :: task_id(pt2_n_tasks_max), n_tasks integer :: rc, i, j, k - - rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) - if(rc /= 4) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull4" - + rc = f77_zmq_recv( zmq_socket_pull, m_task, 4, 0) if(rc /= 4) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) - if(rc /= 4) stop "pullc" + if(m_task > 0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + - rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) - if(rc /= 8*n_tasks) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) - if(rc /= 4*n_tasks) stop "pullc" - if(m_task < 0) then + rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) + if(rc /= 8*n_tasks) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pullc" + else if(m_task==0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + + + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pull4" + else rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) if(rc /= 8*N_det*N_states*2) stop "pullc" - else - rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8, 0) - if(rc /= 8) stop "pullc" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull4" + end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH From a521f0cb82bd333ce9879b4dfd87ae1abcf2baaa Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Sep 2018 16:08:02 +0200 Subject: [PATCH 22/35] tasks get by batches of Nproc --- plugins/dress_zmq/run_dress_slave.irp.f | 52 +++++++++++++------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 95db9d92..f5398025 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -11,7 +11,7 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: rc, i, subset, i_generator integer :: worker_id, ctask, ltask - character*(5120) :: task + character*(512) :: task(Nproc) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket @@ -34,13 +34,13 @@ subroutine run_dress_slave(thread,iproce,energy) integer :: will_send, task_id, purge_task_id, ntask_buf integer, allocatable :: task_buf(:) integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) - integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task double precision :: fac double precision :: ending(1) integer, external :: zmq_get_dvector ! double precision, external :: omp_get_wtime double precision :: time, time0 - + integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) @@ -51,9 +51,10 @@ double precision :: time, time0 f = 0 delta_det = 0d0 - task(:) = CHAR(0) + task = CHAR(0) call omp_init_lock(sending) + call omp_init_lock(getting_task) do i=0,dress_N_cp+1 call omp_init_lock(lck_sto(i)) end do @@ -70,8 +71,9 @@ double precision :: time, time0 purge_task_id = 0 hij = E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL ending(1) = dble(dress_N_cp+1) + ntask_tbd = 0 !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(breve_delta_m, task, task_id) & + !$OMP PRIVATE(breve_delta_m, task_id) & !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) & @@ -90,16 +92,27 @@ double precision :: time, time0 ntask_buf = 0 do while(cp_done > cp_sent .or. m /= dress_N_cp+1) - call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - task = task//" 0" + call omp_set_lock(getting_task) + if(ntask_tbd == 0) then + ntask_tbd = size(task_tbd) + call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd) + !task = task//" 0" + end if + + task_id = task_tbd(1) if(task_id /= 0) then - read (task,*) subset, i_generator + read (task(1),*) subset, i_generator + do i=1,size(task_tbd)-1 + task_tbd(i) = task_tbd(i+1) + task(i) = task(i+1) + end do m = dress_P(i_generator) + ntask_tbd -= 1 else m = dress_N_cp + 1 i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) end if - + call omp_unset_lock(getting_task) will_send = 0 !$OMP CRITICAL @@ -180,7 +193,7 @@ double precision :: time, time0 call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) ntask_buf = 0 end if - !$OMP SINGLE + !$OMP SINGLE if(purge_task_id /= 0) then do while(int(ending(1)) == dress_N_cp+1) call sleep(1) @@ -240,24 +253,22 @@ subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, b if(rc /= 4) stop "push1" rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push4" - rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) if(rc /= 8*n_tasks) stop "push5" - rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "push6" else if(m_task == 0) then rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) if(rc /= 4) stop "push1" - rc = f77_zmq_send( zmq_socket_push, task_id, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "push2" else + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, ZMQ_SNDMORE) if(rc /= 8*N_det*N_states*2) stop "push6" rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) if(rc /= 4) stop "push6" - end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH @@ -285,31 +296,24 @@ subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, b if(m_task > 0) then rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) if(rc /= 4) stop "pullc" - - - rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) if(rc /= 4) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) if(rc /= 8*n_tasks) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "pullc" else if(m_task==0) then rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) if(rc /= 4) stop "pullc" - - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*n_tasks, 0) if(rc /= 4*n_tasks) stop "pull4" else + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) if(rc /= 8*N_det*N_states*2) stop "pullc" - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "pull4" - end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH From dda0dc34df82a59fac3c3abb2b14943fdb23c0f8 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 3 Sep 2018 17:48:04 +0200 Subject: [PATCH 23/35] corrected pt2_find_sample --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 29 +++++++++----------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 02058dfb..d5291a80 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -240,28 +240,22 @@ end subroutine integer function pt2_find_sample(v, w) implicit none double precision, intent(in) :: v, w(0:N_det_generators) - integer :: i,l,h - integer, parameter :: block=64 + integer :: i,l,r l = 0 - h = N_det_generators + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 + r = i end if end do - !DIR$ LOOP COUNT (64) - do pt2_find_sample=l,h - if(w(pt2_find_sample) >= v) then - exit - end if - end do -end function + pt2_find_sample = r +end function BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] @@ -284,8 +278,11 @@ end function d(i) = .true. pt2_J(i) = i end do - + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + U = 0 From 9a0f900d8c57bcbca36a09c11616e234528e1643 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 14:09:51 +0200 Subject: [PATCH 24/35] tests if teeth can be built --- plugins/dress_zmq/dress_stoch_routines.irp.f | 44 ++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index fc8f031d..640d5dbc 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -8,20 +8,58 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] &BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] implicit none + logical, external :: testTeethBuilding pt2_F(:) = 1 !pt2_F(:N_det_generators/1000*0+50) = 1 pt2_n_tasks_max = 16 ! N_det_generators/100 + 1 - if(N_det_generators < 256) then + if(N_det_generators < 1024) then pt2_minDetInFirstTeeth = 1 pt2_N_teeth = 1 else - pt2_minDetInFirstTeeth = min(5, N_det_generators) - pt2_N_teeth = 16 + do pt2_N_teeth=32,1,-1 + pt2_minDetInFirstTeeth = min(5, N_det_generators) + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do end if END_PROVIDER +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + + n0 = 0 + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + testTeethBuilding = .false. + return + end if + end do + stop "exited testTeethBuilding" +end function BEGIN_PROVIDER[ integer, dress_N_cp_max ] dress_N_cp_max = 32 From 34d9fa01657ed5d159a1c62f8f24cdb266d48153 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 14:27:10 +0200 Subject: [PATCH 25/35] potential numerical precision bug --- plugins/dress_zmq/dress_stoch_routines.irp.f | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 640d5dbc..5be8f9df 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -37,11 +37,11 @@ logical function testTeethBuilding(minF, N) allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) tilde_cW(0) = 0d0 - do i=1,N_det_generators tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) enddo + tilde_cW(N_det_generators) = 1d0 n0 = 0 do @@ -130,7 +130,7 @@ END_PROVIDER dress_M_m(i) = U * (((i*i)+i)/2) end do - dress_M_m(1) = 1 + dress_M_m(1) = min(dress_M_m(1), 2) dress_M_m(dress_N_cp_max) = N_det_generators+1 do i=1,N_j @@ -556,6 +556,7 @@ end function tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) enddo + tilde_cW(N_det_generators) = 1d0 pt2_n_0(1) = 0 do From 0d91b9310a7d580e8bc97a6a0af7a62a8506f8c0 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 14:35:04 +0200 Subject: [PATCH 26/35] timestamp of first pull --- plugins/dress_zmq/dress_stoch_routines.irp.f | 7 ++++++- plugins/dress_zmq/run_dress_slave.irp.f | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 5be8f9df..6ef3b298 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -428,7 +428,8 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, c = 0 S(:) = 0d0 S2(:) = 0d0 - time0 = omp_get_wtime() + time = omp_get_wtime() + time0 = -1d0 ! omp_get_wtime() more = 1 do while (.not. found) @@ -464,6 +465,10 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, else do call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(time0 == -1d0) then + print *, "first pull", omp_get_wtime()-time + time0 = omp_get_wtime() + end if if(m_task == 0) then if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then stop 'Unable to delete tasks' diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index f5398025..ea1d2709 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -87,9 +87,13 @@ double precision :: time, time0 stop "WORKER -1" end if iproc = omp_get_thread_num()+1 - allocate(breve_delta_m(N_states,N_det,2)) + allocate(breve_delta_m(N_states,N_det,2)) allocate(task_buf(pt2_n_tasks_max)) ntask_buf = 0 + + if(iproc==1) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + end if do while(cp_done > cp_sent .or. m /= dress_N_cp+1) call omp_set_lock(getting_task) From 03b8f353bd94b837c02e570a8d3fb68689003135 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 14:41:46 +0200 Subject: [PATCH 27/35] teeth building check for pt2_stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 51 ++++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index d5291a80..16ca17b6 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -12,20 +12,61 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] &BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] implicit none + logical, external :: testTeethBuilding pt2_F(:) = 1 - pt2_F(:N_det_generators/100 + 1) = 1 - pt2_n_tasks_max = N_det_generators/100 + 1 + !pt2_F(:N_det_generators/1000*0+50) = 1 + pt2_n_tasks_max = 16 ! N_det_generators/100 + 1 - if(N_det_generators < 256) then + if(N_det_generators < 1024) then pt2_minDetInFirstTeeth = 1 pt2_N_teeth = 1 else - pt2_minDetInFirstTeeth = 5 - pt2_N_teeth = 16 + do pt2_N_teeth=32,1,-1 + pt2_minDetInFirstTeeth = min(5, N_det_generators) + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do end if END_PROVIDER +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(N_det_generators) = 1d0 + + n0 = 0 + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + testTeethBuilding = .false. + return + end if + end do + stop "exited testTeethBuilding" +end function + + + subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) use f77_zmq use selection_types From 8529a0f3f66d2f1ba9d717b0de60b268a4a821a7 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 14:57:19 +0200 Subject: [PATCH 28/35] reduced prints in pt2_stoch --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 2 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 16ca17b6..4a8be2e7 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -258,7 +258,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2 pt2(pt2_stoch_istate) = E0-E+avg error(pt2_stoch_istate) = eqt time = omp_get_wtime() - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + if(mod(c,10)==1 .or. n==N_det_generators) print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' end if n += 1 else if(more == 0) then diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6ef3b298..30507f39 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -62,7 +62,7 @@ logical function testTeethBuilding(minF, N) end function BEGIN_PROVIDER[ integer, dress_N_cp_max ] - dress_N_cp_max = 32 + dress_N_cp_max = 64 END_PROVIDER BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] From 093e3fd021ca5ed73fe3be530fa18e79ceb0085e Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 16:13:00 +0200 Subject: [PATCH 29/35] removed ungodly hack --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 1 + plugins/dress_zmq/dress_general.irp.f | 6 +++--- plugins/dress_zmq/dressing.irp.f | 11 ++--------- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 4a8be2e7..def4133d 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -26,6 +26,7 @@ END_PROVIDER if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit end do end if + print *, pt2_N_teeth END_PROVIDER diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index 9c728b6a..a3af5f92 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -30,7 +30,7 @@ subroutine run_dressing(N_st,energy) iteration = 0 do while (delta_E > thresh_dress) N_det_delta_ij = N_det - touch N_det_delta_ij + touch N_det_delta_ij iteration += 1 print *, '===============================================' print *, 'Iteration', iteration, '/', n_it_dress_max @@ -38,12 +38,12 @@ subroutine run_dressing(N_st,energy) print *, '' E_old = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) !print *, "DELTA IJ", delta_ij(1,1,1) - if(.true.) dummy = delta_ij_tmp(1,1,1) + !if(.true.) provide delta_ij_tmp if(.true.) call delta_ij_done() do i=1,N_st if(.true.) call write_double(6,ci_energy_dressed(i),"Energy") enddo - if(.true.) call diagonalize_ci_dressed + call diagonalize_ci_dressed E_new = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) delta_E = (E_new - E_old)/dble(N_states) diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index efd73874..88b93a23 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -89,8 +89,8 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] !if(N_det_delta_ij /= N_det) return - cnt += 1 - if(mod(cnt,2) == 0) return + !cnt += 1 + !if(mod(cnt,2) == 0) return if(.true.) then allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) @@ -98,13 +98,6 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] delta_ij_tmp = 0d0 E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion - !threshold_selectors = 1.d0 - !:threshold_generators = 1d0 -! if(errr /= 0d0) then -! errr = errr / 2d0 -! else -! errr = 1d-4 -! end if relative_error = 1.d-5 call write_double(6,relative_error,"Convergence of the stochastic algorithm") From 873035e01635a1a0576cc0e8bf834beb3c60fc80 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Sep 2018 17:31:45 +0200 Subject: [PATCH 30/35] Squashed commit of the following: commit 4b9c435dce0f3b3078d573e66fd32b40fca26497 Merge: 74e559c8 093e3fd0 Author: Anthony Scemama Date: Tue Sep 4 16:58:51 2018 +0200 Merge branch 'thesis' of git://github.com/garniron/quantum_package into garniron-thesis commit 093e3fd021ca5ed73fe3be530fa18e79ceb0085e Author: Yann Garniron Date: Tue Sep 4 16:13:00 2018 +0200 removed ungodly hack commit 8529a0f3f66d2f1ba9d717b0de60b268a4a821a7 Author: Yann Garniron Date: Tue Sep 4 14:57:19 2018 +0200 reduced prints in pt2_stoch commit 03b8f353bd94b837c02e570a8d3fb68689003135 Author: Yann Garniron Date: Tue Sep 4 14:41:46 2018 +0200 teeth building check for pt2_stoch commit 0d91b9310a7d580e8bc97a6a0af7a62a8506f8c0 Author: Yann Garniron Date: Tue Sep 4 14:35:04 2018 +0200 timestamp of first pull commit 34d9fa01657ed5d159a1c62f8f24cdb266d48153 Author: Yann Garniron Date: Tue Sep 4 14:27:10 2018 +0200 potential numerical precision bug commit 9a0f900d8c57bcbca36a09c11616e234528e1643 Author: Yann Garniron Date: Tue Sep 4 14:09:51 2018 +0200 tests if teeth can be built commit dda0dc34df82a59fac3c3abb2b14943fdb23c0f8 Author: Yann Garniron Date: Mon Sep 3 17:48:04 2018 +0200 corrected pt2_find_sample commit a521f0cb82bd333ce9879b4dfd87ae1abcf2baaa Author: Yann Garniron Date: Mon Sep 3 16:08:02 2018 +0200 tasks get by batches of Nproc commit 997a5a1265951394753d3154a6136ca40177ba8d Author: Yann Garniron Date: Mon Sep 3 14:18:04 2018 +0200 buffered task_id send commit 99ea7948e0d58023f7ce02386014c8c3d493deb4 Author: Yann Garniron Date: Mon Sep 3 12:29:12 2018 +0200 unbalanced fragmentation commit abb3b7e08bebdcaf84208ebbe404441d3571b1f9 Author: Yann Garniron Date: Sun Sep 2 17:18:44 2018 +0200 overflow of pt2_J commit 8df49f394b588ca304ce2ee7d8f8095455573784 Author: Yann Garniron Date: Sun Sep 2 15:58:48 2018 +0200 removed useless computation of intermediate checkpoints commit 4ba5b79eb391819a4966c7cd19b2e2fdf2ed5129 Author: Yann Garniron Date: Sun Sep 2 15:50:14 2018 +0200 dressing only sent for chosen checkpoint commit a4a6a69459321b1b210c2f51ffbef4f792943919 Author: Yann Garniron Date: Sat Sep 1 17:01:56 2018 +0200 cumulative dot_F commit 6a7f04cb79ec921d5829a011aa8d8b2ea224760d Author: Yann Garniron Date: Sat Sep 1 16:58:07 2018 +0200 simpler purge commit 168ca2f2e29b902ab37afc58672b678998f0212a Author: Yann Garniron Date: Fri Aug 31 21:07:01 2018 +0200 task list optimized commit de4a0d0caf0e1d441e0244a9ba7c65ae535d58b1 Author: Yann Garniron Date: Fri Aug 31 18:57:03 2018 +0200 removed print commit fee31d4e3e70c2edf1aeda202d3f067eddc17532 Author: Yann Garniron Date: Fri Aug 31 18:56:23 2018 +0200 dress fragmentation commit 02893a419de07c922708fe8ee2b828d9ebfd2e9b Author: Yann Garniron Date: Fri Aug 31 15:52:16 2018 +0200 bug in blocked search - replaced with thesis version commit bb6e073cf10039f8a333d120b88dda109169528b Author: Yann Garniron Date: Thu Aug 30 21:24:45 2018 +0200 ungodly hack to prevent double providing commit 0609e8c627ea7bf88d617e45dffeb64c9aea37b6 Author: Yann Garniron Date: Thu Aug 30 20:52:05 2018 +0200 debugging commit a254fdd7cffd8348846c910f240965104c969a09 Author: Yann Garniron Date: Thu Aug 30 15:24:07 2018 +0200 parallel bug commit 2a6c1941d45be78083ec8c1dccb453f51a12c338 Author: Yann Garniron Date: Thu Aug 30 11:43:11 2018 +0200 corrected when relative_error=0d0 commit bac039bdf1bd00673c0795b1cb23adb593e9d426 Author: Yann Garniron Date: Thu Aug 30 10:58:17 2018 +0200 relative error 1d-5 commit aae9d203ecbf8d53accc6bc35c2ab56aeeae78a6 Author: Yann Garniron Date: Thu Aug 30 10:07:02 2018 +0200 potential fragmentation bug commit ad69f39f99d0b0dd73f556fb13d1d55337c5b066 Author: Yann Garniron Date: Wed Aug 29 20:54:58 2018 +0200 dress_zmq re-implemented commit d78f64732a5493d7f10c7c80b564005e63a133fc Author: Yann Garniron Date: Wed Aug 29 11:30:19 2018 +0200 pt2_stoch re-implemented commit 4b9b54e19ac7459589681e5ff7aa358dde9f5fd5 Author: Yann Garniron Date: Tue Aug 28 10:24:38 2018 +0200 removed test for phase_mask_bit commit 3abccca5e35948e54a659cacccea42fbfcf4c296 Author: Yann Garniron Date: Fri Aug 3 23:44:05 2018 +0200 phasemask_bit --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 707 +++++------- plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 20 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 14 +- plugins/dress_zmq/alpha_factory.irp.f | 15 +- plugins/dress_zmq/dress_stoch_routines.irp.f | 1004 ++++++++--------- plugins/dress_zmq/dressing.irp.f | 37 +- plugins/dress_zmq/run_dress_slave.irp.f | 527 ++++----- plugins/shiftedbk/shifted_bk_routines.irp.f | 3 +- src/Determinants/slater_rules.irp.f | 134 ++- 10 files changed, 1115 insertions(+), 1348 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index e6e2418f..b6952364 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,8 +1,73 @@ -BEGIN_PROVIDER [ integer, fragment_first ] - implicit none - fragment_first = first_det_of_teeth(1) +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + logical, external :: testTeethBuilding + pt2_F(:) = 1 + !pt2_F(:N_det_generators/1000*0+50) = 1 + pt2_n_tasks_max = 16 ! N_det_generators/100 + 1 + + if(N_det_generators < 1024) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + do pt2_N_teeth=32,1,-1 + pt2_minDetInFirstTeeth = min(5, N_det_generators) + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if + print *, pt2_N_teeth +END_PROVIDER + + +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(N_det_generators) = 1d0 + + n0 = 0 + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + testTeethBuilding = .false. + return + end if + end do + stop "exited testTeethBuilding" +end function + + + subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) use f77_zmq use selection_types @@ -11,22 +76,15 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(in) :: relative_error, absolute_error, E(N_states) double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, allocatable :: pt2_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, k, Ncomb, i_generator_end - integer, external :: pt2_find + integer :: i, j, k - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, external :: omp_get_wtime double precision :: state_average_weight_save(N_states), w(N_states) - double precision :: time integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket if (N_det < max(10,N_states)) then @@ -40,26 +98,9 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) state_average_weight(:) = 0.d0 state_average_weight(pt2_stoch_istate) = 1.d0 TOUCH state_average_weight pt2_stoch_istate - - allocate(pt2_detail(N_states,N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) - sumabove = 0d0 - sum2above = 0d0 - Nabove = 0d0 - - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight psi_selectors - - computed = .false. - - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - Ncomb=size(comb) - call get_carlo_workbatch(computed, comb, Ncomb, tbc) - pt2_detail = 0d0 + provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors + print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' @@ -97,41 +138,17 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) endif - call create_selection_buffer(1, 1*2, b) - - integer :: ipos - ipos=1 - integer, external :: add_task_to_taskserver - do i=1,tbc(0) - if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 + + do i=1,N_det_generators + do j=1,pt2_F(i) !!!!!!!!!!!! + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if + end do end do - if (ipos > 1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - endif integer, external :: zmq_set_running if (zmq_set_running(zmq_to_qp_run_socket) == -1) then @@ -153,18 +170,16 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull,E(pt2_stoch_istate), b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, w, error) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, absolute_error, w, error) pt2(pt2_stoch_istate) = w(pt2_stoch_istate) else call pt2_slave_inproc(i) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - call delete_selection_buffer(b) print *, '========== ================= ================= =================' - deallocate(pt2_detail, comb, computed, tbc) enddo FREE pt2_stoch_istate state_average_weight(:) = state_average_weight_save(:) @@ -177,34 +192,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) end subroutine -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:size_tbc), Ncomb - logical, intent(in) :: computed(N_det_generators) - double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states,N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 - - mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets, comb_teeth) - do j=1,comb_teeth - if(.not.(computed(dets(j)))) then - exit mainLoop - end if - end do - - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - myVal += pt2_detail(pt2_stoch_istate,dets(j)) * pt2_weight_inv(dets(j)) * comb_step - sumabove(j) += myVal - sum2above(j) += myVal*myVal - Nabove(j) += 1 - end do - end do mainLoop -end subroutine - - subroutine pt2_slave_inproc(i) implicit none integer, intent(in) :: i @@ -212,411 +199,249 @@ subroutine pt2_slave_inproc(i) call run_pt2_slave(1,i,pt2_e0_denominator) end -subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, pt2,error) + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2, error) use f77_zmq use selection_types use bitmasks implicit none - integer, intent(in) :: Ncomb integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) - double precision, intent(in) :: comb(Ncomb), relative_error, absolute_error, E - logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - double precision, intent(out) :: pt2(N_states),error(N_states) + double precision, intent(in) :: relative_error, absolute_error, E + double precision, intent(out) :: pt2(N_states), error(N_states) - type(selection_buffer), intent(inout) :: b - double precision, allocatable :: pt2_mwen(:,:) + double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks + integer, external :: pt2_find_sample - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, n_tasks - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: time0 - double precision :: time, timeLast, Nabove_old + double precision, external :: omp_get_wtime - integer :: tooth, firstTBDcomb, orgTBDcomb, n_tasks_max - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - double precision :: eqt - character*(512) :: task - Nabove_old = -1.d0 - n_tasks_max = N_det_generators/100+1 + double precision :: v, x, avg, eqt, E0 + double precision :: time, time0 - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & - pt2_mwen(N_states, n_tasks_max) ) - - pt2_mwen(1:N_states, 1:n_tasks_max) = 0.d0 - do i=1,N_det_generators - actually_computed(i) = computed(i) - enddo - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - do i=1,tbc(0) - actually_computed(tbc(i)) = .false. - end do - - orgTBDcomb = int(Nabove(1)) - firstTBDcomb = 1 + integer, allocatable :: f(:) + logical, allocatable :: d(:) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(n_tasks_max), index(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + + S(:) = 0d0 + S2(:) = 0d0 + n = 1 + t = 0 + U = 0 + eI(:,:) = 0d0 + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E more = 1 - call wall_time(time0) - timeLast = time0 + time0 = omp_get_wtime() - call get_first_tooth(actually_computed, tooth) - Nabove_old = Nabove(tooth) - - logical :: loop - loop = .True. - pullLoop : do while (loop) - - call pull_pt2_results(zmq_socket_pull, index, pt2_mwen, task_id, n_tasks) - do i=1,n_tasks - pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i) - parts_to_get(index(i)) -= 1 - if(parts_to_get(index(i)) < 0) then - print *, i, index(i), parts_to_get(index(i)) - print *, parts_to_get - stop "PARTS ??" - end if - if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. - enddo - - integer, external :: zmq_delete_tasks - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then - cycle - endif - if (more == 0) then - loop = .False. - endif - - call wall_time(time) - - - if(time - timeLast > 5d0 .or. (.not.loop)) then - timeLast = time - do i=1, first_det_of_teeth(1)-1 - if(.not.(actually_computed(i))) then - cycle pullLoop + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = E + do i=pt2_n_0(t),1,-1 + E0 += eI(pt2_stoch_istate, i) + end do + else + exit end if end do - - integer, external :: zmq_abort - double precision :: E0, avg, prop - - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - call get_first_tooth(actually_computed, tooth) - - if (firstTBDcomb > Ncomb) then - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (1)' - endif - endif -! exit pullLoop - endif - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - if (tooth <= comb_teeth) then - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1.d0) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - else - eqt = 0.d0 - tooth=comb_teeth - endif - call wall_time(time) - if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 10.d0) then - ! Termination - pt2(pt2_stoch_istate) = avg + + c = pt2_R(n) + if(c /= 0) then + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample(v, pt2_cW) + x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + pt2(pt2_stoch_istate) = E0-E+avg error(pt2_stoch_istate) = eqt - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif - endif - else - if ( (Nabove(tooth) > 2.d0) .and. (Nabove(tooth) > Nabove_old) ) then - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - Nabove_old = Nabove(tooth) - endif + time = omp_get_wtime() + if(mod(c,10)==1 .or. n==N_det_generators) print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, eI_task, task_id, n_tasks) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' endif + do i=1,n_tasks + eI(:, index(i)) += eI_task(:, i) + f(index(i)) -= 1 + end do end if - end do pullLoop - - if(tooth == comb_teeth+1) then - pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) - error(pt2_stoch_istate) = 0d0 - else - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) - error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - end if - + end do call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call sort_selection_buffer(b) end subroutine -integer function pt2_find(v, w, sze, imin, imax) + +integer function pt2_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) - integer :: i,l,h - integer, parameter :: block=64 + double precision, intent(in) :: v, w(0:N_det_generators) + integer :: i,l,r - l = imin - h = imax-1 + l = 0 + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 - end if - end do - !DIR$ LOOP COUNT (64) - do pt2_find=l,h - if(w(pt2_find) >= v) then - exit + r = i end if end do + + pt2_find_sample = r end function -BEGIN_PROVIDER [ integer, comb_teeth ] + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] implicit none - BEGIN_DOC -! Number of teeth in the comb - END_DOC - comb_teeth = min(1+N_det/10,100) + integer :: N_c, N_j, U, t, i + double precision :: v + logical, allocatable :: d(:) + integer, external :: pt2_find_sample + + allocate(d(N_det_generators)) + + pt2_R(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + do i=1,N_j + d(i) = .true. + pt2_J(i) = i + end do + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) + call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + + + U = 0 + + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = pt2_find_sample(v, pt2_cW) + if(.not. d(i)) then + N_j += 1 + pt2_J(N_j) = i + d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J(N_j) = U + d(U) = .true. + exit; + end if + end do + enddo + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if END_PROVIDER - -subroutine get_first_tooth(computed, first_teeth) + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i, first_det + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample - first_det = N_det_generators+1+1 - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(.not.(computed(i))) then - first_det = i + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - end do - - do i=comb_teeth+1, 1, -1 - if(first_det_of_teeth(i) < first_det) then - first_teeth = i - exit + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" end if end do - -end subroutine - - -BEGIN_PROVIDER [ integer*8, size_tbc ] - implicit none - BEGIN_DOC -! Size of the tbc array - END_DOC - size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first -END_PROVIDER - -subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) - implicit none - integer, intent(inout) :: Ncomb - double precision, intent(out) :: comb(Ncomb) - integer, intent(inout) :: tbc(0:size_tbc) - logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - integer :: icount, n - integer :: k, l - l=first_det_of_comb - call RANDOM_NUMBER(comb) - do i=1,size(comb) - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - Ncomb = i - if (tbc(0) == N_det_generators) return - do while (computed(l)) - l=l+1 - enddo - k=tbc(0)+1 - tbc(k) = l - computed(l) = .True. - tbc(0) = k - enddo - -end subroutine - - - -subroutine get_comb(stato, dets, ct) - implicit none - integer, intent(in) :: ct - double precision, intent(in) :: stato - integer, intent(out) :: dets(ct) - double precision :: curs - integer :: j - integer, external :: pt2_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) end do -end subroutine - - -subroutine add_comb(comb, computed, tbc, stbc, ct) - implicit none - integer*8, intent(in) :: stbc - integer, intent(in) :: ct - double precision, intent(in) :: comb - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(0:stbc) - integer :: i, k, l, dets(ct) - - !DIR$ FORCEINLINE - call get_comb(comb, dets, ct) - - k=tbc(0)+1 - do i = 1, ct - l = dets(i) - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - tbc(0) = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, pt2_stoch_istate ] - implicit none - BEGIN_DOC - ! State for stochatsic PT2 - END_DOC - pt2_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: pt2_find - - pt2_weight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 - pt2_cweight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 + pt2_cW(0) = 0d0 do i=1,N_det_generators - pt2_weight(i) = psi_coef_generators(i,pt2_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - pt2_cweight(N_det_generators) = pt2_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - pt2_cweight(i) = pt2_weight(i) + pt2_cweight(i+1) + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do - - do i=1,N_det_generators - pt2_weight(i) = pt2_weight(i) / pt2_cweight(1) - pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(1) - enddo - - do i=1,N_det_generators-1 - pt2_cweight(i) = 1.d0 - pt2_cweight(i+1) - end do - pt2_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - first_det_of_comb = 1 - do i=1,N_det_generators - if(pt2_weight(i)/norm_left < .5d0*comb_step) then - first_det_of_comb = i - exit - end if - norm_left -= pt2_weight(i) - end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - stato -= comb_step - end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ] - implicit none - BEGIN_DOC -! Inverse of pt2_weight array - END_DOC - integer :: i - do i=1,N_det_generators - pt2_weight_inv(i) = 1.d0/pt2_weight(i) - enddo - + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 6be25846..6d8b6a8c 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -22,12 +22,11 @@ subroutine run_pt2_slave(thread,iproc,energy) logical :: done double precision,allocatable :: pt2(:,:) - integer :: n_tasks, k, n_tasks_max + integer :: n_tasks, k integer, allocatable :: i_generator(:), subset(:) - n_tasks_max = N_det_generators/100+1 - allocate(task_id(n_tasks_max), task(n_tasks_max)) - allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -47,7 +46,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do while (.not.done) n_tasks = max(1,n_tasks) - n_tasks = min(n_tasks,n_tasks_max) + n_tasks = min(n_tasks,pt2_n_tasks_max) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -66,7 +65,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 - call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) + call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) enddo call wall_time(time1) @@ -201,12 +200,5 @@ IRP_ENDIF end subroutine - -BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ] - integer :: i - do i=1,N_det_generators - pt2_workload(i) = dfloat(N_det_generators - i + 1)**2 - end do - pt2_workload = pt2_workload / sum(pt2_workload) -END_PROVIDER + diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 39f6c01c..af5e4de0 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -177,7 +177,7 @@ subroutine run_selection_slave_old(thread,iproc,energy) else ASSERT (N == buf%N) end if - call select_connected(i_generator,energy,pt2,buf,subset) + call select_connected(i_generator,energy,pt2,buf,subset,fragment_count) endif integer, external :: task_done_to_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 277e6be5..de6bd9f5 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -46,11 +46,11 @@ subroutine get_mask_phase(det, phasemask) end subroutine -subroutine select_connected(i_generator,E0,pt2,b,subset) +subroutine select_connected(i_generator,E0,pt2,b,subset,csubset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b double precision, intent(inout) :: pt2(N_states) integer :: k,l @@ -71,7 +71,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset,csubset) enddo deallocate(fock_diag_tmp) end subroutine @@ -266,7 +266,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -274,7 +274,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d ! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -298,7 +298,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), allocatable:: preinteresting_det(:,:,:) allocate (preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count + !PROVIDE fragment_count monoAdo = .true. monoBdo = .true. @@ -571,7 +571,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index f590f5d1..e5b670fb 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -2,10 +2,10 @@ use bitmasks -subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) +subroutine alpha_callback(delta_ij_loc, i_generator, subset, csubset, iproc) use bitmasks implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) integer, intent(in) :: iproc @@ -15,7 +15,7 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc) do l=1,N_generators_bitmask - call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset,iproc) + call generate_singles_and_doubles(delta_ij_loc,i_generator,l,subset,csubset,iproc) enddo end subroutine @@ -34,7 +34,7 @@ BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ] END_PROVIDER -subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, iproc) +subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, csubset, iproc) use bitmasks implicit none BEGIN_DOC @@ -42,7 +42,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index END_DOC double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2) - integer, intent(in) :: i_generator, subset, bitmask_index + integer, intent(in) :: i_generator, subset, csubset, bitmask_index integer, intent(in) :: iproc @@ -69,8 +69,8 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(abuf(0:N_det*6), labuf(0:N_det)) allocate(preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count + maskInd = -1 monoAdo = .true. monoBdo = .true. @@ -193,7 +193,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index allocate(counted(mo_tot_num, mo_tot_num), countedOrb(mo_tot_num, 2)) allocate (indexes(0:mo_tot_num, 0:mo_tot_num)) allocate (indexes_end(0:mo_tot_num, 0:mo_tot_num)) - maskInd = -1 integer :: nb_count do s1=1,2 do i1=N_holes(s1),1,-1 ! Generate low excitations first @@ -345,7 +344,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 6b7bf396..136ecb7f 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -1,17 +1,205 @@ -BEGIN_PROVIDER [ integer, fragment_first ] +BEGIN_PROVIDER [ integer, dress_stoch_istate ] implicit none - fragment_first = first_det_of_teeth(1) + dress_stoch_istate = 1 +END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + logical, external :: testTeethBuilding + pt2_F(:) = 1 + !pt2_F(:N_det_generators/1000*0+50) = 1 + pt2_n_tasks_max = 16 ! N_det_generators/100 + 1 + + if(N_det_generators < 1024) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + do pt2_N_teeth=32,1,-1 + pt2_minDetInFirstTeeth = min(5, N_det_generators) + if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit + end do + end if END_PROVIDER -subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) +logical function testTeethBuilding(minF, N) + implicit none + integer, intent(in) :: minF, N + integer :: n0, i + double precision :: u0, Wt, r + + double precision, allocatable :: tilde_w(:), tilde_cW(:) + integer, external :: dress_find_sample + + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + tilde_cW(N_det_generators) = 1d0 + + n0 = 0 + do + u0 = tilde_cW(n0) + r = tilde_cW(n0 + minF) + Wt = (1d0 - u0) / dble(N) + if(Wt >= r - u0) then + testTeethBuilding = .true. + return + end if + n0 += 1 + if(N_det_generators - n0 < minF * N) then + testTeethBuilding = .false. + return + end if + end do + stop "exited testTeethBuilding" +end function + +BEGIN_PROVIDER[ integer, dress_N_cp_max ] + dress_N_cp_max = 64 +END_PROVIDER + + BEGIN_PROVIDER[integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER [integer, dress_R1, (0:N_det_generators) ] + implicit none + integer :: m,j + integer :: l,nmov + integer, allocatable :: iorder(:) + allocate(iorder(N_det_generators)) + + pt2_J = pt2_J_ + dress_R1 = dress_R1_ + + do m=1,dress_N_cp + nmov = 0 + l=dress_R1(m-1)+1 + do j=l, dress_R1(m) + if(dress_M_mi(m, pt2_J(j)) == 0 .and. pt2_J(j) > dress_dot_n_0(m)) then + pt2_J(j) += N_det_generators + nmov += 1 + end if + end do + if(dress_R1(m)-dress_R1(m-1) > 0) then + call isort(pt2_J(l), iorder, dress_R1(m)-dress_R1(m-1)) + end if + dress_R1(m) -= nmov + do j=dress_R1(m)+1, dress_R1(m) + nmov + pt2_J(j) -= N_det_generators + end do + end do +END_PROVIDER + + BEGIN_PROVIDER[ integer, dress_M_m, (dress_N_cp_max)] +&BEGIN_PROVIDER[ integer, pt2_J_, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, dress_R1_, (0:N_det_generators)] +&BEGIN_PROVIDER[ double precision, dress_M_mi, (dress_N_cp_max, N_det_generators+1)] +&BEGIN_PROVIDER [ integer, dress_T, (N_det_generators) ] +&BEGIN_PROVIDER [ integer, dress_N_cp ] + implicit none + integer :: N_c, N_j, U, t, i, m + double precision :: v + double precision, allocatable :: tilde_M(:) + logical, allocatable :: d(:) + integer, external :: dress_find_sample + + allocate(d(N_det_generators), tilde_M(N_det_generators)) + + dress_M_mi = 0d0 + tilde_M = 0d0 + dress_R1_(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + + U = min(1, N_det_generators/(dress_N_cp_max**2/2)) + do i=1,dress_N_cp_max-1 + dress_M_m(i) = U * ((i**2-i)/2)! / (dress_N_cp_max+1) + end do + + + + U = N_det_generators/((dress_N_cp_max**2+dress_N_cp_max)/2)+1 + do i=1, dress_N_cp_max + dress_M_m(i) = U * (((i*i)+i)/2) + end do + + dress_M_m(1) = min(dress_M_m(1), 2) + dress_M_m(dress_N_cp_max) = N_det_generators+1 + + do i=1,N_j + d(i) = .true. + pt2_J_(i) = i + end do + call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) + call RANDOM_NUMBER(pt2_u) + call RANDOM_NUMBER(pt2_u) + + U = 0 + + m = 1 + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = dress_find_sample(v, pt2_cW) + tilde_M(i) += 1d0 + if(.not. d(i)) then + N_j += 1 + pt2_J_(N_j) = i + d(i) = .true. + end if + end do + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J_(N_j) = U + d(U) = .true. + exit; + end if + end do + + if(N_c == dress_M_m(m)) then + dress_R1_(m) = N_j + dress_M_mi(m, :N_det_generators) = tilde_M(:) + m += 1 + end if + enddo + + dress_N_cp = m-1 + dress_R1_(dress_N_cp) = N_j + dress_M_m(dress_N_cp) = N_c + !!!!!!!!!!!!!! + + do i=1, pt2_n_0(1) + dress_T(i) = 0 + end do + + do t=2,pt2_N_teeth+1 + do i=pt2_n_0(t-1)+1, pt2_n_0(t) + dress_T(i) = t-1 + end do + end do + !!!!!!!!!!!!! +END_PROVIDER + + +subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) use f77_zmq implicit none - integer, intent(in) :: lndet character(len=64000) :: task - character(len=3200) :: temp integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer, external :: omp_get_thread_num double precision, intent(in) :: E(N_states), relative_error @@ -24,12 +212,9 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer :: i, j, k, Ncp - double precision, external :: omp_get_wtime - double precision :: time integer, external :: add_task_to_taskserver double precision :: state_average_weight_save(N_states) task(:) = CHAR(0) - temp(:) = CHAR(0) allocate(delta(N_states,N_det), delta_s2(N_states, N_det)) state_average_weight_save(:) = state_average_weight(:) do dress_stoch_istate=1,N_states @@ -39,7 +224,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) TOUCH state_average_weight !provide psi_coef_generators - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors + provide nproc mo_bielec_integrals_in_map mo_mono_elec_integral psi_selectors !print *, dress_e0_denominator print *, '========== ================= ================= =================' @@ -75,65 +260,23 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket - integer :: ipos, sz - integer :: block(1), block_i, cur_tooth_reduce, ntas - logical :: flushme - block = 0 - block_i = 0 - cur_tooth_reduce = 0 - ipos=1 - ntas = 0 - do i=1,N_dress_jobs+1 - flushme = (i==N_dress_jobs+1 .or. block_i == size(block) .or. block_i >=cur_tooth_reduce ) - if(.not. flushme) flushme = (tooth_reduce(dress_jobs(i)) == 0 .or. tooth_reduce(dress_jobs(i)) /= cur_tooth_reduce) - - if(flushme .and. block_i > 0) then - if(block(1) > fragment_first) then - ntas += 1 - write(temp, '(I9,1X,60(I9,1X))') 0, block(:block_i) - sz = len(trim(temp))+1 - temp(sz:sz) = '|' - !write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) - write(task(ipos:ipos+sz), *) temp(:sz) - !ipos += 20 - ipos += sz+1 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - - ipos=1 - endif - else - if(block_i /= 1) stop "reduced fragmented dets" - do j=1,fragment_count - ntas += 1 - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, block(1) - ipos += 20 - if (ipos > 63000 .or. i==N_dress_jobs+1) then - ntas += 1 - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if - block_i = 0 - block = 0 - end if - - if(i /= N_dress_jobs+1) then - cur_tooth_reduce = tooth_reduce(dress_jobs(i)) - block_i += 1 - block(block_i) = dress_jobs(i) - end if + + + do i=1,N_det_generators + do j=1,pt2_F(pt2_J(i)) + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' + endif + end do end do if (zmq_set_running(zmq_to_qp_run_socket) == -1) then print *, irp_here, ': Failed in zmq_set_running' endif call omp_set_nested(.true.) + +if (.false.) then !! TODO !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & !$OMP PRIVATE(i) i = omp_get_thread_num() @@ -144,9 +287,17 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error, lndet) call dress_slave_inproc(i) endif !$OMP END PARALLEL + +else + + call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress,& + dress_stoch_istate) +endif + call omp_set_nested(.false.) delta_out(dress_stoch_istate,1:N_det) = delta(dress_stoch_istate,1:N_det) delta_s2_out(dress_stoch_istate,1:N_det) = delta_s2(dress_stoch_istate,1:N_det) + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') print *, '========== ================= ================= =================' @@ -166,6 +317,73 @@ subroutine dress_slave_inproc(i) call run_dress_slave(1,i,dress_e0_denominator) end + BEGIN_PROVIDER [integer, dress_dot_F, (dress_N_cp)] +&BEGIN_PROVIDER [ integer, dress_P, (N_det_generators) ] + implicit none + integer :: m,i + + do m=1,dress_N_cp + do i=dress_R1(m-1)+1, dress_R1(m) + dress_P(pt2_J(i)) = m + end do + end do + + dress_dot_F = 0 + do m=1,dress_N_cp + do i=dress_R1(m-1)+1,dress_R1(m) + dress_dot_F(m) += pt2_F(pt2_J(i)) + end do + end do + do m=2,dress_N_cp + dress_dot_F(m) += dress_dot_F(m-1) + end do +END_PROVIDER + +BEGIN_PROVIDER [double precision, dress_e, (N_det_generators, dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_t, (0:dress_N_cp)] +&BEGIN_PROVIDER [integer, dress_dot_n_0, (0:dress_N_cp)] + implicit none + + logical, allocatable :: d(:) + integer :: U, m, t, i + + allocate(d(N_det_generators+1)) + + dress_e(:,:) = 0d0 + dress_dot_t(:) = 0 + dress_dot_n_0(:) = 0 + d(:) = .false. + U=0 + + do m=1,dress_N_cp + do i=dress_R1_(m-1)+1,dress_R1_(m) + !dress_dot_F(m) += pt2_F(pt2_J_(i)) + d(pt2_J_(i)) = .true. + end do + + do while(d(U+1)) + U += 1 + end do + + dress_dot_t(m) = pt2_N_teeth + 1 + dress_dot_n_0(m) = N_det_generators + + do t = 2, pt2_N_teeth+1 + if(U < pt2_n_0(t)) then + dress_dot_t(m) = t-1 + dress_dot_n_0(m) = pt2_n_0(t-1) + exit + end if + end do + do i=dress_dot_n_0(m)+1, N_det_generators !pt2_n_0(t+1) + dress_e(i,m) = pt2_W_T * dress_M_mi(m,i) / pt2_w(i) + end do + end do + + do m=dress_N_cp, 2, -1 + dress_e(:,m) -= dress_e(:,m-1) + end do +END_PROVIDER subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dress, istate) @@ -179,553 +397,221 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, double precision, intent(in) :: relative_error, E(N_states) double precision, intent(out) :: dress(N_states) - double precision, allocatable :: cp(:,:,:,:) double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det) - double precision, allocatable :: delta_loc(:,:,:) - double precision, allocatable :: dress_detail(:,:) - double precision :: dress_mwen(N_states) + double precision, allocatable :: breve_delta_m(:,:,:), S(:), S2(:) + double precision, allocatable :: edI(:,:), edI_task(:,:) + integer, allocatable :: edI_index(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer(ZMQ_PTR), external :: new_zmq_pull_socket - - integer :: more - integer :: i, j, k, i_state, N - integer :: task_id, ind - double precision, save :: time0 = -1.d0 - double precision :: time + integer(ZMQ_PTR), external :: new_zmq_pull_socket, zmq_abort + integer, allocatable :: task_id(:) + integer :: i, c, j, k, f, t, m, p, m_task + integer :: more, n_tasks + double precision :: E0, error, x, v, time, time0 + double precision :: avg, eqt double precision, external :: omp_get_wtime - integer :: cur_cp, last_cp - integer :: delta_loc_cur, is, N_buf(3) - integer, allocatable :: int_buf(:), agreg_for_cp(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer, external :: zmq_delete_tasks - last_cp = 10000000 - allocate(agreg_for_cp(N_cp)) - agreg_for_cp = 0 - allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer)) - delta_loc_cur = 1 + integer, allocatable :: dot_f(:) + integer, external :: zmq_delete_tasks, dress_find_sample + logical :: found + integer :: worker_id + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,1) + + found = .false. delta = 0d0 delta_s2 = 0d0 - allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) - allocate(delta_loc(N_states, N_det, 2)) - dress_detail = -1000d0 - cp = 0d0 - character*(512) :: task - - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + allocate(task_id(pt2_n_tasks_max)) + allocate(edI(N_states, N_det)) + allocate(edI_task(N_states, N_det), edI_index(N_det)) + allocate(breve_delta_m(N_states, N_det, 2)) + allocate(dot_f(dress_N_cp+1)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + edI = 0d0 + + dot_f(:dress_N_cp) = dress_dot_F(:) + dot_f(dress_N_cp+1) = 1 + more = 1 + m = 1 + c = 0 + S(:) = 0d0 + S2(:) = 0d0 + time = omp_get_wtime() + time0 = -1d0 ! omp_get_wtime() more = 1 - if (time0 < 0.d0) then - call wall_time(time0) - endif - logical :: loop, floop - floop = .true. - loop = .true. - - pullLoop : do while (loop) - call pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, dress_mwen) - if(floop) then - call wall_time(time) - time0 = time - floop = .false. - end if - if(cur_cp == -1 .and. ind == N_det_generators) then - call wall_time(time) - end if - - if(cur_cp == -1) then - call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then - stop 'Unable to delete tasks' - endif - if(more == 0) loop = .false. !stop 'loop = .false.' !!!!!!!!!!!!!!!! - dress_detail(:, ind) = dress_mwen(:) - !print *, "DETAIL", ind, dress_mwen - else if(cur_cp > 0) then - if(ind == 0) cycle - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,1) += delta_loc(:,i,1) + do while (.not. found) + if(dot_f(m) == 0) then + E0 = 0 + do i=dress_dot_n_0(m),1,-1 + E0 += edI(dress_stoch_istate, i) end do - - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,N_det - cp(:,i,cur_cp,2) += delta_loc(:,i,2) + do while(c < dress_M_m(m)) + c = c+1 + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = dress_find_sample(v, pt2_cW) + x += edI(dress_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do end do - !$OMP END PARALLEL DO - agreg_for_cp(cur_cp) += ind - !print *, agreg_for_cp(cur_cp), ind, needed_by_cp(cur_cp), cur_cp - if(agreg_for_cp(cur_cp) > needed_by_cp(cur_cp)) then - stop "too much results..." + t = dress_dot_t(m) + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + error = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' + m += 1 + if(eqt <= relative_error) then + integer, external :: zmq_put_dvector + i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1) + found = .true. end if - if(agreg_for_cp(cur_cp) /= needed_by_cp(cur_cp)) cycle - - call wall_time(time) - - last_cp = cur_cp - double precision :: su, su2, eqt, avg, E0, val - integer, external :: zmq_abort - - su = 0d0 - su2 = 0d0 - !$OMP PARALLEL DO DEFAULT(NONE) PRIVATE(i, val) SHARED(comb, dress_detail, & - !$OMP cur_cp,istate,cps_N) REDUCTION(+:su) REDUCTION(+:su2) - do i=1, int(cps_N(cur_cp)) - call get_comb_val(comb(i), dress_detail, cur_cp, val, istate) - su += val - su2 += val*val - end do - !$OMP END PARALLEL DO - - avg = su / cps_N(cur_cp) - eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg*avg) / cps_N(cur_cp) ) - E0 = sum(dress_detail(istate, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) - if(cp_first_tooth(cur_cp) <= comb_teeth) then - E0 = E0 + dress_detail(istate, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) - end if - - !print '(I2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', avg+E(istate)+E0, eqt, time-time0, '' - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', cps_N(cur_cp), avg+E0+E(istate), eqt, time-time0, '' - if ((dabs(eqt/(avg+E0+E(istate))) < relative_error .and. cps_N(cur_cp) >= 10)) then - ! Termination - print *, "TERMINATE" - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' + else + do + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + if(time0 == -1d0) then + print *, "first pull", omp_get_wtime()-time + time0 = omp_get_wtime() + end if + if(m_task == 0) then + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' endif - endif - endif + else + !if(task_id(1) /= 0) stop "TASKID" + !i= zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + exit + end if + end do + do i=1,n_tasks + edI(:, edI_index(i)) += edI_task(:, i) + end do + dot_f(m_task) -= f end if - end do pullLoop + end do + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif - delta(:,:) = cp(:,:,last_cp,1) - delta_s2(:,:) = cp(:,:,last_cp,2) + integer :: ff + ff = dress_dot_F(m-1) + delta= 0d0 + delta_s2 = 0d0 + do while(more /= 0) + call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) + + !if(task_id(0) == 0) cycle + if(m_task == 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) + else if(m_task < 0) then + i = zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) + end if + + + if(m_task >= 0) cycle + ff = ff - f + delta(:,:) += breve_delta_m(:,:,1) + delta_s2(:,:) += breve_delta_m(:,:,2) + end do dress(istate) = E(istate)+E0+avg + if(ff /= 0) stop "WRONG NUMBER OF FRAGMENTS COLLECTED" + call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) end subroutine -integer function dress_find(v, w, sze, imin, imax) +integer function dress_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) - integer :: i,l,h - integer, parameter :: block=64 + double precision, intent(in) :: v, w(0:N_det_generators) + integer :: i,l,r - l = imin - h = imax-1 + l = 0 + r = N_det_generators - do while(h-l >= block) - i = ishft(h+l,-1) - if(w(i+1) > v) then - h = i-1 + do while(r-l > 1) + i = (r+l) / 2 + if(w(i) < v) then + l = i else - l = i+1 - end if - end do - !DIR$ LOOP COUNT (64) - do dress_find=l,h - if(w(dress_find) >= v) then - exit + r = i end if end do + + dress_find_sample = r end function - BEGIN_PROVIDER [ integer, gen_per_cp ] -&BEGIN_PROVIDER [ integer, comb_teeth ] -&BEGIN_PROVIDER [ integer, N_cps_max ] + + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - BEGIN_DOC -! N_cps_max : max number of checkpoints -! -! gen_per_cp : number of generators per checkpoint - END_DOC - comb_teeth = min(1+N_det/10,10) - N_cps_max = 16 - gen_per_cp = (N_det_generators / N_cps_max) + 1 -END_PROVIDER + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: dress_find_sample - - BEGIN_PROVIDER [ integer, N_cp ] -&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] -&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, done_cp_at_det, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, needed_by_cp, (0:N_cps_max) ] -&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] -&BEGIN_PROVIDER [ integer, N_dress_jobs ] -&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] -&BEGIN_PROVIDER [ integer, tooth_reduce, (N_det_generators) ] - implicit none - logical, allocatable :: computed(:), comp_filler(:) - integer :: i, j, last_full, dets(comb_teeth) + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) - integer :: k, l, cur_cp, under_det(comb_teeth+1) - integer :: cp_limit(N_cps_max) - integer, allocatable :: iorder(:), first_cp(:) - integer, allocatable :: filler(:) - integer :: nfiller, lfiller, cfiller - logical :: fracted - - integer :: first_suspect - provide psi_coef_generators - first_suspect = 1 - - allocate(filler(n_det_generators)) - allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) - allocate(computed(N_det_generators)) - allocate(comp_filler(N_det_generators)) - first_cp = 1 - cps = 0d0 - cur_cp = 1 - done_cp_at = 0 - done_cp_at_det = 0 - needed_by_cp = 0 - comp_filler = .false. - computed = .false. - cps_N = 1d0 - tooth_reduce = 0 + tilde_cW(0) = 0d0 - integer :: fragsize - fragsize = N_det_generators / ((N_cps_max-1+1)*(N_cps_max-1+2)/2) - - do i=1,N_cps_max - cp_limit(i) = fragsize * i * (i+1) / 2 - end do - cp_limit(N_cps_max) = N_det*2 - - N_dress_jobs = first_det_of_comb - 1 - do i=1, N_dress_jobs - dress_jobs(i) = i - computed(i) = .true. - end do - - l=first_det_of_comb - call random_seed(put=(/321,654,65,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/)) - call RANDOM_NUMBER(comb) - lfiller = 1 - nfiller = 1 do i=1,N_det_generators - !print *, i, N_dress_jobs - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) - - !if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then - if(N_dress_jobs > cp_limit(cur_cp) .or. N_dress_jobs == N_det_generators) then - first_cp(cur_cp+1) = N_dress_jobs - done_cp_at(N_dress_jobs) = cur_cp - cps_N(cur_cp) = dfloat(i) - if(N_dress_jobs /= N_det_generators) then - cps(:, cur_cp+1) = cps(:, cur_cp) - cur_cp += 1 - end if - - if (N_dress_jobs == N_det_generators) then - exit - end if - end if - - !!!!!!!!!!!!!!!!!!!!!!!! - if(.TRUE.) then - do l=first_suspect,N_det_generators - if((.not. computed(l))) then - N_dress_jobs+=1 - dress_jobs(N_dress_jobs) = l - computed(l) = .true. - first_suspect = l - exit - end if - end do - - if (N_dress_jobs == N_det_generators) exit - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ELSE - !!!!!!!!!!!!!!!!!!!!!!!!!!!! - do l=first_suspect,N_det_generators - if((.not. computed(l)) .and. (.not. comp_filler(l))) exit - end do - first_suspect = l - if(l > N_det_generators) cycle - - cfiller = tooth_of_det(l)-1 - if(cfiller > lfiller) then - do j=1,nfiller-1 - if(.not. computed(filler(j))) then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - nfiller = 2 - filler(1) = l - lfiller = cfiller - else - filler(nfiller) = l - nfiller += 1 - end if - comp_filler(l) = .True. - end if - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + tilde_w(i) = psi_coef_generators(i,dress_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) enddo - + tilde_cW(N_det_generators) = 1d0 - do j=1,nfiller-1 - if(.not. computed(filler(j)))then - k=N_dress_jobs+1 - dress_jobs(k) = filler(j) - N_dress_jobs = k - end if - computed(filler(j)) = .true. - end do - - - N_cp = cur_cp - - if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then - print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max - stop "error in jobs creation" - end if - - cur_cp = 0 - do i=1,N_dress_jobs - if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) - done_cp_at(i) = cur_cp - done_cp_at_det(dress_jobs(i)) = cur_cp - needed_by_cp(cur_cp) += 1 - end do - - - under_det = 0 - cp_first_tooth = 0 - do i=1,N_dress_jobs - do j=comb_teeth+1,1,-1 - if(dress_jobs(i) <= first_det_of_teeth(j)) then - under_det(j) = under_det(j) + 1 - if(under_det(j) == first_det_of_teeth(j))then - do l=done_cp_at(i)+1, N_cp - cps(:first_det_of_teeth(j)-1, l) = 0d0 - cp_first_tooth(l) = j - end do - cps(first_det_of_teeth(j), done_cp_at(i)+1) = & - cps(first_det_of_teeth(j), done_cp_at(i)+1) * fractage(j) - end if - else - exit - end if - end do - end do - cp_first_tooth(N_cp) = comb_teeth+1 - - do i=1,N_det_generators - do j=N_cp,2,-1 - cps(i,j) -= cps(i,j-1) - end do - end do - - iorder = -1 - - cps(:, N_cp) = 0d0 - - iloop : do i=fragment_first+1,N_det_generators - k = tooth_of_det(i) - if(k == 0) cycle - if (i == first_det_of_teeth(k)) cycle - - do j=1,N_cp - if(cps(i, j) /= 0d0) cycle iloop - end do - - tooth_reduce(i) = k - end do iloop - - do i=1,N_det_generators - if(tooth_reduce(dress_jobs(i)) == 0) dress_jobs(i) = dress_jobs(i)+N_det*2 - end do - - do i=1,N_cp-1 - call isort(dress_jobs(first_cp(i)+1),iorder,first_cp(i+1)-first_cp(i)-1) - end do - - do i=1,N_det_generators - if(dress_jobs(i) > N_det) dress_jobs(i) = dress_jobs(i) - N_det*2 - end do -END_PROVIDER - - -subroutine get_comb_val(stato, detail, cur_cp, val, istate) - implicit none - integer, intent(in) :: cur_cp, istate - integer :: first - double precision, intent(in) :: stato, detail(N_states, N_det_generators) - double precision, intent(out) :: val - double precision :: curs - integer :: j, k - integer, external :: dress_find - - curs = 1d0 - stato - val = 0d0 - first = cp_first_tooth(cur_cp) - - do j = comb_teeth, first, -1 - !DIR$ FORCEINLINE - k = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - if(k == first_det_of_teeth(first)) then - val += detail(istate, k) * dress_weight_inv(k) * comb_step * fractage(first) - else - val += detail(istate, k) * dress_weight_inv(k) * comb_step - end if - - curs -= comb_step - end do -end subroutine - - -subroutine get_comb(stato, dets) - implicit none - double precision, intent(in) :: stato - integer, intent(out) :: dets(comb_teeth) - double precision :: curs - integer :: j - integer, external :: dress_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step - end do -end subroutine - - -subroutine add_comb(com, computed, cp, N, tbc) - implicit none - double precision, intent(in) :: com - integer, intent(inout) :: N - double precision, intent(inout) :: cp(N_det) - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(N_det_generators) - integer :: i, k, l, dets(comb_teeth) - - !DIR$ FORCEINLINE - call get_comb(com, dets) - k=N+1 - do i = 1, comb_teeth - l = dets(i) - cp(l) += 1d0 - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if - end do - N = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, dress_stoch_istate ] - implicit none - dress_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, dress_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_weight_inv, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, dress_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, fractage, (comb_teeth) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] -&BEGIN_PROVIDER [ integer, tooth_of_det, (N_det_generators) ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: dress_find - - dress_weight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - dress_cweight(1) = psi_coef_generators(1,dress_stoch_istate)**2 - - do i=1,N_det_generators - dress_weight(i) = psi_coef_generators(i,dress_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - dress_cweight(N_det_generators) = dress_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - dress_cweight(i) = dress_weight(i) + dress_cweight(i+1) - end do - - do i=1,N_det_generators - dress_weight(i) = dress_weight(i) / dress_cweight(1) - dress_cweight(i) = dress_cweight(i) / dress_cweight(1) - enddo - - do i=1,N_det_generators-1 - dress_cweight(i) = 1.d0 - dress_cweight(i+1) - end do - dress_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - !print *, "comb_step", comb_step - first_det_of_comb = 1 - do i=1,N_det_generators ! min(100,N_det_generators) - first_det_of_comb = i - if(dress_weight(i)/norm_left < comb_step) then + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - norm_left -= dress_weight(i) + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" + end if end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - - comb_step = (1d0 - dress_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = dress_find(stato, dress_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - fractage(i) = (dress_cweight(iloc) - stato) / dress_weight(iloc) - stato -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = dress_find_sample(r, tilde_cW) + end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - - - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif + pt2_cW(0) = 0d0 do i=1,N_det_generators - dress_weight_inv(i) = 1.d0/dress_weight(i) - enddo - - tooth_of_det(:first_det_of_teeth(1)-1) = 0 - do i=1,comb_teeth - tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - - diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index 590b27c6..a3b18b26 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -85,34 +85,21 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ] double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) double precision :: E_CI_before(N_states), relative_error - ! prevents re-providing if delta_ij_tmp is - ! just being copied - if(N_det_delta_ij == N_det) then + allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) + + delta_ij_tmp = 0d0 + + E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion + relative_error = 1.d-5 + + call write_double(6,relative_error,"Convergence of the stochastic algorithm") - allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij)) - - delta_ij_tmp = 0d0 - - E_CI_before(:) = psi_energy(:) + nuclear_repulsion - threshold_selectors = 1.d0 - threshold_generators = 1.d0 - SOFT_TOUCH threshold_selectors threshold_generators - ! if(errr /= 0d0) then - ! errr = errr / 2d0 - ! else - ! errr = 1d-4 - ! end if - relative_error = 1.d-3 - - call write_double(6,relative_error,"Relative error for the stochastic algorithm") - - call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij) - delta_ij_tmp(:,:,1) = del(:,:) - delta_ij_tmp(:,:,2) = del_s2(:,:) + call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij) + delta_ij_tmp(:,:,1) = del(:,:) + delta_ij_tmp(:,:,2) = del_s2(:,:) - deallocate(dress, del, del_s2) - end if + deallocate(dress, del, del_s2) END_PROVIDER diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 8801cb3f..184ef94c 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -1,13 +1,5 @@ use bitmasks -BEGIN_PROVIDER [ integer, fragment_count ] - implicit none - BEGIN_DOC - ! Number of fragments for the deterministic part - END_DOC - fragment_count = 1 -END_PROVIDER - subroutine run_dress_slave(thread,iproce,energy) use f77_zmq @@ -18,79 +10,73 @@ subroutine run_dress_slave(thread,iproce,energy) integer, intent(in) :: thread, iproce integer :: rc, i, subset, i_generator - integer :: worker_id, task_id, ctask, ltask - character*(5120) :: task + integer :: worker_id, ctask, ltask + character*(512) :: task(Nproc) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - - logical :: done - - double precision,allocatable :: dress_detail(:) - integer :: ind - double precision,allocatable :: delta_ij_loc(:,:,:) - integer :: h,p,n,i_state - logical :: ok - - integer, allocatable :: int_buf(:) - double precision, allocatable :: double_buf(:) - integer(bit_kind), allocatable :: det_buf(:,:,:) - integer :: N_buf(3) - logical :: last + double precision,allocatable :: breve_delta_m(:,:,:) + integer :: i_state,m,l,t,p,sum_f !integer, external :: omp_get_thread_num - double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:) - integer :: toothMwen - logical :: fracted + double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:), edI(:) + double precision, allocatable :: edI_task(:) + integer, allocatable :: edI_index(:), edI_taskID(:) + integer :: n_tasks + + integer :: iproc + integer, allocatable :: f(:) + integer :: cp_sent, cp_done + integer :: cp_max(Nproc) + integer :: will_send, task_id, purge_task_id, ntask_buf + integer, allocatable :: task_buf(:) + integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1) + integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task double precision :: fac - + double precision :: ending(1) + integer, external :: zmq_get_dvector +! double precision, external :: omp_get_wtime +double precision :: time, time0 + integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) + if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" + + allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) + allocate(cp(N_states, N_det, dress_N_cp, 2)) + allocate(edI(N_det_generators), f(N_det_generators)) + allocate(edI_index(N_det_generators), edI_task(N_det_generators)) + edI = 0d0 + f = 0 + delta_det = 0d0 -! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" + task = CHAR(0) - allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) - allocate(cp(N_states, N_det, N_cp, 2)) - delta_det = 0d9 - cp = 0d0 - - - task(:) = CHAR(0) - - - - integer :: iproc, cur_cp, done_for(0:N_cp) - integer, allocatable :: tasks(:) - integer :: lastCp(Nproc) - integer :: lastSent, lastSendable - logical :: send - integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1) - integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1) - - do i=0,N_cp+1 + call omp_init_lock(sending) + call omp_init_lock(getting_task) + do i=0,dress_N_cp+1 call omp_init_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_init_lock(lck_det(i)) end do - lastCp = 0 - lastSent = 0 - send = .false. - done_for = 0 - - double precision :: hij, sij - !call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij) - - hij = dress_E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL + cp_done = 0 + cp_sent = 0 + will_send = 0 + double precision :: hij, sij, tmp + purge_task_id = 0 + provide psi_energy + ending(1) = dble(dress_N_cp+1) + ntask_tbd = 0 !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) & - !$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) & - !$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) & - !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) - + !$OMP PRIVATE(breve_delta_m, task_id) & + !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & + !$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) & + !$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) & + !$OMP PRIVATE(task_buf, ntask_buf,time, time0) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) @@ -99,276 +85,239 @@ subroutine run_dress_slave(thread,iproce,energy) call end_zmq_push_socket(zmq_socket_push,thread) stop "WORKER -1" end if - - iproc = omp_get_thread_num()+1 - allocate(int_buf(N_dress_int_buffer)) - allocate(double_buf(N_dress_double_buffer)) - allocate(det_buf(N_int, 2, N_dress_det_buffer)) - allocate(delta_ij_loc(N_states,N_det,2)) - do - call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) - task = task//" 0" - if(task_id == 0) exit - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - if(task_id /= 0) then - read (task,*) subset, i_generator + allocate(breve_delta_m(N_states,N_det,2)) + allocate(task_buf(pt2_n_tasks_max)) + ntask_buf = 0 + + if(iproc==1) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + end if - !$OMP ATOMIC - done_for(done_cp_at_det(i_generator)) += 1 - ! print *, "IGEN", i_generator, done_cp_at_det(i_generator) - delta_ij_loc(:,:,:) = 0d0 - call generator_start(i_generator, iproc) - call alpha_callback(delta_ij_loc, i_generator, subset, iproc) - call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc) - - do i=1,N_cp - fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step - if(fac == 0d0) cycle - call omp_set_lock(lck_sto(i)) - cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac) - cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac) - call omp_unset_lock(lck_sto(i)) - end do - - - toothMwen = tooth_of_det(i_generator) - fracted = (toothMwen /= 0) - if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen)) - if(fracted) then - call omp_set_lock(lck_det(toothMwen)) - call omp_set_lock(lck_det(toothMwen-1)) - delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen)) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen)) - call omp_unset_lock(lck_det(toothMwen)) - call omp_unset_lock(lck_det(toothMwen-1)) - else - call omp_set_lock(lck_det(toothMwen)) - delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) - delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) - call omp_unset_lock(lck_det(toothMwen)) - end if - call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id) - call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) - lastCp(iproc) = done_cp_at_det(i_generator) + do while(cp_done > cp_sent .or. m /= dress_N_cp+1) + call omp_set_lock(getting_task) + if(ntask_tbd == 0) then + ntask_tbd = size(task_tbd) + call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd) + !task = task//" 0" end if - + + task_id = task_tbd(1) + if(task_id /= 0) then + read (task(1),*) subset, i_generator + do i=1,size(task_tbd)-1 + task_tbd(i) = task_tbd(i+1) + task(i) = task(i+1) + end do + m = dress_P(i_generator) + ntask_tbd -= 1 + else + m = dress_N_cp + 1 + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + end if + call omp_unset_lock(getting_task) + will_send = 0 + !$OMP CRITICAL - send = .false. - lastSendable = N_cp*2 - do i=1,Nproc - lastSendable = min(lastCp(i), lastSendable) - end do - lastSendable -= 1 - if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then - lastSent = lastSendable - cur_cp = lastSent - send = .true. + cp_max(iproc) = m + cp_done = minval(cp_max)-1 + if(cp_done > cp_sent) then + will_send = cp_sent + 1 + cp_sent = will_send + end if + if(purge_task_id == 0) then + purge_task_id = task_id + task_id = 0 + else if(task_id /= 0) then + ntask_buf += 1 + task_buf(ntask_buf) = task_id end if !$OMP END CRITICAL - if(send) then - N_buf = (/0,1,0/) - - delta_ij_loc = 0d0 - if(cur_cp < 1) stop "cur_cp < 1" - do i=1,cur_cp - delta_ij_loc(:,:,1) += cp(:,:,i,1) - delta_ij_loc(:,:,2) += cp(:,:,i,2) + if(will_send /= 0 .and. will_send <= int(ending(1))) then + call omp_set_lock(sending) + n_tasks = 0 + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) + if(dress_P(i) == will_send .and. f(i) /= 0) then + n_tasks += 1 + edI_task(n_tasks) = edI(i) + edI_index(n_tasks) = i + end if end do - - delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp) - do i=cp_first_tooth(cur_cp)-1,0,-1 - delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1) - delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2) - end do - call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1) + call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks) + call omp_unset_lock(sending) end if + + if(m /= dress_N_cp+1) then + !UPDATE i_generator - if(task_id == 0) exit + breve_delta_m(:,:,:) = 0d0 + call generator_start(i_generator, iproc) + time0 = omp_get_wtime() + call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc) + time = omp_get_wtime() + !print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0 + t = dress_T(i_generator) + + call omp_set_lock(lck_det(t)) + delta_det(:,:,t, 1) += breve_delta_m(:,:,1) + delta_det(:,:,t, 2) += breve_delta_m(:,:,2) + call omp_unset_lock(lck_det(t)) + + do p=1,dress_N_cp + if(dress_e(i_generator, p) /= 0d0) then + fac = dress_e(i_generator, p) + call omp_set_lock(lck_sto(p)) + cp(:,:,p,1) += breve_delta_m(:,:,1) * fac + cp(:,:,p,2) += breve_delta_m(:,:,2) * fac + call omp_unset_lock(lck_sto(p)) + end if + end do + + tmp = 0d0 + do i=N_det,1,-1 + tmp += psi_coef(i, dress_stoch_istate)*breve_delta_m(dress_stoch_istate, i, 1) + end do + !$OMP ATOMIC + edI(i_generator) += tmp + !$OMP ATOMIC + f(i_generator) += 1 + !push bidon + if(ntask_buf == size(task_buf)) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 + end if + end if end do - + !$OMP BARRIER + if(ntask_buf /= 0) then + call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf) + ntask_buf = 0 + end if + !$OMP SINGLE + if(purge_task_id /= 0) then + do while(int(ending(1)) == dress_N_cp+1) + call sleep(1) + i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1) + end do + + will_send = int(ending(1)) + breve_delta_m = 0d0 + + do l=will_send, 1,-1 + breve_delta_m(:,:,1) += cp(:,:,l,1) + breve_delta_m(:,:,2) += cp(:,:,l,2) + end do + + breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send) + + do t=dress_dot_t(will_send)-1,0,-1 + breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1) + breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2) + end do + + sum_f = 0 + do i=1,N_det_generators + if(dress_P(i) <= will_send) sum_f = sum_f + f(i) + end do + call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id, 1) + end if + + !$OMP END SINGLE call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_push_socket(zmq_socket_push,thread) !$OMP END PARALLEL - - do i=0,N_cp+1 + do i=0,dress_N_cp+1 call omp_destroy_lock(lck_sto(i)) end do - do i=0,comb_teeth+1 + do i=0,pt2_N_teeth+1 call omp_destroy_lock(lck_det(i)) end do end subroutine - -subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id) +subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - real(kind=4), allocatable :: delta_loc4(:,:,:) - double precision, intent(in) :: double_buf(*) - integer, intent(in) :: int_buf(*) - integer(bit_kind), intent(in) :: det_buf(N_int, 2, *) - integer, intent(in) :: N_bufi(3) - integer :: N_buf(3) - integer, intent(in) :: ind, cur_cp, task_id - integer :: rc, i, j, k, l - double precision :: contrib(N_states) - real(sendt), allocatable :: r4buf(:,:,:) - - rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE) - if(rc /= 4) stop "push" - - - if(cur_cp /= -1) then - allocate(r4buf(N_states, N_det, 2)) - do i=1,2 - do j=1,N_det - do k=1,N_states - r4buf(k,j,i) = real(delta_loc(k,j,i), sendt) - end do - end do - end do - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - - rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE) - if(rc /= sendt*N_states*N_det) stop "push" - else - contrib = 0d0 - do i=1,N_det - contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :) - end do - - rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE) - if(rc /= 8*N_states) stop "push" + integer, intent(in) :: m_task, f, edI_index(n_tasks) + double precision, intent(in) :: breve_delta_m(N_states, N_det, 2), edI_task(n_tasks) + integer, intent(in) :: task_id(pt2_n_tasks_max), n_tasks + integer :: rc, i, j, k + rc = f77_zmq_send( zmq_socket_push, m_task, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push3" - N_buf = N_bufi - !N_buf = (/0,1,0/) - - rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) - if(rc /= 4*3) stop "push5" - - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE) - if(rc /= 4*N_buf(1)) stop "push6" - end if - - if(N_buf(2) > 0) then - rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE) - if(rc /= 8*N_buf(2)) stop "push8" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10" - end if - - rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) - if(rc /= 4) stop "push11" - end if - -! Activate is zmq_socket_push is a REQ + if(m_task > 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" + rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE) + if(rc /= 8*n_tasks) stop "push5" + rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push6" + else if(m_task == 0) then + rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push1" + rc = f77_zmq_send( zmq_socket_push, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "push2" + else + rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push4" + rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, ZMQ_SNDMORE) + if(rc /= 8*N_det*N_states*2) stop "push6" + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push6" + end if +! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE character*(2) :: ok rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) IRP_ENDIF - end subroutine -BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ] - -END_PROVIDER -subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib) +subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks) use f77_zmq implicit none - integer, parameter :: sendt = 4 integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - integer, intent(out) :: cur_cp - double precision, intent(inout) :: delta_loc(N_states, N_det, 2) - double precision, intent(out) :: double_buf(*), contrib(N_states) - integer, intent(out) :: int_buf(*) - integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) - integer, intent(out) :: ind - integer, intent(out) :: task_id + integer, intent(out) :: m_task, f, edI_index(N_det_generators) + double precision, intent(out) :: breve_delta_m(N_states, N_det, 2), edI_task(N_det_generators) + integer, intent(out) :: task_id(pt2_n_tasks_max), n_tasks integer :: rc, i, j, k - integer, intent(out) :: N_buf(3) - - rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) - if(rc /= 4) stop "pulla" - rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0) - if(rc /= 4) stop "pulla" - - - + rc = f77_zmq_recv( zmq_socket_pull, m_task, 4, 0) + if(rc /= 4) stop "pullc" - if(cur_cp /= -1) then - - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0) - if(rc /= sendt*N_states*N_det) stop "pulld" - - do i=1,2 - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k) - do j=1,N_det - do k=1,N_states - delta_loc(k,j,i) = real(real4buf(k,j,i), 8) - end do - end do - end do - else - rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0) - if(rc /= 8*N_states) stop "pullc" - - rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) - if(rc /= 4*3) stop "pull" - if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?" - if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?" - - - if(N_buf(1) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0) - if(rc /= 4*N_buf(1)) stop "pull1" + if(m_task > 0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0) + if(rc /= 8*n_tasks) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pullc" + else if(m_task==0) then + rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*n_tasks, 0) + if(rc /= 4*n_tasks) stop "pull4" + else + rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0) + if(rc /= 4) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0) + if(rc /= 8*N_det*N_states*2) stop "pullc" + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull4" end if - - if(N_buf(2) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0) - if(rc /= 8*N_buf(2)) stop "pull2" - end if - - if(N_buf(3) > 0) then - rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0) - if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3" - end if - - rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) - if(rc /= 4) stop "pull4" - end if ! Activate is zmq_socket_pull is a REP IRP_IF ZMQ_PUSH IRP_ELSE diff --git a/plugins/shiftedbk/shifted_bk_routines.irp.f b/plugins/shiftedbk/shifted_bk_routines.irp.f index bab8490d..216f9ec3 100644 --- a/plugins/shiftedbk/shifted_bk_routines.irp.f +++ b/plugins/shiftedbk/shifted_bk_routines.irp.f @@ -298,9 +298,8 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int) - + call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc) - slave_sum_alpha2(:,iproc) += c_alpha(:)**2 if(contrib < sb(iproc)%mini) then call add_to_selection_buffer(sb(iproc), alpha, contrib) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 6e972114..2d42e849 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -167,8 +167,7 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) end select end - -subroutine get_double_excitation(det1,det2,exc,phase,Nint) +subroutine get_double_excitation_ref(det1,det2,exc,phase,Nint) use bitmasks implicit none BEGIN_DOC @@ -312,6 +311,137 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint) end +subroutine get_phasemask_bit(det1, pm, Nint) + use bitmasks + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(out) :: pm(Nint,2) + integer(bit_kind) :: tmp + integer :: ispin, i + do ispin=1,2 + tmp = 0_8 + do i=1,Nint + pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + pm(i,ispin) = xor(pm(i,ispin), tmp) + if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + end do + end do +end subroutine + + +subroutine get_double_excitation(det1,det2,exc,phase,Nint) + use bitmasks + implicit none + BEGIN_DOC + ! Returns the two excitation operators between two doubly excited determinants and the phase + END_DOC + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det1(Nint,2) + integer(bit_kind), intent(in) :: det2(Nint,2) + integer, intent(out) :: exc(0:2,2,2) + double precision, intent(out) :: phase + integer :: tz + integer :: l, ispin, idx_hole, idx_particle, ishift + integer :: nperm + integer :: i,j,k,m,n + integer :: high, low + integer :: a,b,c,d + integer(bit_kind) :: hole, particle, tmp, pm(Nint,2) + double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /) + double precision :: refaz + logical :: ok + + ASSERT (Nint > 0) + + !do ispin=1,2 + !tmp = 0_8 + !do i=1,Nint + ! pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16)) + ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32)) + ! pm(i,ispin) = xor(pm(i,ispin), tmp) + ! if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp) + !end do + !end do + call get_phasemask_bit(det1, pm, Nint) + nperm = 0 + exc(0,1,1) = 0 + exc(0,2,1) = 0 + exc(0,1,2) = 0 + exc(0,2,2) = 0 + do ispin = 1,2 + idx_particle = 0 + idx_hole = 0 + ishift = 1-bit_kind_size + !par = 0 + do l=1,Nint + ishift = ishift + bit_kind_size + if (det1(l,ispin) == det2(l,ispin)) then + cycle + endif + tmp = xor( det1(l,ispin), det2(l,ispin) ) + particle = iand(tmp, det2(l,ispin)) + hole = iand(tmp, det1(l,ispin)) + do while (particle /= 0_bit_kind) + tz = trailz(particle) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_particle = idx_particle + 1 + exc(0,2,ispin) = exc(0,2,ispin) + 1 + exc(idx_particle,2,ispin) = tz+ishift + particle = iand(particle,particle-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)==2 + exit + endif + do while (hole /= 0_bit_kind) + tz = trailz(hole) + nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1) + idx_hole = idx_hole + 1 + exc(0,1,ispin) = exc(0,1,ispin) + 1 + exc(idx_hole,1,ispin) = tz+ishift + hole = iand(hole,hole-1_bit_kind) + enddo + if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin) + exit + endif + enddo + + select case (exc(0,1,ispin)) + case(0) + cycle + + case(1) + if(exc(1,1,ispin) < exc(1,2,ispin)) nperm = nperm+1 + + case (2) + a = exc(1,1,ispin) + b = exc(1,2,ispin) + c = exc(2,1,ispin) + d = exc(2,2,ispin) + + if(min(a,c) > max(b,d) .or. min(b,d) > max(a,c) .or. (a-b)*(c-d)<0) then + nperm = nperm + 1 + end if + exit + end select + + enddo + phase = phase_dble(iand(nperm,1)) + !call get_double_excitation_ref(det1,det2,exc,refaz,Nint) + !if(phase == refaz) then + ! print *, "phase", phase, refaz, n, exc(0,1,1) + !end if +end + subroutine get_mono_excitation(det1,det2,exc,phase,Nint) use bitmasks implicit none From 9ebb88cbf32eed90ed2c1ff985128f271bcc1c15 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Sep 2018 18:05:00 +0200 Subject: [PATCH 31/35] Cleaning --- plugins/Generators_full/generators.irp.f | 4 +--- plugins/dress_zmq/alpha_factory.irp.f | 8 ++++---- plugins/dress_zmq/dress_slave.irp.f | 4 +--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/plugins/Generators_full/generators.irp.f b/plugins/Generators_full/generators.irp.f index c40ba2d4..2ce6f854 100644 --- a/plugins/Generators_full/generators.irp.f +++ b/plugins/Generators_full/generators.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ integer, N_det_generators ] N_det_generators = N_det do i=1,N_det norm = norm + psi_average_norm_contrib_sorted(i) - if (norm >= threshold_generators) then + if (norm > threshold_generators+1d-10) then N_det_generators = i exit endif @@ -29,7 +29,6 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - integer :: i, k psi_det_generators(1:N_int,1:2,1:N_det) = psi_det_sorted(1:N_int,1:2,1:N_det) psi_coef_generators(1:N_det,1:N_states) = psi_coef_sorted(1:N_det,1:N_states) @@ -44,7 +43,6 @@ END_PROVIDER ! For Single reference wave functions, the generator is the ! Hartree-Fock determinant END_DOC - integer :: i, k psi_det_sorted_gen = psi_det_sorted psi_coef_sorted_gen = psi_coef_sorted psi_det_sorted_gen_order = psi_det_sorted_order diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index e5b670fb..ed6c7065 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -66,7 +66,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index integer(bit_kind), allocatable:: preinteresting_det(:,:,:) integer ,allocatable :: abuf(:), labuf(:) - allocate(abuf(0:N_det*6), labuf(0:N_det)) + allocate(abuf(N_det*6), labuf(N_det)) allocate(preinteresting_det(N_int,2,N_det)) @@ -386,7 +386,7 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned, integer(bit_kind), allocatable :: det_minilist(:,:,:) - allocate(abuf(0:siz), labuf(0:N_det), putten(N_det), det_minilist(N_int, 2, N_det)) + allocate(abuf(siz), labuf(N_det), putten(N_det), det_minilist(N_int, 2, N_det)) do i=1,siz abuf(i) = psi_from_sorted_gen(rabuf(i)) @@ -703,7 +703,7 @@ subroutine get_d2(i_gen, gen, banned, bannedOrb, indexes, abuf, mask, h, p, sp) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer, intent(in) :: i_gen logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer, intent(inout) :: indexes(0:mo_tot_num, 0:mo_tot_num) @@ -831,7 +831,7 @@ subroutine get_d1(i_gen, gen, banned, bannedOrb, indexes, abuf, mask, h, p, sp) implicit none integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer,intent(in) :: i_gen logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2) integer(bit_kind) :: det(N_int, 2) diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f index 6de3e2da..33238df2 100644 --- a/plugins/dress_zmq/dress_slave.irp.f +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -50,9 +50,7 @@ subroutine run_wf else if (zmq_state(:5) == 'dress') then ! Dress ! --------- - !call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle - !TOUCH psi_det if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle @@ -60,7 +58,7 @@ subroutine run_wf if (zmq_get_dvector(zmq_to_qp_run_socket,1,'dress_stoch_istate',tmp,1) == -1) cycle dress_stoch_istate = int(tmp) psi_energy(1:N_states) = energy(1:N_states) - TOUCH psi_energy dress_stoch_istate state_average_weight + TOUCH psi_energy dress_stoch_istate state_average_weight PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order From 68458296dcc86c57bab25582e138ae057a66f019 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Sep 2018 18:43:39 +0200 Subject: [PATCH 32/35] Almost working but still broken --- plugins/dress_zmq/alpha_factory.irp.f | 2 +- plugins/dress_zmq/dress_general.irp.f | 6 ------ plugins/dress_zmq/dress_stoch_routines.irp.f | 17 +++++++++++------ plugins/dress_zmq/dressing.irp.f | 2 +- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- src/Davidson/u0Hu0.irp.f | 1 + 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/plugins/dress_zmq/alpha_factory.irp.f b/plugins/dress_zmq/alpha_factory.irp.f index ed6c7065..d59ab032 100644 --- a/plugins/dress_zmq/alpha_factory.irp.f +++ b/plugins/dress_zmq/alpha_factory.irp.f @@ -637,7 +637,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, indexes, ab integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N_sel) logical, intent(inout) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2) integer, intent(inout) :: indexes(0:mo_tot_num, 0:mo_tot_num) - integer, intent(inout) :: abuf(0:*) + integer, intent(inout) :: abuf(*) integer :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt, s integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) integer :: phasemask(2,N_int*bit_kind_size) diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f index b99eb1d7..8364a61f 100644 --- a/plugins/dress_zmq/dress_general.irp.f +++ b/plugins/dress_zmq/dress_general.irp.f @@ -29,8 +29,6 @@ subroutine run_dressing(N_st,energy) delta_E = 1.d0 iteration = 0 do iteration=1,n_it_dress_max - N_det_delta_ij = N_det - touch N_det_delta_ij print *, '===============================================' print *, 'Iteration', iteration, '/', n_it_dress_max print *, '===============================================' @@ -40,9 +38,6 @@ subroutine run_dressing(N_st,energy) do i=1,N_st print *, i, psi_energy(i)+nuclear_repulsion enddo - !print *, "DELTA IJ", delta_ij(1,1,1) - PROVIDE delta_ij_tmp - if(.true.) call delta_ij_done() print *, 'Dressed energy ' do i=1,N_st print *, i, ci_energy_dressed(i) @@ -56,7 +51,6 @@ subroutine run_dressing(N_st,energy) call write_double(6,delta_E,"delta_E (undressed)") delta_E = dabs(delta_E) call save_wavefunction -! call ezfio_set_dress_zmq_energy(ci_energy_dressed(1)) if (delta_E < thresh_dress) then exit endif diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 136ecb7f..846d38f0 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -276,7 +276,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error) call omp_set_nested(.true.) -if (.false.) then !! TODO +if (.true.) then !! TODO !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & !$OMP PRIVATE(i) i = omp_get_thread_num() @@ -462,11 +462,16 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, end do t = dress_dot_t(m) avg = S(t) / dble(c) - eqt = (S2(t) / c) - (S(t)/c)**2 - eqt = sqrt(eqt / dble(c-1)) - error = eqt - time = omp_get_wtime() - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' + if (c > 1) then + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + error = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0+E(dress_stoch_istate), eqt, time-time0, '' + else + eqt = 1.d0 + error = eqt + endif m += 1 if(eqt <= relative_error) then integer, external :: zmq_put_dvector diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f index a3b18b26..40b83037 100644 --- a/plugins/dress_zmq/dressing.irp.f +++ b/plugins/dress_zmq/dressing.irp.f @@ -65,7 +65,7 @@ END_PROVIDER BEGIN_PROVIDER [ integer , N_det_delta_ij ] implicit none - N_det_delta_ij = 1 + N_det_delta_ij = N_det END_PROVIDER BEGIN_PROVIDER [ double precision, delta_ij, (N_states, N_det, 2) ] diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 184ef94c..734c1f31 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -41,7 +41,7 @@ subroutine run_dress_slave(thread,iproce,energy) ! double precision, external :: omp_get_wtime double precision :: time, time0 integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) - if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" +! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP" allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) allocate(cp(N_states, N_det, dress_N_cp, 2)) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 3e5610c8..38e3f293 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -28,6 +28,7 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo From 52ca18c1525cbe51f290813a4706928f227af142 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 20:47:11 +0200 Subject: [PATCH 33/35] uninitialized variable --- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 734c1f31..2c358a5f 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -50,7 +50,7 @@ double precision :: time, time0 edI = 0d0 f = 0 delta_det = 0d0 - + cp = 0d0 task = CHAR(0) call omp_init_lock(sending) From bccad69c7785d79eb079bd15cd8d7143503ba9b5 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 20:47:11 +0200 Subject: [PATCH 34/35] uninitialized variable --- plugins/dress_zmq/run_dress_slave.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f index 734c1f31..2c358a5f 100644 --- a/plugins/dress_zmq/run_dress_slave.irp.f +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -50,7 +50,7 @@ double precision :: time, time0 edI = 0d0 f = 0 delta_det = 0d0 - + cp = 0d0 task = CHAR(0) call omp_init_lock(sending) From cfa8e1dc34c78f666aad677d2362a0b23e738e55 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Tue, 4 Sep 2018 20:50:09 +0200 Subject: [PATCH 35/35] restored relative_error --- plugins/dress_zmq/dress_stoch_routines.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f index 3fc128f7..0192c4da 100644 --- a/plugins/dress_zmq/dress_stoch_routines.irp.f +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -473,7 +473,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, error = eqt endif m += 1 - if(eqt <=0d0* relative_error) then + if(eqt <= relative_error) then integer, external :: zmq_put_dvector i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1) found = .true.