From d78f64732a5493d7f10c7c80b564005e63a133fc Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 29 Aug 2018 11:30:19 +0200 Subject: [PATCH] pt2_stoch re-implemented --- plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f | 659 ++++++------------ plugins/Full_CI_ZMQ/run_pt2_slave.irp.f | 20 +- plugins/Full_CI_ZMQ/run_selection_slave.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 14 +- 4 files changed, 234 insertions(+), 461 deletions(-) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f index 73d71365..0b26ab33 100644 --- a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -1,8 +1,31 @@ -BEGIN_PROVIDER [ integer, fragment_first ] - implicit none - fragment_first = first_det_of_teeth(1) +BEGIN_PROVIDER [ integer, pt2_stoch_istate ] + implicit none + BEGIN_DOC + ! State for stochatsic PT2 + END_DOC + pt2_stoch_istate = 1 END_PROVIDER + + BEGIN_PROVIDER [ integer, pt2_N_teeth ] +&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ] +&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ] +&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ] + implicit none + pt2_F(:) = 1 + pt2_F(:N_det_generators/100 + 1) = 1 + pt2_n_tasks_max = N_det_generators/100 + 1 + + if(N_det_generators < 256) then + pt2_minDetInFirstTeeth = 1 + pt2_N_teeth = 1 + else + pt2_minDetInFirstTeeth = 5 + pt2_N_teeth = 16 + end if +END_PROVIDER + + subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) use f77_zmq use selection_types @@ -11,22 +34,15 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) character(len=64000) :: task integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull - type(selection_buffer) :: b integer, external :: omp_get_thread_num double precision, intent(in) :: relative_error, absolute_error, E(N_states) double precision, intent(out) :: pt2(N_states),error(N_states) - double precision, allocatable :: pt2_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, k, Ncomb, i_generator_end - integer, external :: pt2_find + integer :: i, j, k - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) double precision, external :: omp_get_wtime double precision :: state_average_weight_save(N_states), w(N_states) - double precision :: time integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket if (N_det < max(10,N_states)) then @@ -41,26 +57,9 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) state_average_weight(:) = 0.d0 state_average_weight(pt2_stoch_istate) = 1.d0 TOUCH state_average_weight - - allocate(pt2_detail(N_states,N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc)) - sumabove = 0d0 - sum2above = 0d0 - Nabove = 0d0 - - provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight psi_selectors - - computed = .false. - - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - Ncomb=size(comb) - call get_carlo_workbatch(computed, comb, Ncomb, tbc) - pt2_detail = 0d0 + provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors + print *, '========== ================= ================= =================' print *, ' Samples Energy Stat. Error Seconds ' print *, '========== ================= ================= =================' @@ -83,41 +82,18 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then stop 'Unable to put energy on ZMQ server' endif - call create_selection_buffer(1, 1*2, b) - integer :: ipos - ipos=1 - integer, external :: add_task_to_taskserver - do i=1,tbc(0) - if(tbc(i) > fragment_first) then - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 + + do i=1,N_det_generators + do j=1,pt2_F(i) !!!!!!!!!!!! + write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i) + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then + stop 'Unable to add task to task server' endif - else - do j=1,fragment_count - write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i) - ipos += 20 - if (ipos > 63980) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - ipos=1 - endif - end do - end if + end do end do - if (ipos > 1) then - if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then - stop 'Unable to add task to task server' - endif - endif integer, external :: zmq_set_running if (zmq_set_running(zmq_to_qp_run_socket) == -1) then @@ -129,18 +105,16 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) !$OMP PRIVATE(i) i = omp_get_thread_num() if (i==0) then - call pt2_collector(zmq_socket_pull,E(pt2_stoch_istate), b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, w, error) + call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, absolute_error, w, error) pt2(pt2_stoch_istate) = w(pt2_stoch_istate) else call pt2_slave_inproc(i) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2') - call delete_selection_buffer(b) print *, '========== ================= ================= =================' - deallocate(pt2_detail, comb, computed, tbc) enddo FREE pt2_stoch_istate state_average_weight(:) = state_average_weight_save(:) @@ -153,34 +127,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error) end subroutine -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:size_tbc), Ncomb - logical, intent(in) :: computed(N_det_generators) - double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states,N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 - - mainLoop : do i=1,Ncomb - call get_comb(comb(i), dets, comb_teeth) - do j=1,comb_teeth - if(.not.(computed(dets(j)))) then - exit mainLoop - end if - end do - - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - myVal += pt2_detail(pt2_stoch_istate,dets(j)) * pt2_weight_inv(dets(j)) * comb_step - sumabove(j) += myVal - sum2above(j) += myVal*myVal - Nabove(j) += 1 - end do - end do mainLoop -end subroutine - - subroutine pt2_slave_inproc(i) implicit none integer, intent(in) :: i @@ -188,197 +134,118 @@ subroutine pt2_slave_inproc(i) call run_pt2_slave(1,i,pt2_e0_denominator) end -subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, pt2,error) + +subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2, error) use f77_zmq use selection_types use bitmasks implicit none - integer, intent(in) :: Ncomb integer(ZMQ_PTR), intent(in) :: zmq_socket_pull - double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) - double precision, intent(in) :: comb(Ncomb), relative_error, absolute_error, E - logical, intent(inout) :: computed(N_det_generators) - integer, intent(in) :: tbc(0:size_tbc) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - double precision, intent(out) :: pt2(N_states),error(N_states) + double precision, intent(in) :: relative_error, absolute_error, E + double precision, intent(out) :: pt2(N_states), error(N_states) - type(selection_buffer), intent(inout) :: b - double precision, allocatable :: pt2_mwen(:,:) + double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:) integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, external :: zmq_delete_tasks + integer, external :: pt2_find_sample - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, n_tasks - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) + integer :: more, n, i, p, c, t, n_tasks, U integer, allocatable :: task_id(:) integer, allocatable :: index(:) - double precision :: time0 - double precision :: time, timeLast, Nabove_old + double precision, external :: omp_get_wtime - integer :: tooth, firstTBDcomb, orgTBDcomb, n_tasks_max - integer, allocatable :: parts_to_get(:) - logical, allocatable :: actually_computed(:) - double precision :: eqt - character*(512) :: task - Nabove_old = -1.d0 - n_tasks_max = N_det_generators/100+1 + double precision :: v, x, avg, eqt, E0 + double precision :: time, time0 - allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), & - pt2_mwen(N_states, n_tasks_max) ) - - pt2_mwen(1:N_states, 1:n_tasks_max) = 0.d0 - do i=1,N_det_generators - actually_computed(i) = computed(i) - enddo - - parts_to_get(:) = 1 - if(fragment_first > 0) then - do i=1,fragment_first - parts_to_get(i) = fragment_count - enddo - endif - - do i=1,tbc(0) - actually_computed(tbc(i)) = .false. - end do - - orgTBDcomb = int(Nabove(1)) - firstTBDcomb = 1 + integer, allocatable :: f(:) + logical, allocatable :: d(:) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(n_tasks_max), index(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators)) + allocate(d(N_det_generators+1)) + allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max)) + allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1)) + + S(:) = 0d0 + S2(:) = 0d0 + n = 1 + t = 0 + U = 0 + eI(:,:) = 0d0 + f(:) = pt2_F(:) + d(:) = .false. + n_tasks = 0 + E0 = E more = 1 - call wall_time(time0) - timeLast = time0 + time0 = omp_get_wtime() - call get_first_tooth(actually_computed, tooth) - Nabove_old = Nabove(tooth) - - logical :: loop - loop = .True. - pullLoop : do while (loop) - - call pull_pt2_results(zmq_socket_pull, index, pt2_mwen, task_id, n_tasks) - do i=1,n_tasks - pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i) - parts_to_get(index(i)) -= 1 - if(parts_to_get(index(i)) < 0) then - print *, i, index(i), parts_to_get(index(i)) - print *, parts_to_get - stop "PARTS ??" - end if - if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. - enddo - - integer, external :: zmq_delete_tasks - if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then - stop 'Unable to delete tasks' - endif - if (more == 0) then - loop = .False. - endif - - time = omp_get_wtime() - - if(time - timeLast > 10d0 .or. (.not.loop)) then - timeLast = time - do i=1, first_det_of_teeth(1)-1 - if(.not.(actually_computed(i))) then - cycle pullLoop + do while (n <= N_det_generators) + if(f(pt2_J(n)) == 0) then + d(pt2_J(n)) = .true. + do while(d(U+1)) + U += 1 + end do + do while(t <= pt2_N_teeth) + if(U >= pt2_n_0(t+1)) then + t=t+1 + E0 = E + do i=pt2_n_0(t),1,-1 + E0 += eI(pt2_stoch_istate, i) + end do + else + exit end if end do - - integer, external :: zmq_abort - double precision :: E0, avg, prop - - call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) - firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1 - call get_first_tooth(actually_computed, tooth) - - if (firstTBDcomb > Ncomb) then - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (1)' - endif - endif - exit pullLoop - endif - - !if(Nabove(1) < 5d0) cycle - - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - if (tooth <= comb_teeth) then - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - else - eqt = 0.d0 - endif - call wall_time(time) - if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 30) then - ! Termination - pt2(pt2_stoch_istate) = avg - error(pt2_stoch_istate) = eqt - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - call sleep(1) - if (zmq_abort(zmq_to_qp_run_socket) == -1) then - print *, irp_here, ': Error in sending abort signal (2)' - endif - endif - else - if (Nabove(tooth) > Nabove_old) then - print *, loop - print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' - Nabove_old = Nabove(tooth) - endif - endif - end if - end do pullLoop -!<<<<<<< HEAD - if(tooth == comb_teeth+1) then - pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:)) - error(pt2_stoch_istate) = 0d0 - else - E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) - prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) - E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop - pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) - error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - end if - -!======= -! -! E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1)) -! prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1)) -! prop = prop * pt2_weight_inv(first_det_of_teeth(tooth)) -! E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop -! pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth)) -! -!>>>>>>> master + c = pt2_R(n) + if(c /= 0) then + x = 0d0 + do p=pt2_N_teeth, 1, -1 + v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) + i = pt2_find_sample(v, pt2_cW) + x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) + S(p) += x + S2(p) += x**2 + end do + avg = S(t) / dble(c) + eqt = (S2(t) / c) - (S(t)/c)**2 + eqt = sqrt(eqt / dble(c-1)) + pt2(pt2_stoch_istate) = E0-E+avg + error(pt2_stoch_istate) = eqt + time = omp_get_wtime() + print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, '' + end if + n += 1 + else if(more == 0) then + exit + else + call pull_pt2_results(zmq_socket_pull, index, eI_task, task_id, n_tasks) + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then + stop 'Unable to delete tasks' + endif + do i=1,n_tasks + eI(:, index(i)) += eI_task(:, i) + f(index(i)) -= 1 + end do + end if + end do + print *, "TOTAL", sum(eI) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call sort_selection_buffer(b) end subroutine -integer function pt2_find(v, w, sze, imin, imax) + +integer function pt2_find_sample(v, w) implicit none - integer, intent(in) :: sze, imin, imax - double precision, intent(in) :: v, w(sze) + double precision, intent(in) :: v, w(0:N_det_generators) integer :: i,l,h integer, parameter :: block=64 - l = imin - h = imax-1 + l = 0 + h = N_det_generators do while(h-l >= block) i = ishft(h+l,-1) @@ -389,217 +256,131 @@ integer function pt2_find(v, w, sze, imin, imax) end if end do !DIR$ LOOP COUNT (64) - do pt2_find=l,h - if(w(pt2_find) >= v) then + do pt2_find_sample=l,h + if(w(pt2_find_sample) >= v) then exit end if end do end function -BEGIN_PROVIDER [ integer, comb_teeth ] + + BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] +&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] +&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] implicit none - comb_teeth = 100 + integer :: N_c, N_j, U, t, i + double precision :: v + logical, allocatable :: d(:) + integer, external :: pt2_find_sample + + allocate(d(N_det_generators)) + + pt2_R(:) = 0 + N_c = 0 + N_j = pt2_n_0(1) + d(:) = .false. + + do i=1,N_j + d(i) = .true. + pt2_J(i) = i + end do + + call RANDOM_NUMBER(pt2_u) + + U = 0 + + do while(N_j < N_det_generators) + !ADD_COMB + N_c += 1 + do t=0, pt2_N_teeth-1 + v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) + i = pt2_find_sample(v, pt2_cW) + if(.not. d(i)) then + N_j += 1 + pt2_J(N_j) = i + d(i) = .true. + end if + end do + + pt2_R(N_j) = N_c + + !FILL_TOOTH + do while(U < N_det_generators) + U += 1 + if(.not. d(U)) then + N_j += 1 + pt2_J(N_j) = U + d(U) = .true. + exit; + end if + end do + enddo + if(N_det_generators > 1) then + pt2_R(N_det_generators-1) = 0 + pt2_R(N_det_generators) = N_c + end if END_PROVIDER - -subroutine get_first_tooth(computed, first_teeth) + BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] +&BEGIN_PROVIDER [ double precision, pt2_W_T ] +&BEGIN_PROVIDER [ double precision, pt2_u_0 ] +&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ] implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i, first_det + integer :: i, t + double precision, allocatable :: tilde_w(:), tilde_cW(:) + double precision :: r, tooth_width + integer, external :: pt2_find_sample - first_det = N_det_generators+1+1 - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(.not.(computed(i))) then - first_det = i + allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators)) + + tilde_cW(0) = 0d0 + + do i=1,N_det_generators + tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2 + tilde_cW(i) = tilde_cW(i-1) + tilde_w(i) + enddo + + pt2_n_0(1) = 0 + do + pt2_u_0 = tilde_cW(pt2_n_0(1)) + r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth) + pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth) + if(pt2_W_T >= r - pt2_u_0) then exit end if - end do - - do i=comb_teeth+1, 1, -1 - if(first_det_of_teeth(i) < first_det) then - first_teeth = i - exit + pt2_n_0(1) += 1 + if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then + stop "teeth building failed" end if end do - -end subroutine - - -BEGIN_PROVIDER [ integer*8, size_tbc ] - implicit none - BEGIN_DOC -! Size of the tbc array - END_DOC - size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first -END_PROVIDER - -subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc) - implicit none - integer, intent(inout) :: Ncomb - double precision, intent(out) :: comb(Ncomb) - integer, intent(inout) :: tbc(0:size_tbc) - logical, intent(inout) :: computed(N_det_generators) - integer :: i, j, last_full, dets(comb_teeth) - integer :: icount, n - integer :: k, l - l=first_det_of_comb - call RANDOM_NUMBER(comb) - do i=1,size(comb) - comb(i) = comb(i) * comb_step - !DIR$ FORCEINLINE - call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth) - Ncomb = i - if (tbc(0) == N_det_generators) return - do while (computed(l)) - l=l+1 - enddo - k=tbc(0)+1 - tbc(k) = l - computed(l) = .True. - tbc(0) = k - enddo - -end subroutine - - - -subroutine get_comb(stato, dets, ct) - implicit none - integer, intent(in) :: ct - double precision, intent(in) :: stato - integer, intent(out) :: dets(ct) - double precision :: curs - integer :: j - integer, external :: pt2_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - !DIR$ FORCEINLINE - dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) - curs -= comb_step + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + do t=2, pt2_N_teeth + r = pt2_u_0 + pt2_W_T * dble(t-1) + pt2_n_0(t) = pt2_find_sample(r, tilde_cW) end do -end subroutine - - -subroutine add_comb(comb, computed, tbc, stbc, ct) - implicit none - integer*8, intent(in) :: stbc - integer, intent(in) :: ct - double precision, intent(in) :: comb - logical, intent(inout) :: computed(N_det_generators) - integer, intent(inout) :: tbc(0:stbc) - integer :: i, k, l, dets(ct) - - !DIR$ FORCEINLINE - call get_comb(comb, dets, ct) - - k=tbc(0)+1 - do i = 1, ct - l = dets(i) - if(.not.(computed(l))) then - tbc(k) = l - k = k+1 - computed(l) = .true. - end if + pt2_n_0(pt2_N_teeth+1) = N_det_generators + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1)) + do t=1, pt2_N_teeth + tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t)) + do i=pt2_n_0(t)+1, pt2_n_0(t+1) + pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width + end do end do - tbc(0) = k-1 -end subroutine - - -BEGIN_PROVIDER [ integer, pt2_stoch_istate ] - implicit none - BEGIN_DOC - ! State for stochatsic PT2 - END_DOC - pt2_stoch_istate = 1 -END_PROVIDER - - - BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: pt2_find - - pt2_weight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 - pt2_cweight(1) = psi_coef_generators(1,pt2_stoch_istate)**2 + pt2_cW(0) = 0d0 do i=1,N_det_generators - pt2_weight(i) = psi_coef_generators(i,pt2_stoch_istate)**2 - enddo - - ! Important to loop backwards for numerical precision - pt2_cweight(N_det_generators) = pt2_weight(N_det_generators) - do i=N_det_generators-1,1,-1 - pt2_cweight(i) = pt2_weight(i) + pt2_cweight(i+1) + pt2_cW(i) = pt2_cW(i-1) + pt2_w(i) end do - - do i=1,N_det_generators - pt2_weight(i) = pt2_weight(i) / pt2_cweight(1) - pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(1) - enddo - - do i=1,N_det_generators-1 - pt2_cweight(i) = 1.d0 - pt2_cweight(i+1) - end do - pt2_cweight(N_det_generators) = 1.d0 - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - first_det_of_comb = 1 - do i=1,N_det_generators - if(pt2_weight(i)/norm_left < .5d0*comb_step) then - first_det_of_comb = i - exit - end if - norm_left -= pt2_weight(i) - end do - first_det_of_comb = max(2,first_det_of_comb) - call write_int(6, first_det_of_comb-1, 'Size of deterministic set') - - comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step - - stato = 1d0 - comb_step - iloc = N_det_generators - do i=comb_teeth, 1, -1 - integer :: iloc - iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc) - first_det_of_teeth(i) = iloc - stato -= comb_step - end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) then - print *, 'Error in ', irp_here - stop "comb provider" - endif - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ] - implicit none - BEGIN_DOC -! Inverse of pt2_weight array - END_DOC - integer :: i - do i=1,N_det_generators - pt2_weight_inv(i) = 1.d0/pt2_weight(i) - enddo - + pt2_n_0(pt2_N_teeth+1) = N_det_generators END_PROVIDER - diff --git a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f index 88c8aacb..6808e553 100644 --- a/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_pt2_slave.irp.f @@ -22,12 +22,11 @@ subroutine run_pt2_slave(thread,iproc,energy) logical :: done double precision,allocatable :: pt2(:,:) - integer :: n_tasks, k, n_tasks_max + integer :: n_tasks, k integer, allocatable :: i_generator(:), subset(:) - n_tasks_max = N_det_generators/100+1 - allocate(task_id(n_tasks_max), task(n_tasks_max)) - allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max)) + allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) + allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -46,7 +45,7 @@ subroutine run_pt2_slave(thread,iproc,energy) done = .False. do while (.not.done) - n_tasks = min(n_tasks+1,n_tasks_max) + n_tasks = min(n_tasks+1,pt2_n_tasks_max) integer, external :: get_tasks_from_taskserver if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then @@ -63,7 +62,7 @@ subroutine run_pt2_slave(thread,iproc,energy) do k=1,n_tasks pt2(:,k) = 0.d0 buf%cur = 0 - call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k)) + call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k))) enddo integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then @@ -190,12 +189,5 @@ IRP_ENDIF end subroutine - -BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ] - integer :: i - do i=1,N_det_generators - pt2_workload(i) = dfloat(N_det_generators - i + 1)**2 - end do - pt2_workload = pt2_workload / sum(pt2_workload) -END_PROVIDER + diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f index 464f0a9f..c6f0fbd3 100644 --- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f @@ -60,7 +60,7 @@ subroutine run_selection_slave(thread,iproc,energy) else ASSERT (N == buf%N) end if - call select_connected(i_generator,energy,pt2,buf,0) + call select_connected(i_generator,energy,pt2,buf,1,1) endif integer, external :: task_done_to_taskserver diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 2463b762..79ed1746 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -46,11 +46,11 @@ subroutine get_mask_phase(det, phasemask) end subroutine -subroutine select_connected(i_generator,E0,pt2,b,subset) +subroutine select_connected(i_generator,E0,pt2,b,subset,csubset) use bitmasks use selection_types implicit none - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset type(selection_buffer), intent(inout) :: b double precision, intent(inout) :: pt2(N_states) integer :: k,l @@ -71,7 +71,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) ) enddo - call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset) + call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset,csubset) enddo deallocate(fock_diag_tmp) end subroutine @@ -254,7 +254,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) end -subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset) +subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset,csubset) use bitmasks use selection_types implicit none @@ -262,7 +262,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d ! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC - integer, intent(in) :: i_generator, subset + integer, intent(in) :: i_generator, subset, csubset integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision, intent(in) :: fock_diag_tmp(mo_tot_num) double precision, intent(in) :: E0(N_states) @@ -286,7 +286,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d integer(bit_kind), allocatable:: preinteresting_det(:,:,:) allocate (preinteresting_det(N_int,2,N_det)) - PROVIDE fragment_count + !PROVIDE fragment_count monoAdo = .true. monoBdo = .true. @@ -559,7 +559,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d end if maskInd += 1 - if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then + if(mod(maskInd, csubset) == (subset-1)) then call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting) if(fullMatch) cycle