diff --git a/plugins/Full_CI_ZMQ/energy.irp.f b/plugins/Full_CI_ZMQ/energy.irp.f index db1e7d1a..7b7cc75b 100644 --- a/plugins/Full_CI_ZMQ/energy.irp.f +++ b/plugins/Full_CI_ZMQ/energy.irp.f @@ -1,11 +1,23 @@ +BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize pt2_E0_denominator + END_DOC + initialize_pt2_E0_denominator = .True. +END_PROVIDER + BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ] implicit none BEGIN_DOC ! E0 in the denominator of the PT2 END_DOC - pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states) + if (initialize_pt2_E0_denominator) then + pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states) ! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion ! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) - call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator') + call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator') + else + pt2_E0_denominator = -huge(1.d0) + endif END_PROVIDER diff --git a/plugins/Full_CI_ZMQ/fci_routines.irp.f b/plugins/Full_CI_ZMQ/fci_routines.irp.f new file mode 100644 index 00000000..d285912a --- /dev/null +++ b/plugins/Full_CI_ZMQ/fci_routines.irp.f @@ -0,0 +1,119 @@ +subroutine ZMQ_selection(N_in, pt2) + use f77_zmq + use selection_types + + implicit none + + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + integer, intent(in) :: N_in + type(selection_buffer) :: b + integer :: i, N + integer, external :: omp_get_thread_num + double precision, intent(out) :: pt2(N_states) + + + N = max(N_in,1) + provide nproc + provide ci_electronic_energy + call new_parallel_job(zmq_to_qp_run_socket,"selection") + call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(N, N*8, b) + + integer :: i_generator, i_generator_start, i_generator_max, step +! step = int(max(1.,10*elec_num/mo_tot_num) + + step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = max(1,step) + do i= N_det_generators, 1, -step + i_generator_start = max(i-step+1,1) + i_generator_max = i + write(task,*) i_generator_start, i_generator_max, 1, N + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + + !$OMP PARALLEL DEFAULT(none) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) shared(ci_electronic_energy_is_built, n_det_generators_is_built, n_states_is_built, n_int_is_built, nproc_is_built) + i = omp_get_thread_num() + if (i==0) then + call selection_collector(b, pt2) + else + call selection_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'selection') + if (N_in > 0) then + call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN + call copy_H_apply_buffer_to_wf() + endif +end subroutine + + +subroutine selection_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_selection_slave(1,i,ci_electronic_energy) +end + +subroutine pt2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_pt2_slave(1,i,ci_electronic_energy) +end + +subroutine selection_collector(b, pt2) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + type(selection_buffer), intent(inout) :: b + double precision, intent(out) :: pt2(N_states) + double precision :: pt2_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: done + real :: time, time0 + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) + done = 0 + more = 1 + pt2(:) = 0d0 + call CPU_TIME(time0) + do while (more == 1) + call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) + pt2 += pt2_mwen + do i=1, N + call add_to_selection_buffer(b, det(1,1,i), val(i)) + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + done += ntask + call CPU_TIME(time) +! print *, "DONE" , done, time - time0 + end do + + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 6a89fa75..016f5ff8 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -103,7 +103,9 @@ program fci_zmq threshold_generators = 1d0 E_CI_before(1:N_states) = CI_energy(1:N_states) !call ZMQ_selection(0, pt2)! pour non-stochastic - call ZMQ_pt2(pt2) + double precision :: relative_error + relative_error=1.d-2 + call ZMQ_pt2(pt2,relative_error) print *, 'Final step' print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -122,511 +124,3 @@ program fci_zmq end -subroutine ZMQ_pt2(pt2) - use f77_zmq - use selection_types - - implicit none - - character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - type(selection_buffer) :: b - integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - - - double precision, allocatable :: pt2_detail(:,:), comb(:) - logical, allocatable :: computed(:) - integer, allocatable :: tbc(:) - integer :: i, j, Ncomb, generator_per_task, i_generator_end - integer, external :: pt2_find - - double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - double precision, external :: omp_get_wtime - double precision :: time0, time - - allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) - provide nproc - - !call random_seed() - - computed = .false. - tbc(0) = first_det_of_comb - 1 - do i=1, tbc(0) - tbc(i) = i - computed(i) = .true. - end do - - pt2_detail = 0d0 - - time0 = omp_get_wtime() - print *, "grep - time - avg - err - n_combs" - do while(.true.) - - call new_parallel_job(zmq_to_qp_run_socket,"pt2") - call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(1, 1*2, b) - - - - - - - call get_carlo_workbatch(1d-3, computed, comb, Ncomb, tbc) - generator_per_task = 1 ! tbc(0)/300 + 1 - !do i=1,tbc(0),generator_per_task - do i=1,tbc(0) ! generator_per_task - i_generator_end = min(i+generator_per_task-1, tbc(0)) - !print *, "TASK", (i_generator_end-i+1), tbc(i:i_generator_end) - if(i > 10) then - integer :: zero - zero = 0 - write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - else - do j=1,8 - write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - end if - end do - - !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) - i = omp_get_thread_num() - if (i==0) then - call pt2_collector(b, pt2_detail) - else - call pt2_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'pt2') - double precision :: E0, avg, eqt - call do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) - !END LOOP? - integer :: tooth - call get_first_tooth(computed, tooth) - !print *, "TOOTH ", tooth - - !!! ASSERT - !do i=1,first_det_of_teeth(tooth) - ! if(not(computed(i))) stop "deter non calc" - !end do - !logical :: ok - !ok = .false. - !do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1) - ! if(not(computed(i))) ok = .true. - !end do - !if(not(ok)) stop "not OK..." - !!!!! - double precision :: prop - if(Nabove(tooth) >= 30) then - E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) - prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) - prop = prop / weight(first_det_of_teeth(tooth)) - E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop - avg = E0 + (sumabove(tooth) / Nabove(tooth)) - eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) - time = omp_get_wtime() - print *, 'PT2stoch ', real (time - time0), real(avg), real(eqt), real(Nabove(tooth)) - else - print *, Nabove(tooth), "< 30 combs" - end if - tbc(0) = 0 - end do - - pt2 = 0d0 -end subroutine - - -subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, sumabove, sum2above, Nabove) - integer, intent(in) :: tbc(0:N_det_generators), Ncomb - double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators) - double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) - integer :: i, dets(comb_teeth) - double precision :: myVal, myVal2 - - - do i=1,Ncomb - call get_comb(comb(i), dets) - myVal = 0d0 - myVal2 = 0d0 - do j=comb_teeth,1,-1 - myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step - sumabove(j) += myVal - sum2above(j) += myVal**2 - Nabove(j) += 1 - end do - end do -end subroutine - - -subroutine ZMQ_selection(N_in, pt2) - use f77_zmq - use selection_types - - implicit none - - character*(512) :: task - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - integer, intent(in) :: N_in - type(selection_buffer) :: b - integer :: i, N - integer, external :: omp_get_thread_num - double precision, intent(out) :: pt2(N_states) - - - N = max(N_in,1) - provide nproc - provide ci_electronic_energy - call new_parallel_job(zmq_to_qp_run_socket,"selection") - call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) - call zmq_set_running(zmq_to_qp_run_socket) - call create_selection_buffer(N, N*8, b) - - integer :: i_generator, i_generator_start, i_generator_max, step -! step = int(max(1.,10*elec_num/mo_tot_num) - - step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) - step = max(1,step) - do i= N_det_generators, 1, -step - i_generator_start = max(i-step+1,1) - i_generator_max = i - write(task,*) i_generator_start, i_generator_max, 1, N - call add_task_to_taskserver(zmq_to_qp_run_socket,task) - end do - - !$OMP PARALLEL DEFAULT(none) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) shared(ci_electronic_energy_is_built, n_det_generators_is_built, n_states_is_built, n_int_is_built, nproc_is_built) - i = omp_get_thread_num() - if (i==0) then - call selection_collector(b, pt2) - else - call selection_slave_inproc(i) - endif - !$OMP END PARALLEL - call end_parallel_job(zmq_to_qp_run_socket, 'selection') - if (N_in > 0) then - call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN - call copy_H_apply_buffer_to_wf() - endif -end subroutine - - -subroutine selection_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_selection_slave(1,i,ci_electronic_energy) -end - -subroutine pt2_slave_inproc(i) - implicit none - integer, intent(in) :: i - - call run_pt2_slave(1,i,ci_electronic_energy) -end - -subroutine pt2_collector(b, pt2_detail) - use f77_zmq - use selection_types - use bitmasks - implicit none - - - type(selection_buffer), intent(inout) :: b - double precision, intent(inout) :: pt2_detail(N_states, N_det) - double precision :: pt2_mwen(N_states, N_det) - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer(ZMQ_PTR) :: zmq_socket_pull - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) - integer, allocatable :: task_id(:) - integer :: done, Nindex - integer, allocatable :: index(:) - real :: time, time0 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) - done = 0 - more = 1 - !pt2_detail = 0d0 - call CPU_TIME(time0) - do while (more == 1) - call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) - do i=1,Nindex - pt2_detail(:, index(i)) += pt2_mwen(:,i) - end do - - !do i=1, N - ! call add_to_selection_buffer(b, det(1,1,i), val(i)) - !end do - - do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif - call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) - end do - done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do - - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) -end subroutine - - -subroutine selection_collector(b, pt2) - use f77_zmq - use selection_types - use bitmasks - implicit none - - - type(selection_buffer), intent(inout) :: b - double precision, intent(out) :: pt2(N_states) - double precision :: pt2_mwen(N_states) - integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket - integer(ZMQ_PTR) :: zmq_to_qp_run_socket - - integer(ZMQ_PTR), external :: new_zmq_pull_socket - integer(ZMQ_PTR) :: zmq_socket_pull - - integer :: msg_size, rc, more - integer :: acc, i, j, robin, N, ntask - double precision, allocatable :: val(:) - integer(bit_kind), allocatable :: det(:,:,:) - integer, allocatable :: task_id(:) - integer :: done - real :: time, time0 - zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - zmq_socket_pull = new_zmq_pull_socket() - allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det)) - done = 0 - more = 1 - pt2(:) = 0d0 - call CPU_TIME(time0) - do while (more == 1) - call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask) - pt2 += pt2_mwen - do i=1, N - call add_to_selection_buffer(b, det(1,1,i), val(i)) - end do - - do i=1, ntask - if(task_id(i) == 0) then - print *, "Error in collector" - endif - call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) - end do - done += ntask - call CPU_TIME(time) -! print *, "DONE" , done, time - time0 - end do - - - call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) - call end_zmq_pull_socket(zmq_socket_pull) - call sort_selection_buffer(b) -end subroutine - - - -integer function pt2_find(v, w) - implicit none - double precision :: v, w(N_det) - integer :: i,l,h - - l = 0 - h = N_det-1 - - do while(h >= l) - i = (h+l)/2 - if(w(i+1) > v) then - h = i-1 - else - l = i+1 - end if - end do - pt2_find = l+1 -end function - - -BEGIN_PROVIDER [ integer, comb_teeth ] - implicit none - comb_teeth = 100 -END_PROVIDER - - - -subroutine get_first_tooth(computed, first_teeth) - implicit none - logical, intent(in) :: computed(N_det_generators) - integer, intent(out) :: first_teeth - integer :: i - - first_teeth = 1 - do i=first_det_of_comb, N_det_generators - if(not(computed(i))) then - first_teeth = i - exit - end if - end do - - do i=comb_teeth, 1, -1 - if(first_det_of_teeth(i) < first_teeth) then - first_teeth = i - exit - end if - end do -end subroutine - - -subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) - implicit none - double precision, intent(in) :: maxWorkload - double precision, intent(out) :: comb(N_det_generators) - integer, intent(inout) :: tbc(0:N_det_generators) - integer, intent(out) :: Ncomb - logical, intent(inout) :: computed(N_det_generators) - integer :: i, dets(comb_teeth) - double precision :: myWorkload - - myWorkload = 0d0 - - do i=1,size(comb) - call RANDOM_NUMBER(comb(i)) - comb(i) = comb(i) * comb_step - call add_comb(comb(i), computed, tbc, myWorkload) - Ncomb = i - if(myWorkload > maxWorkload .and. i >= 50) exit - end do - call reorder_tbc(tbc) -end subroutine - - -subroutine reorder_tbc(tbc) - implicit none - integer, intent(inout) :: tbc(0:N_det_generators) - logical, allocatable :: ltbc(:) - integer :: i, ci - - allocate(ltbc(N_det_generators)) - ltbc = .false. - do i=1,tbc(0) - ltbc(tbc(i)) = .true. - end do - - ci = 0 - do i=1,N_det_generators - if(ltbc(i)) then - ci += 1 - tbc(ci) = i - end if - end do -end subroutine - - -subroutine get_comb(stato, dets) - implicit none - double precision, intent(in) :: stato - integer, intent(out) :: dets(comb_teeth) - double precision :: curs - integer :: j - integer, external :: pt2_find - - curs = 1d0 - stato - do j = comb_teeth, 1, -1 - dets(j) = pt2_find(curs, cweight) - curs -= comb_step - end do -end subroutine - - -subroutine add_comb(comb, computed, tbc, workload) - implicit none - double precision, intent(in) :: comb - logical, intent(inout) :: computed(N_det_generators) - double precision, intent(inout) :: workload - integer, intent(inout) :: tbc(0:N_det_generators) - integer :: i, dets(comb_teeth) - - call get_comb(comb, dets) - - do i = 1, comb_teeth - if(not(computed(dets(i)))) then - tbc(0) += 1 - tbc(tbc(0)) = dets(i) - workload += comb_workload(dets(i)) - computed(dets(i)) = .true. - end if - end do -end subroutine - - - - BEGIN_PROVIDER [ double precision, weight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, cweight, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] -&BEGIN_PROVIDER [ double precision, comb_step ] -&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] -&BEGIN_PROVIDER [ integer, first_det_of_comb ] - implicit none - integer :: i - double precision :: norm_left, stato - integer, external :: pt2_find - - weight(1) = psi_coef_generators(1,1)**2 - cweight(1) = psi_coef_generators(1,1)**2 - - do i=2,N_det_generators - weight(i) = psi_coef_generators(i,1)**2 - cweight(i) = cweight(i-1) + psi_coef_generators(i,1)**2 - end do - - weight = weight / cweight(N_det_generators) - cweight = cweight / cweight(N_det_generators) - comb_workload = 1d0 / dfloat(N_det_generators) - - norm_left = 1d0 - - comb_step = 1d0/dfloat(comb_teeth) - do i=1,N_det_generators - if(weight(i)/norm_left < comb_step/2d0) then - first_det_of_comb = i - exit - end if - norm_left -= weight(i) - end do - - comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - cweight(first_det_of_comb-1)) - - stato = 1d0 - comb_step! + 1d-5 - do i=comb_teeth, 1, -1 - first_det_of_teeth(i) = pt2_find(stato, cweight) - stato -= comb_step - end do - first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 - first_det_of_teeth(1) = first_det_of_comb - if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" - -END_PROVIDER - - - - - - - - - - diff --git a/plugins/Full_CI_ZMQ/pt2_slave.irp.f b/plugins/Full_CI_ZMQ/pt2_slave.irp.f index 91c3db63..e45bbc51 100644 --- a/plugins/Full_CI_ZMQ/pt2_slave.irp.f +++ b/plugins/Full_CI_ZMQ/pt2_slave.irp.f @@ -13,7 +13,6 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context -! PROVIDE ci_electronic_energy mo_tot_num N_int end subroutine run_wf @@ -47,7 +46,7 @@ subroutine run_wf ! --------- print *, 'PT2' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() @@ -60,29 +59,6 @@ subroutine run_wf end do end -subroutine update_energy(energy) - implicit none - double precision, intent(in) :: energy(N_states_diag) - BEGIN_DOC -! Update energy when it is received from ZMQ - END_DOC - integer :: j,k - do j=1,N_states - do k=1,N_det - CI_eigenvectors(k,j) = psi_coef(k,j) - enddo - enddo - call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) - if (.True.) then - do k=1,size(ci_electronic_energy) - ci_electronic_energy(k) = energy(k) - enddo - TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors - endif - - call write_double(6,ci_energy,'Energy') -end - subroutine pt2_slave_tcp(i,energy) implicit none double precision, intent(in) :: energy(N_states_diag) diff --git a/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f new file mode 100644 index 00000000..47651e22 --- /dev/null +++ b/plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f @@ -0,0 +1,429 @@ +subroutine ZMQ_pt2(pt2,relative_error) + use f77_zmq + use selection_types + + implicit none + + character*(512) :: task + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + type(selection_buffer) :: b + integer, external :: omp_get_thread_num + double precision, intent(in) :: relative_error + double precision, intent(out) :: pt2(N_states) + + + double precision, allocatable :: pt2_detail(:,:), comb(:) + logical, allocatable :: computed(:) + integer, allocatable :: tbc(:) + integer :: i, j, Ncomb, generator_per_task, i_generator_end + integer, external :: pt2_find + + double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + double precision, external :: omp_get_wtime + double precision :: time0, time + + allocate(pt2_detail(N_states, N_det_generators), comb(100000), computed(N_det_generators), tbc(0:N_det_generators)) + sumabove = 0d0 + sum2above = 0d0 + Nabove = 0d0 + + provide nproc + + !call random_seed() + + computed = .false. + tbc(0) = first_det_of_comb - 1 + do i=1, tbc(0) + tbc(i) = i + computed(i) = .true. + end do + + pt2_detail = 0d0 + + time0 = omp_get_wtime() + print *, "grep - time - avg - err - n_combs" + do while(.true.) + + call new_parallel_job(zmq_to_qp_run_socket,"pt2") + call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator)) + call zmq_set_running(zmq_to_qp_run_socket) + call create_selection_buffer(1, 1*2, b) + + ! TODO PARAMETER : 1.d-2 + call get_carlo_workbatch(1d-2, computed, comb, Ncomb, tbc) + generator_per_task = 1 + do i=1,tbc(0) + i_generator_end = min(i+generator_per_task-1, tbc(0)) + if(tbc(i) > 10) then + integer :: zero + zero = 0 + write(task,*) (i_generator_end-i+1), zero, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + else + do j=1,8 + write(task,*) (i_generator_end-i+1), j, tbc(i:i_generator_end) + call add_task_to_taskserver(zmq_to_qp_run_socket,task) + end do + end if + end do + + !$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2, relative_error) PRIVATE(i) NUM_THREADS(nproc+1) + i = omp_get_thread_num() + if (i==0) then + call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) + else + call pt2_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, 'pt2') + tbc(0) = 0 + if (pt2(1) /= 0.d0) then + exit + endif + end do + +end subroutine + + +subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove) + integer, intent(in) :: tbc(0:N_det_generators), Ncomb + logical, intent(in) :: computed(N_det_generators) + double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth) + integer :: i, dets(comb_teeth) + double precision :: myVal, myVal2 + + mainLoop : do i=1,Ncomb + call get_comb(comb(i), dets) + do j=1,comb_teeth + if(not(computed(dets(j)))) then + exit mainLoop + end if + end do + + myVal = 0d0 + myVal2 = 0d0 + do j=comb_teeth,1,-1 + myVal += pt2_detail(1, dets(j)) / weight(dets(j)) * comb_step + sumabove(j) += myVal + sum2above(j) += myVal**2 + Nabove(j) += 1 + end do + end do mainLoop +end subroutine + + +subroutine pt2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_pt2_slave(1,i,pt2_e0_denominator) +end + +subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2) + use f77_zmq + use selection_types + use bitmasks + implicit none + + + integer, intent(in) :: Ncomb + double precision, intent(inout) :: pt2_detail(N_states, N_det_generators) + double precision, intent(in) :: comb(Ncomb) + logical, intent(inout) :: computed(N_det_generators) + integer, intent(in) :: tbc(0:N_det_generators) + double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth), relative_error + double precision, intent(out) :: pt2(N_states) + + + type(selection_buffer), intent(inout) :: b + double precision :: pt2_mwen(N_states, N_det) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + integer(ZMQ_PTR) :: zmq_socket_pull + + integer :: msg_size, rc, more + integer :: acc, i, j, robin, N, ntask + double precision, allocatable :: val(:) + integer(bit_kind), allocatable :: det(:,:,:) + integer, allocatable :: task_id(:) + integer :: done, Nindex + integer, allocatable :: index(:) + double precision :: time, time0, timeLast + double precision, external :: omp_get_wtime + integer :: tooth, firstTBDcomb, orgTBDcomb + integer, allocatable :: parts_to_get(:) + logical, allocatable :: actually_computed(:) + + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) + actually_computed = computed + + parts_to_get(:) = 1 + parts_to_get(1:10) = 8 + + do i=1,tbc(0) + actually_computed(tbc(i)) = .false. + end do + + orgTBDcomb = Nabove(1) + firstTBDcomb = 1 + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_pull = new_zmq_pull_socket() + allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det), index(N_det)) + done = 0 + more = 1 + time0 = omp_get_wtime() + timeLast = time0 + + pullLoop : do while (more == 1) + call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask) + do i=1,Nindex + pt2_detail(:, index(i)) += pt2_mwen(:,i) + parts_to_get(index(i)) -= 1 + if(parts_to_get(index(i)) < 0) then + stop "PARTS ??" + end if + if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true. + end do + + do i=1, ntask + if(task_id(i) == 0) then + print *, "Error in collector" + endif + call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) + end do + done += ntask + + time = omp_get_wtime() + + if(time - timeLast > 30.0 .or. more /= 1) then + timeLast = time + do i=1, first_det_of_teeth(1)-1 + if(not(actually_computed(i))) then + print *, "PT2 : deterministic part not finished" + cycle pullLoop + end if + end do + + + double precision :: E0, avg, eqt, prop + call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove) + firstTBDcomb = Nabove(1) - orgTBDcomb + 1 + if(Nabove(1) < 2.0) cycle + call get_first_tooth(actually_computed, tooth) + + E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1)) + prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - cweight(first_det_of_teeth(tooth)-1)) + prop = prop / weight(first_det_of_teeth(tooth)) + E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop + avg = E0 + (sumabove(tooth) / Nabove(tooth)) + eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2)) + time = omp_get_wtime() + print "(A, 5(E15.7))", "PT2stoch ", time - time0, avg, eqt, Nabove(tooth) + if (dabs(eqt/avg) < relative_error) then + relative_error = 0.d0 + pt2(1) = avg + exit + endif + end if + end do pullLoop + + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_pull_socket(zmq_socket_pull) + call sort_selection_buffer(b) +end subroutine + + +integer function pt2_find(v, w) + implicit none + double precision :: v, w(N_det) + integer :: i,l,h + + l = 0 + h = N_det-1 + + do while(h >= l) + i = (h+l)/2 + if(w(i+1) > v) then + h = i-1 + else + l = i+1 + end if + end do + pt2_find = l+1 +end function + + +BEGIN_PROVIDER [ integer, comb_teeth ] + implicit none + comb_teeth = 100 +END_PROVIDER + + + +subroutine get_first_tooth(computed, first_teeth) + implicit none + logical, intent(in) :: computed(N_det_generators) + integer, intent(out) :: first_teeth + integer :: i + + first_teeth = 1 + do i=first_det_of_comb, N_det_generators + if(not(computed(i))) then + first_teeth = i + exit + end if + end do + + do i=comb_teeth, 1, -1 + if(first_det_of_teeth(i) < first_teeth) then + first_teeth = i + exit + end if + end do +end subroutine + + +subroutine get_carlo_workbatch(maxWorkload, computed, comb, Ncomb, tbc) + implicit none + double precision, intent(in) :: maxWorkload + double precision, intent(out) :: comb(N_det_generators) + integer, intent(inout) :: tbc(0:N_det_generators) + integer, intent(out) :: Ncomb + logical, intent(inout) :: computed(N_det_generators) + integer :: i, dets(comb_teeth) + double precision :: myWorkload + + myWorkload = 0d0 + + do i=1,size(comb) + call RANDOM_NUMBER(comb(i)) + comb(i) = comb(i) * comb_step + call add_comb(comb(i), computed, tbc, myWorkload) + Ncomb = i + if(myWorkload > maxWorkload .and. i >= 100) exit + end do + +end subroutine + + +subroutine reorder_tbc(tbc) + implicit none + integer, intent(inout) :: tbc(0:N_det_generators) + logical, allocatable :: ltbc(:) + integer :: i, ci + + allocate(ltbc(N_det_generators)) + ltbc = .false. + do i=1,tbc(0) + ltbc(tbc(i)) = .true. + end do + + ci = 0 + do i=1,N_det_generators + if(ltbc(i)) then + ci += 1 + tbc(ci) = i + end if + end do +end subroutine + + +subroutine get_comb(stato, dets) + implicit none + double precision, intent(in) :: stato + integer, intent(out) :: dets(comb_teeth) + double precision :: curs + integer :: j + integer, external :: pt2_find + + curs = 1d0 - stato + do j = comb_teeth, 1, -1 + dets(j) = pt2_find(curs, cweight) + curs -= comb_step + end do +end subroutine + + +subroutine add_comb(comb, computed, tbc, workload) + implicit none + double precision, intent(in) :: comb + logical, intent(inout) :: computed(N_det_generators) + double precision, intent(inout) :: workload + integer, intent(inout) :: tbc(0:N_det_generators) + integer :: i, dets(comb_teeth) + + call get_comb(comb, dets) + + do i = 1, comb_teeth + if(not(computed(dets(i)))) then + tbc(0) += 1 + tbc(tbc(0)) = dets(i) + workload += comb_workload(dets(i)) + computed(dets(i)) = .true. + end if + end do +end subroutine + + + + BEGIN_PROVIDER [ double precision, weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, cweight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb_workload, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb_step ] +&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] +&BEGIN_PROVIDER [ integer, first_det_of_comb ] + implicit none + integer :: i + double precision :: norm_left, stato + integer, external :: pt2_find + + weight(1) = psi_coef_generators(1,1)**2 + cweight(1) = psi_coef_generators(1,1)**2 + + do i=2,N_det_generators + weight(i) = psi_coef_generators(i,1)**2 + cweight(i) = cweight(i-1) + psi_coef_generators(i,1)**2 + end do + + weight = weight / cweight(N_det_generators) + cweight = cweight / cweight(N_det_generators) + comb_workload = 1d0 / dfloat(N_det_generators) + + norm_left = 1d0 + + comb_step = 1d0/dfloat(comb_teeth) + do i=1,N_det_generators + if(weight(i)/norm_left < comb_step/2d0) then + first_det_of_comb = i + exit + end if + norm_left -= weight(i) + end do + + comb_step = 1d0 / dfloat(comb_teeth) * (1d0 - cweight(first_det_of_comb-1)) + + stato = 1d0 - comb_step! + 1d-5 + do i=comb_teeth, 1, -1 + first_det_of_teeth(i) = pt2_find(stato, cweight) + stato -= comb_step + end do + first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 + first_det_of_teeth(1) = first_det_of_comb + if(first_det_of_teeth(1) /= first_det_of_comb) stop "comb provider" + +END_PROVIDER + + + + + + + + + + diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index d6204cc3..695866ed 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -74,28 +74,6 @@ subroutine run_wf end do end -subroutine update_energy(energy) - implicit none - double precision, intent(in) :: energy(N_states) - BEGIN_DOC -! Update energy when it is received from ZMQ - END_DOC - integer :: j,k - do j=1,N_states - do k=1,N_det - CI_eigenvectors(k,j) = psi_coef(k,j) - enddo - enddo - call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) - if (.True.) then - do k=1,N_states - ci_electronic_energy(k) = energy(k) - enddo - TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors - endif - - call write_double(6,ci_energy,'Energy') -end subroutine selection_slave_tcp(i,energy) implicit none diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index 657ad63c..12a05601 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -60,28 +60,6 @@ subroutine run_wf end do end -subroutine update_energy(energy) - implicit none - double precision, intent(in) :: energy(N_states) - BEGIN_DOC -! Update energy when it is received from ZMQ - END_DOC - integer :: j,k - do j=1,N_states - do k=1,N_det - CI_eigenvectors(k,j) = psi_coef(k,j) - enddo - enddo - call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int) - if (.True.) then - do k=1,N_states - ci_electronic_energy(k) = energy(k) - enddo - TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors - endif - - call write_double(6,ci_energy,'Energy') -end subroutine selection_slave_tcp(i,energy) implicit none