From ff8bc16a9c07ccb185c99225968d1210ecd86dbd Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Fri, 15 Dec 2017 16:21:04 +0100 Subject: [PATCH] dress_zmq with null dressing --- plugins/dress_zmq/dress_general.irp.f | 134 ++++ plugins/dress_zmq/dress_slave.irp.f | 75 +++ plugins/dress_zmq/dress_stoch_routines.irp.f | 617 +++++++++++++++++++ plugins/dress_zmq/dress_zmq.irp.f | 24 + plugins/dress_zmq/dressing.irp.f | 109 ++++ plugins/dress_zmq/energy.irp.f | 23 + plugins/dress_zmq/run_dress_slave.irp.f | 174 ++++++ 7 files changed, 1156 insertions(+) create mode 100644 plugins/dress_zmq/dress_general.irp.f create mode 100644 plugins/dress_zmq/dress_slave.irp.f create mode 100644 plugins/dress_zmq/dress_stoch_routines.irp.f create mode 100644 plugins/dress_zmq/dress_zmq.irp.f create mode 100644 plugins/dress_zmq/dressing.irp.f create mode 100644 plugins/dress_zmq/energy.irp.f create mode 100644 plugins/dress_zmq/run_dress_slave.irp.f diff --git a/plugins/dress_zmq/dress_general.irp.f b/plugins/dress_zmq/dress_general.irp.f new file mode 100644 index 00000000..1f33e2d6 --- /dev/null +++ b/plugins/dress_zmq/dress_general.irp.f @@ -0,0 +1,134 @@ + + +subroutine run(N_st,energy) + implicit none + + integer, intent(in) :: N_st + double precision, intent(out) :: energy(N_st) + + integer :: i,j + + double precision :: E_new, E_old, delta_e + integer :: iteration + + integer :: n_it_dress_max + double precision :: thresh_dress + double precision, allocatable :: lambda(:) + allocate (lambda(N_states)) + + thresh_dress = thresh_dressed_ci + n_it_dress_max = n_it_max_dressed_ci + + if(n_it_dress_max == 1) then + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors_dressed(i,j) + enddo + enddo + SOFT_TOUCH psi_coef ci_energy_dressed + call write_double(6,ci_energy_dressed(1),"Final dress energy") + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) + call save_wavefunction + else + E_new = 0.d0 + delta_E = 1.d0 + iteration = 0 + lambda = 1.d0 + do while (delta_E > thresh_dress) + iteration += 1 + print *, '===============================================' + print *, 'Iteration', iteration, '/', n_it_dress_max + print *, '===============================================' + print *, '' + E_old = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) + do i=1,N_st + call write_double(6,ci_energy_dressed(i),"Energy") + enddo + call diagonalize_ci_dressed(lambda) + E_new = dress_e0_denominator(1) !sum(ci_energy_dressed(1:N_states)) + + delta_E = (E_new - E_old)/dble(N_states) + print *, '' + call write_double(6,thresh_dress,"thresh_dress") + call write_double(6,delta_E,"delta_E") + delta_E = dabs(delta_E) + call save_wavefunction + call ezfio_set_mrcepa0_energy(ci_energy_dressed(1)) + if (iteration >= n_it_dress_max) then + exit + endif + enddo + call write_double(6,ci_energy_dressed(1),"Final energy") + endif + energy(1:N_st) = ci_energy_dressed(1:N_st) +end + + +subroutine print_cas_coefs + implicit none + + integer :: i,j + print *, 'CAS' + print *, '===' + do i=1,N_det_cas + print *, (psi_cas_coef(i,j), j=1,N_states) + call debug_det(psi_cas(1,1,i),N_int) + enddo + call write_double(6,ci_energy(1),"Initial CI energy") + +end + + +subroutine run_pt2(N_st,energy) + implicit none + integer :: i,j,k + integer, intent(in) :: N_st + double precision, intent(in) :: energy(N_st) + double precision :: pt2(N_st) + double precision :: norm_pert(N_st),H_pert_diag(N_st) + + pt2 = 0d0 + + print*,'Last iteration only to compute the PT2' + + N_det_generators = N_det_cas + N_det_selectors = N_det_non_ref + + do i=1,N_det_generators + do k=1,N_int + psi_det_generators(k,1,i) = psi_ref(k,1,i) + psi_det_generators(k,2,i) = psi_ref(k,2,i) + enddo + do k=1,N_st + psi_coef_generators(i,k) = psi_ref_coef(i,k) + enddo + enddo + do i=1,N_det + do k=1,N_int + psi_selectors(k,1,i) = psi_det_sorted(k,1,i) + psi_selectors(k,2,i) = psi_det_sorted(k,2,i) + enddo + do k=1,N_st + psi_selectors_coef(i,k) = psi_coef_sorted(i,k) + enddo + enddo + + SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed + SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized + + call H_apply_mrcepa_PT2(pt2, norm_pert, H_pert_diag, N_st) + +! call ezfio_set_full_ci_energy_pt2(energy+pt2) + + print *, 'Final step' + print *, 'N_det = ', N_det + print *, 'N_states = ', N_states + print *, 'PT2 = ', pt2 + print *, 'E = ', energy + print *, 'E+PT2 = ', energy+pt2 + print *, '-----' + + call ezfio_set_mrcepa0_energy_pt2(energy(1)+pt2(1)) + +end + diff --git a/plugins/dress_zmq/dress_slave.irp.f b/plugins/dress_zmq/dress_slave.irp.f new file mode 100644 index 00000000..b699dd73 --- /dev/null +++ b/plugins/dress_zmq/dress_slave.irp.f @@ -0,0 +1,75 @@ +program dress_slave + implicit none + BEGIN_DOC +! Helper program to compute the dress in distributed mode. + END_DOC + read_wf = .False. + distributed_davidson = .False. + SOFT_TOUCH read_wf distributed_davidson + call provide_everything + call switch_qp_run_to_master + call run_wf +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context +end + +subroutine run_wf + use f77_zmq + implicit none + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: states(1) + integer :: rc, i + + call provide_everything + + zmq_context = f77_zmq_ctx_new () + states(1) = 'dress' + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + + call wait_for_states(states,zmq_state,1) + + if(trim(zmq_state) == 'Stopped') then + + exit + + else if (trim(zmq_state) == 'dress') then + + ! Selection + ! --------- + + print *, 'dress' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + + PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique + PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order + PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns + PROVIDE psi_bilinear_matrix_transp_order + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call dress_slave_tcp(i, energy) + !$OMP END PARALLEL + print *, 'dress done' + + endif + + end do +end + +subroutine dress_slave_tcp(i,energy) + implicit none + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: i + logical :: lstop + lstop = .False. + call run_dress_slave(0,i,energy,lstop) +end + diff --git a/plugins/dress_zmq/dress_stoch_routines.irp.f b/plugins/dress_zmq/dress_stoch_routines.irp.f new file mode 100644 index 00000000..a6f2630a --- /dev/null +++ b/plugins/dress_zmq/dress_stoch_routines.irp.f @@ -0,0 +1,617 @@ +BEGIN_PROVIDER [ integer, fragment_first ] + implicit none + fragment_first = first_det_of_teeth(1) +END_PROVIDER + + +subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error) + use f77_zmq + + implicit none + + character(len=64000) :: task + + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull + integer, external :: omp_get_thread_num + double precision, intent(in) :: relative_error, E + double precision, intent(out) :: dress(N_states) + double precision, intent(out) :: delta(N_states, N_det_non_ref) + double precision, intent(out) :: delta_s2(N_states, N_det_non_ref) + + + integer :: i, j, k, Ncp + + double precision, external :: omp_get_wtime + double precision :: time + double precision :: w(N_states) + integer, external :: add_task_to_taskserver + + + provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral dress_weight psi_selectors + + !!!!!!!!!!!!!!! demander a TOTO !!!!!!! + w(:) = 0.d0 + w(dress_stoch_istate) = 1.d0 + call update_psi_average_norm_contrib(w) + + + + + print *, '========== ================= ================= =================' + print *, ' Samples Energy Stat. Error Seconds ' + print *, '========== ================= ================= =================' + + + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull, 'dress') + + integer, external :: zmq_put_psi + integer, external :: zmq_put_N_det_generators + integer, external :: zmq_put_N_det_selectors + integer, external :: zmq_put_dvector + integer, external :: zmq_set_running + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_generators on ZMQ server' + endif + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_det_selectors on ZMQ server' + endif + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',dress_e0_denominator,size(dress_e0_denominator)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer :: ipos + ipos=1 + do i=1,N_dress_jobs + if(dress_jobs(i) > fragment_first) then + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, dress_jobs(i) + ipos += 20 + if (ipos > 63980) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + + ipos=1 + endif + else + do j=1,fragment_count + write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, dress_jobs(i) + ipos += 20 + if (ipos > 63980) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + ipos=1 + endif + end do + end if + end do + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task to task server' + endif + endif + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) & + !$OMP PRIVATE(i) + i = omp_get_thread_num() + if (i==0) then + call dress_collector(zmq_socket_pull,E, relative_error, delta, delta_s2, dress) + else + call dress_slave_inproc(i) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'dress') + + print *, '========== ================= ================= =================' +end subroutine + + +subroutine dress_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call run_dress_slave(1,i,dress_e0_denominator) +end + + + +subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2, dress) + use f77_zmq + use bitmasks + implicit none + + + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + + double precision, intent(in) :: relative_error, E + double precision, intent(out) :: dress(N_states) + double precision, allocatable :: cp(:,:,:,:) + + double precision, intent(out) :: delta(N_states, N_det_non_ref) + double precision, intent(out) :: delta_s2(N_states, N_det_non_ref) + double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:) + double precision, allocatable :: dress_detail(:,:) + double precision :: dress_mwen(N_states) + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_pull_socket + + integer :: more + integer :: i, j, k, i_state, N + integer :: task_id, ind + double precision, save :: time0 = -1.d0 + double precision :: time, timeLast, old_tooth + double precision, external :: omp_get_wtime + integer :: cur_cp, old_cur_cp + integer, allocatable :: parts_to_get(:) + logical, allocatable :: actually_computed(:) + integer :: total_computed + + delta = 0d0 + delta_s2 = 0d0 + allocate(delta_det(N_states, N_det_non_ref, 0:comb_teeth+1, 2)) + allocate(cp(N_states, N_det_non_ref, N_cp, 2), dress_detail(N_states, N_det_generators)) + allocate(delta_loc(N_states, N_det_non_ref, 2)) + dress_detail = 0d0 + delta_det = 0d0 + cp = 0d0 + total_computed = 0 + character*(512) :: task + + allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) + + dress_mwen =0.d0 + + parts_to_get(:) = 1 + if(fragment_first > 0) then + do i=1,fragment_first + parts_to_get(i) = fragment_count + enddo + endif + + actually_computed = .false. + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + more = 1 + if (time0 < 0.d0) then + call wall_time(time0) + endif + timeLast = time0 + cur_cp = 0 + old_cur_cp = 0 + logical :: loop + loop = .true. + + pullLoop : do while (loop) + call pull_dress_results(zmq_socket_pull, ind, dress_mwen, delta_loc, task_id) + + dress_detail(:, ind) += dress_mwen(:) + do j=1,N_cp !! optimizable + if(cps(ind, j) > 0d0) then + if(tooth_of_det(ind) < cp_first_tooth(j)) stop "coef on supposedely deterministic det" + double precision :: fac + integer :: toothMwen + logical :: fracted + fac = cps(ind, j) / cps_N(j) * dress_weight_inv(ind) * comb_step + do k=1,N_det_non_ref + do i_state=1,N_states + cp(i_state,k,j,1) += delta_loc(i_state,k,1) * fac + cp(i_state,k,j,2) += delta_loc(i_state,k,2) * fac + end do + end do + end if + end do + toothMwen = tooth_of_det(ind) + fracted = (toothMwen /= 0) + if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) + + if(fracted) then + delta_det(:,:,toothMwen-1, 1) += delta_loc(:,:,1) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen-1, 2) += delta_loc(:,:,2) * (1d0-fractage(toothMwen)) + delta_det(:,:,toothMwen, 1) += delta_loc(:,:,1) * (fractage(toothMwen)) + delta_det(:,:,toothMwen, 2) += delta_loc(:,:,2) * (fractage(toothMwen)) + else + delta_det(:,:,toothMwen, 1) += delta_loc(:,:,1) + delta_det(:,:,toothMwen, 2) += delta_loc(:,:,2) + end if + + parts_to_get(ind) -= 1 + if(parts_to_get(ind) == 0) then + actually_computed(ind) = .true. + total_computed += 1 + end if + + + integer, external :: zmq_delete_tasks + if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,1,more) == -1) then + stop 'Unable to delete tasks' + endif + if(more == 0) loop = .false. + + time = omp_get_wtime() + + + if(time - timeLast > 1d0 .or. (.not. loop)) then + timeLast = time + cur_cp = N_cp + if(.not. actually_computed(dress_jobs(1))) cycle pullLoop + + do i=2,N_det_generators + if(.not. actually_computed(dress_jobs(i))) then + cur_cp = done_cp_at(i-1) + exit + end if + end do + if(cur_cp == 0) cycle pullLoop + + + double precision :: su, su2, eqt, avg, E0, val + integer, external :: zmq_abort + + su = 0d0 + su2 = 0d0 + + if(N_states > 1) stop "dress_stoch : N_states == 1" + do i=1, int(cps_N(cur_cp)) + call get_comb_val(comb(i), dress_detail, cur_cp, val) + su += val + su2 += val**2 + end do + avg = su / cps_N(cur_cp) + eqt = dsqrt( ((su2 / cps_N(cur_cp)) - avg**2) / cps_N(cur_cp) ) + E0 = sum(dress_detail(1, :first_det_of_teeth(cp_first_tooth(cur_cp))-1)) + if(cp_first_tooth(cur_cp) <= comb_teeth) then + E0 = E0 + dress_detail(1, first_det_of_teeth(cp_first_tooth(cur_cp))) * (1d0-fractage(cp_first_tooth(cur_cp))) + end if + call wall_time(time) + if ((dabs(eqt) < relative_error .and. cps_N(cur_cp) >= 30) .or. total_computed == N_det_generators) then + ! Termination + !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' +! print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + call sleep(1) + if (zmq_abort(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Error in sending abort signal (2)' + endif + endif + else + if (cur_cp > old_cur_cp) then + old_cur_cp = cur_cp +! print *, "GREPME", cur_cp, E+E0+avg, eqt, time-time0, total_computed + !print '(G10.3, 2X, F16.7, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, '' + endif + endif + end if + end do pullLoop + + if(total_computed == N_det_generators) then + delta = 0d0 + delta_s2 = 0d0 + do i=comb_teeth+1,0,-1 + delta += delta_det(:,:,i,1) + delta_s2 += delta_det(:,:,i,2) + end do + else + + delta = cp(:,:,cur_cp,1) + delta_s2 = cp(:,:,cur_cp,2) + do i=cp_first_tooth(cur_cp)-1,0,-1 + delta += delta_det(:,:,i,1) + delta_s2 += delta_det(:,:,i,2) + end do + + end if + dress(1) = E + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) +end subroutine + + +integer function dress_find(v, w, sze, imin, imax) + implicit none + integer, intent(in) :: sze, imin, imax + double precision, intent(in) :: v, w(sze) + integer :: i,l,h + integer, parameter :: block=64 + + l = imin + h = imax-1 + + do while(h-l >= block) + i = ishft(h+l,-1) + if(w(i+1) > v) then + h = i-1 + else + l = i+1 + end if + end do + !DIR$ LOOP COUNT (64) + do dress_find=l,h + if(w(dress_find) >= v) then + exit + end if + end do +end function + + + BEGIN_PROVIDER [ integer, gen_per_cp ] +&BEGIN_PROVIDER [ integer, comb_teeth ] +&BEGIN_PROVIDER [ integer, N_cps_max ] + implicit none + comb_teeth = 16 + N_cps_max = 32 + gen_per_cp = (N_det_generators / N_cps_max) + 1 + N_cps_max += 1 +END_PROVIDER + + + BEGIN_PROVIDER [ integer, N_cp ] +&BEGIN_PROVIDER [ double precision, cps_N, (N_cps_max) ] +&BEGIN_PROVIDER [ integer, cp_first_tooth, (N_cps_max) ] +&BEGIN_PROVIDER [ integer, done_cp_at, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, cps, (N_det_generators, N_cps_max) ] +&BEGIN_PROVIDER [ integer, N_dress_jobs ] +&BEGIN_PROVIDER [ integer, dress_jobs, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, comb, (N_det_generators) ] + implicit none + logical, allocatable :: computed(:) + integer :: i, j, last_full, dets(comb_teeth) + integer :: k, l, cur_cp, under_det(comb_teeth+1) + integer, allocatable :: iorder(:), first_cp(:) + + allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) + allocate(computed(N_det_generators)) + first_cp = 1 + cps = 0d0 + cur_cp = 1 + done_cp_at = 0 + + computed = .false. + + N_dress_jobs = first_det_of_comb - 1 + do i=1, N_dress_jobs + dress_jobs(i) = i + computed(i) = .true. + end do + + l=first_det_of_comb + call RANDOM_NUMBER(comb) + do i=1,N_det_generators + comb(i) = comb(i) * comb_step + !DIR$ FORCEINLINE + call add_comb(comb(i), computed, cps(1, cur_cp), N_dress_jobs, dress_jobs) + + if(N_dress_jobs / gen_per_cp > (cur_cp-1) .or. N_dress_jobs == N_det_generators) then + first_cp(cur_cp+1) = N_dress_jobs + done_cp_at(N_dress_jobs) = cur_cp + cps_N(cur_cp) = dfloat(i) + if(N_dress_jobs /= N_det_generators) then + cps(:, cur_cp+1) = cps(:, cur_cp) + cur_cp += 1 + end if + + if (N_dress_jobs == N_det_generators) exit + end if + do while (computed(l)) + l=l+1 + enddo + k=N_dress_jobs+1 + dress_jobs(k) = l + computed(l) = .True. + N_dress_jobs = k + enddo + N_cp = cur_cp + if(N_dress_jobs /= N_det_generators .or. N_cp > N_cps_max) then + print *, N_dress_jobs, N_det_generators, N_cp, N_cps_max + stop "error in jobs creation" + end if + + cur_cp = 0 + do i=1,N_dress_jobs + if(done_cp_at(i) /= 0) cur_cp = done_cp_at(i) + done_cp_at(i) = cur_cp + end do + + + under_det = 0 + cp_first_tooth = 0 + do i=1,N_dress_jobs + do j=comb_teeth+1,1,-1 + if(dress_jobs(i) <= first_det_of_teeth(j)) then + under_det(j) = under_det(j) + 1 + if(under_det(j) == first_det_of_teeth(j))then + do l=done_cp_at(i)+1, N_cp + cps(:first_det_of_teeth(j)-1, l) = 0d0 + cp_first_tooth(l) = j + end do + cps(first_det_of_teeth(j), done_cp_at(i)+1) = & + cps(first_det_of_teeth(j), done_cp_at(i)+1) * fractage(j) + end if + else + exit + end if + end do + end do + cps(:, N_cp) = 0d0 + cp_first_tooth(N_cp) = comb_teeth+1 + + iorder = -1 + do i=1,N_cp-1 + call isort(dress_jobs(first_cp(i)+1:first_cp(i+1)),iorder,first_cp(i+1)-first_cp(i)) + end do +END_PROVIDER + + +subroutine get_comb_val(stato, detail, cur_cp, val) + implicit none + integer, intent(in) :: cur_cp + integer :: first + double precision, intent(in) :: stato, detail(N_states, N_det_generators) + double precision, intent(out) :: val + double precision :: curs + integer :: j, k + integer, external :: dress_find + + curs = 1d0 - stato + val = 0d0 + first = cp_first_tooth(cur_cp) + + do j = comb_teeth, first, -1 + !DIR$ FORCEINLINE + k = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) + if(k == first_det_of_teeth(first)) then + val += detail(1, k) * dress_weight_inv(k) * comb_step * fractage(first) + else + val += detail(1, k) * dress_weight_inv(k) * comb_step + end if + + curs -= comb_step + end do +end subroutine + + +subroutine get_comb(stato, dets) + implicit none + double precision, intent(in) :: stato + integer, intent(out) :: dets(comb_teeth) + double precision :: curs + integer :: j + integer, external :: dress_find + + curs = 1d0 - stato + do j = comb_teeth, 1, -1 + !DIR$ FORCEINLINE + dets(j) = dress_find(curs, dress_cweight,size(dress_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1)) + curs -= comb_step + end do +end subroutine + + +subroutine add_comb(com, computed, cp, N, tbc) + implicit none + double precision, intent(in) :: com + integer, intent(inout) :: N + double precision, intent(inout) :: cp(N_det_non_ref) + logical, intent(inout) :: computed(N_det_generators) + integer, intent(inout) :: tbc(N_det_generators) + integer :: i, k, l, dets(comb_teeth) + + !DIR$ FORCEINLINE + call get_comb(com, dets) + + k=N+1 + do i = 1, comb_teeth + l = dets(i) + cp(l) += 1d0 + if(.not.(computed(l))) then + tbc(k) = l + k = k+1 + computed(l) = .true. + end if + end do + N = k-1 +end subroutine + + + BEGIN_PROVIDER [ integer, dress_stoch_istate ] + implicit none + dress_stoch_istate = 1 + END_PROVIDER + + + BEGIN_PROVIDER [ double precision, dress_weight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, dress_weight_inv, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, dress_cweight, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, dress_cweight_cache, (N_det_generators) ] +&BEGIN_PROVIDER [ double precision, fractage, (comb_teeth) ] +&BEGIN_PROVIDER [ double precision, comb_step ] +&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ] +&BEGIN_PROVIDER [ integer, first_det_of_comb ] +&BEGIN_PROVIDER [ integer, tooth_of_det, (N_det_generators) ] + implicit none + integer :: i + double precision :: norm_left, stato + integer, external :: dress_find + + dress_weight(1) = psi_coef_generators(1,dress_stoch_istate)**2 + dress_cweight(1) = psi_coef_generators(1,dress_stoch_istate)**2 + + do i=1,N_det_generators + dress_weight(i) = psi_coef_generators(i,dress_stoch_istate)**2 + enddo + + ! Important to loop backwards for numerical precision + dress_cweight(N_det_generators) = dress_weight(N_det_generators) + do i=N_det_generators-1,1,-1 + dress_cweight(i) = dress_weight(i) + dress_cweight(i+1) + end do + + do i=1,N_det_generators + dress_weight(i) = dress_weight(i) / dress_cweight(1) + dress_cweight(i) = dress_cweight(i) / dress_cweight(1) + enddo + + do i=1,N_det_generators-1 + dress_cweight(i) = 1.d0 - dress_cweight(i+1) + end do + dress_cweight(N_det_generators) = 1.d0 + + norm_left = 1d0 + + comb_step = 1d0/dfloat(comb_teeth) + first_det_of_comb = 1 + do i=1,N_det_generators + if(dress_weight(i)/norm_left < .25d0*comb_step) then + first_det_of_comb = i + exit + end if + norm_left -= dress_weight(i) + end do + first_det_of_comb = max(2,first_det_of_comb) + call write_int(6, first_det_of_comb-1, 'Size of deterministic set') + + + comb_step = (1d0 - dress_cweight(first_det_of_comb-1)) * comb_step + + stato = 1d0 - comb_step + iloc = N_det_generators + do i=comb_teeth, 1, -1 + integer :: iloc + iloc = dress_find(stato, dress_cweight, N_det_generators, 1, iloc) + first_det_of_teeth(i) = iloc + fractage(i) = (dress_cweight(iloc) - stato) / dress_weight(iloc) + stato -= comb_step + end do + first_det_of_teeth(comb_teeth+1) = N_det_generators + 1 + first_det_of_teeth(1) = first_det_of_comb + + + if(first_det_of_teeth(1) /= first_det_of_comb) then + print *, 'Error in ', irp_here + stop "comb provider" + endif + + do i=1,N_det_generators + dress_weight_inv(i) = 1.d0/dress_weight(i) + enddo + + tooth_of_det(:first_det_of_teeth(1)-1) = 0 + do i=1,comb_teeth + tooth_of_det(first_det_of_teeth(i):first_det_of_teeth(i+1)-1) = i + end do +END_PROVIDER + + + + + + + diff --git a/plugins/dress_zmq/dress_zmq.irp.f b/plugins/dress_zmq/dress_zmq.irp.f new file mode 100644 index 00000000..1b06b391 --- /dev/null +++ b/plugins/dress_zmq/dress_zmq.irp.f @@ -0,0 +1,24 @@ +program dress_zmq + implicit none + double precision, allocatable :: energy(:) + allocate (energy(N_states)) + + read_wf = .True. + SOFT_TOUCH read_wf + call set_generators_bitmasks_as_holes_and_particles + if (.True.) then + integer :: i,j + do j=1,N_states + do i=1,N_det + psi_coef(i,j) = CI_eigenvectors(i,j) + enddo + enddo + SOFT_TOUCH psi_coef + endif + call run(N_states,energy) + if(do_pt2)then + call run_pt2(N_states,energy) + endif + deallocate(energy) +end + diff --git a/plugins/dress_zmq/dressing.irp.f b/plugins/dress_zmq/dressing.irp.f new file mode 100644 index 00000000..34c9a33d --- /dev/null +++ b/plugins/dress_zmq/dressing.irp.f @@ -0,0 +1,109 @@ +use bitmasks + + +BEGIN_PROVIDER [ integer, N_dress_teeth ] + N_dress_teeth = 10 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, dress_norm_acc, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, dress_norm, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ double precision, dress_teeth_size, (0:N_det_non_ref, N_states) ] +&BEGIN_PROVIDER [ integer, dress_teeth, (0:N_dress_teeth+1, N_states) ] + implicit none + integer :: i, j, st, nt + double precision :: norm_sto, jump, norm_mwen, norm_loc + + if(N_states /= 1) stop "dress_sto may not work with N_states /= 1" + + do st=1,N_states + dress_teeth(0,st) = 1 + norm_sto = 1d0 + do i=1,N_det + dress_teeth(1,st) = i + jump = (1d0 / dfloat(N_dress_teeth)) * norm_sto + if(psi_coef_generators(i,1)**2 < jump / 2d0) exit + norm_sto -= psi_coef_generators(i,1)**2 + end do + + norm_loc = 0d0 + dress_norm_acc(0,st) = 0d0 + nt = 1 + + do i=1,dress_teeth(1,st)-1 + dress_norm_acc(i,st) = dress_norm_acc(i-1,st) + psi_coef_generators(i,st)**2 + end do + + do i=dress_teeth(1,st), N_det_generators!-dress_teeth(1,st)+1 + norm_mwen = psi_coef_generators(i,st)**2!-1+dress_teeth(1,st),st)**2 + dress_norm_acc(i,st) = dress_norm_acc(i-1,st) + norm_mwen + norm_loc += norm_mwen + if(norm_loc > (jump*dfloat(nt))) then + nt = nt + 1 + dress_teeth(nt,st) = i + end if + end do + if(nt > N_dress_teeth+1) then + print *, "foireouse dress_teeth", nt, dress_teeth(nt,st), N_det_non_ref + stop + end if + + dress_teeth(N_dress_teeth+1,st) = N_det_non_ref+1 + norm_loc = 0d0 + do i=N_dress_teeth, 0, -1 + dress_teeth_size(i,st) = dress_norm_acc(dress_teeth(i+1,st)-1,st) - dress_norm_acc(dress_teeth(i,st)-1, st) + dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) -= dress_norm_acc(dress_teeth(i,st)-1, st) + dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) = & + dress_norm_acc(dress_teeth(i,st):dress_teeth(i+1,st)-1,st) / dress_teeth_size(i,st) + dress_norm(dress_teeth(i,st), st) = dress_norm_acc(dress_teeth(i,st), st) + do j=dress_teeth(i,st)+1, dress_teeth(i+1,1)-1 + dress_norm(j,1) = dress_norm_acc(j, st) - dress_norm_acc(j-1, st) + end do + end do + end do +END_PROVIDER + + + + BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref, N_det_ref) ] +&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ] + use bitmasks + implicit none + + integer :: i,j,k + + double precision, allocatable :: dress(:), del(:,:), del_s2(:,:) + double precision :: E_CI_before, relative_error + double precision, save :: errr = 0d0 + + allocate(dress(N_states), del(N_states, N_det_non_ref), del_s2(N_states, N_det_non_ref)) + + + delta_ij = 0d0 + delta_ii = 0d0 + delta_ij_s2 = 0d0 + delta_ii_s2 = 0d0 + + E_CI_before = dress_E0_denominator(1) + nuclear_repulsion + threshold_selectors = 1.d0 + threshold_generators = 1d0 + if(errr /= 0d0) then + errr = errr / 2d0 ! + else + errr = 1d-4 + end if + relative_error = errr + print *, "RELATIVE ERROR", relative_error + call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error)) + + delta_ij(:,:,1) = del(:,:) + delta_ij_s2(:,:,1) = del_s2(:,:) + do i=N_det_non_ref,1,-1 + delta_ii(dress_stoch_istate,1) -= delta_ij(dress_stoch_istate, i, 1) / psi_ref_coef(1,dress_stoch_istate) * psi_non_ref_coef(i, dress_stoch_istate) + delta_ii_s2(dress_stoch_istate,1) -= delta_ij_s2(dress_stoch_istate, i, 1) / psi_ref_coef(1,dress_stoch_istate) * psi_non_ref_coef(i, dress_stoch_istate) + end do +END_PROVIDER + + + diff --git a/plugins/dress_zmq/energy.irp.f b/plugins/dress_zmq/energy.irp.f new file mode 100644 index 00000000..0ab170f1 --- /dev/null +++ b/plugins/dress_zmq/energy.irp.f @@ -0,0 +1,23 @@ +BEGIN_PROVIDER [ logical, initialize_dress_E0_denominator ] + implicit none + BEGIN_DOC + ! If true, initialize dress_E0_denominator + END_DOC + initialize_dress_E0_denominator = .True. +END_PROVIDER + +BEGIN_PROVIDER [ double precision, dress_E0_denominator, (N_states) ] + implicit none + BEGIN_DOC + ! E0 in the denominator of the dress + END_DOC + if (initialize_dress_E0_denominator) then + dress_E0_denominator(1:N_states) = psi_energy(1:N_states) +! dress_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion +! dress_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states) + call write_double(6,dress_E0_denominator(1)+nuclear_repulsion, 'dress Energy denominator') + else + dress_E0_denominator = -huge(1.d0) + endif +END_PROVIDER + diff --git a/plugins/dress_zmq/run_dress_slave.irp.f b/plugins/dress_zmq/run_dress_slave.irp.f new file mode 100644 index 00000000..d8a5cbbe --- /dev/null +++ b/plugins/dress_zmq/run_dress_slave.irp.f @@ -0,0 +1,174 @@ +BEGIN_PROVIDER [ integer, fragment_count ] + implicit none + BEGIN_DOC + ! Number of fragments for the deterministic part + END_DOC + fragment_count = 1 +END_PROVIDER + + +subroutine run_dress_slave(thread,iproc,energy) + use f77_zmq + implicit none + + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: thread, iproc + integer :: rc, i, subset, i_generator + + integer :: worker_id, task_id, ctask, ltask + character*(512) :: task + + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + logical :: done + + double precision,allocatable :: dress_detail(:) + integer :: ind + + integer(bit_kind),allocatable :: abuf(:,:,:) + integer(bit_kind) :: mask(N_int,2), omask(N_int,2) + + double precision,allocatable :: delta_ij_loc(:,:,:) + double precision,allocatable :: delta_ii_loc(:,:) + integer :: h,p,n + logical :: ok + double precision :: contrib(N_states) + + allocate(delta_ij_loc(N_states,N_det_non_ref,2) & + ,delta_ii_loc(N_states,2)) + allocate(abuf(N_int, 2, N_det_non_ref)) + allocate(dress_detail(N_states)) + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + zmq_socket_push = new_zmq_push_socket(thread) + call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) + if(worker_id == -1) then + print *, "WORKER -1" + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if + dress_detail = 0d0 + do + call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) + + if(task_id /= 0) then + read (task,*) subset, i_generator + contrib = 0d0 + delta_ij_loc = 0d0 + delta_ii_loc = 0d0 + do h=1, hh_shortcut(0) + call apply_hole_local(psi_det_generators(1,1,i_generator), hh_exists(1, h), mask, ok, N_int) + if(.not. ok) cycle + omask = 0_bit_kind + if(hh_exists(1, h) /= 0) omask = mask + n = 1 + do p=hh_shortcut(h), hh_shortcut(h+1)-1 + call apply_particle_local(mask, pp_exists(1, p), abuf(1,1,n), ok, N_int) + if(ok) n = n + 1 + if(n > N_det_non_ref) stop "Buffer too small in dress..." + end do + n = n - 1 + + if(n /= 0) then + n = n + 1 + n = n - 1 + !! DRESS HERE !! + !call dress_part_dress_1c(delta_ij_loc(1,1,1), delta_ii_loc(1,1), delta_ij_loc(1,1,2), delta_ii_loc(1,2), & + ! i_generator,n,abuf,N_int,omask,contrib) + endif + end do + dress_detail(:) = contrib + call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) + call push_dress_results(zmq_socket_push, i_generator, dress_detail, delta_ij_loc(1,1,1), task_id) + dress_detail(:) = 0d0 + else + exit + end if + end do + call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) +end subroutine + + +subroutine push_dress_results(zmq_socket_push, ind, dress_detail, delta_loc, task_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR), intent(in) :: zmq_socket_push + double precision, intent(in) :: dress_detail(N_states, N_det_generators) + double precision, intent(in) :: delta_loc(N_states, N_det_non_ref, 2) + integer, intent(in) :: ind, task_id + integer :: rc, i + + + rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) + if(rc /= 4) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, dress_detail, 8*N_states, ZMQ_SNDMORE) + if(rc /= 8*N_states) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*N_det_non_ref, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det_non_ref) stop "push" + + rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*N_det_non_ref, ZMQ_SNDMORE) + if(rc /= 8*N_states*N_det_non_ref) stop "push" + + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) + if(rc /= 4) stop "push" + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) +IRP_ENDIF + +end subroutine + + +subroutine pull_dress_results(zmq_socket_pull, ind, dress_detail, delta_loc, task_id) + use f77_zmq + implicit none + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + double precision, intent(inout) :: dress_detail(N_states) + double precision, intent(inout) :: delta_loc(N_states, N_det_non_ref, 2) + double precision, allocatable :: dress_dress_mwen(:,:) + integer, intent(out) :: ind + integer, intent(out) :: task_id + integer :: rc, rn, i + + allocate(dress_dress_mwen(N_states, N_det_non_ref)) + + rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) + if(rc /= 4) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, dress_detail, N_states*8, 0) + if(rc /= 8*N_states) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*N_det_non_ref, 0) + if(rc /= 8*N_states*N_det_non_ref) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*N_det_non_ref, 0) + if(rc /= 8*N_states*N_det_non_ref) stop "pull" + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop "pull" + +! Activate is zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) +IRP_ENDIF + +end subroutine + + +