From 8da0509c5eec69e31f62384c46a3aed0fbe6ad2c Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 1 Aug 2016 16:05:40 +0200 Subject: [PATCH 01/11] Massively parallel selection --- ocaml/Message.ml | 15 ++- ocaml/TaskServer.ml | 28 +++- plugins/Full_CI/micro_pt2.irp.f | 2 - plugins/Full_CI_ZMQ/fci_zmq.irp.f | 150 +++------------------ plugins/Full_CI_ZMQ/selection.irp.f | 15 ++- plugins/Full_CI_ZMQ/selection_slave.irp.f | 83 ++++++++++++ plugins/Selectors_full/zmq.irp.f | 2 +- src/Determinants/H_apply_zmq.template.f | 2 +- src/Integrals_Bielec/ao_bi_integrals.irp.f | 2 + src/ZMQ/utils.irp.f | 71 +++++++--- 10 files changed, 206 insertions(+), 164 deletions(-) create mode 100644 plugins/Full_CI_ZMQ/selection_slave.irp.f diff --git a/ocaml/Message.ml b/ocaml/Message.ml index 79eca15e..98c4ef25 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -541,6 +541,9 @@ type t = | Terminate of Terminate_msg.t | Ok of Ok_msg.t | Error of Error_msg.t +| SetStopped +| SetWaiting +| SetRunning let of_string s = @@ -577,10 +580,11 @@ let of_string s = | "put_psi" :: client_id :: n_state :: n_det :: psi_det_size :: [] -> PutPsi (PutPsi_msg.create ~client_id ~n_state ~n_det ~psi_det_size ~n_det_generators:None ~n_det_selectors:None ~psi_det:None ~psi_coef:None ~energy:None) - | "ok" :: [] -> - Ok (Ok_msg.create ()) - | "error" :: rest -> - Error (Error_msg.create (String.concat ~sep:" " rest)) + | "ok" :: [] -> Ok (Ok_msg.create ()) + | "error" :: rest -> Error (Error_msg.create (String.concat ~sep:" " rest)) + | "set_stopped" :: [] -> SetStopped + | "set_running" :: [] -> SetRunning + | "set_waiting" :: [] -> SetWaiting | _ -> failwith "Message not understood" @@ -605,6 +609,9 @@ let to_string = function | Error x -> Error_msg.to_string x | PutPsi x -> PutPsi_msg.to_string x | GetPsiReply x -> GetPsiReply_msg.to_string x +| SetStopped -> "set_stopped" +| SetRunning -> "set_running" +| SetWaiting -> "set_waiting" let to_string_list = function diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 6cd77fd0..c02c8f8f 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -160,10 +160,30 @@ let new_job msg program_state rep_socket pair_socket = } in reply_ok rep_socket; - string_of_pub_state (Running (Message.State.to_string state)) + string_of_pub_state Waiting |> ZMQ.Socket.send pair_socket ; result +let change_pub_state msg program_state rep_socket pair_socket = + let msg = + match msg with + | `Waiting -> Waiting + | `Stopped -> Stopped + | `Running -> + begin + let state = + match program_state.state with + | Some x -> x + | None -> failwith "Trying to change pub state while no job is ready" + in + Running (Message.State.to_string state) + end + in + reply_ok rep_socket; + string_of_pub_state msg + |> ZMQ.Socket.send pair_socket ; + + program_state let end_job msg program_state rep_socket pair_socket = @@ -531,6 +551,9 @@ let get_psi msg program_state rep_socket = let terminate program_state rep_socket = reply_ok rep_socket; { program_state with + psi = None; + address_tcp = None; + address_inproc = None; running = false } @@ -685,6 +708,9 @@ let run ~port = | None , Message.Newjob x -> new_job x program_state rep_socket pair_socket | _ , Message.Newjob _ -> error "A job is already running" program_state rep_socket | Some _, Message.Endjob x -> end_job x program_state rep_socket pair_socket + | Some _, Message.SetRunning -> change_pub_state `Running program_state rep_socket pair_socket + | _, Message.SetWaiting -> change_pub_state `Waiting program_state rep_socket pair_socket + | _, Message.SetStopped -> change_pub_state `Stopped program_state rep_socket pair_socket | None , _ -> error "No job is running" program_state rep_socket | Some _, Message.Connect x -> connect x program_state rep_socket | Some _, Message.Disconnect x -> disconnect x program_state rep_socket diff --git a/plugins/Full_CI/micro_pt2.irp.f b/plugins/Full_CI/micro_pt2.irp.f index 25e74b28..4ce7c4be 100644 --- a/plugins/Full_CI/micro_pt2.irp.f +++ b/plugins/Full_CI/micro_pt2.irp.f @@ -58,6 +58,4 @@ subroutine run_wf i = omp_get_thread_num() call H_apply_FCI_PT2_slave_tcp(i) !$OMP END PARALLEL - - end diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index b3a89b74..f45da233 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -1,5 +1,3 @@ - - program fci_zmq implicit none integer :: i,k @@ -7,9 +5,7 @@ program fci_zmq double precision, allocatable :: pt2(:), norm_pert(:), H_pert_diag(:) integer :: N_st, degree - integer :: it, mit(0:6) - mit = (/1, 246, 1600, 17528, 112067, 519459, 2685970/) - it = 0 + integer(bit_kind) :: chk N_st = N_states allocate (pt2(N_st), norm_pert(N_st),H_pert_diag(N_st)) @@ -39,20 +35,12 @@ program fci_zmq integer :: n_det_before print*,'Beginning the selection ...' E_CI_before = CI_energy + do while (N_det < N_det_max.and.maxval(abs(pt2(1:N_st))) > pt2_max) n_det_before = N_det ! call H_apply_FCI(pt2, norm_pert, H_pert_diag, N_st) - it += 1 - if(it > 6) stop - call ZMQ_selection(mit(it) - mit(it-1), pt2) ! max(1000-N_det, N_det), pt2) + call ZMQ_selection(max(1024-N_det, N_det), pt2) - !do i=1, N_det - !if(popcnt(psi_det(1,1,i)) + popcnt(psi_det(2,1,i)) /= 23) stop "ZZ1" -2099.2504682049275 - !if(popcnt(psi_det(1,2,i)) + popcnt(psi_det(2,2,i)) /= 23) stop "ZZ2" - ! do k=1,i-1 - ! if(detEq(psi_det(1,1,i), psi_det(1,1,k), N_int)) stop "ATRRGRZER" - ! end do - !end do PROVIDE psi_coef PROVIDE psi_det PROVIDE psi_det_sorted @@ -65,6 +53,14 @@ program fci_zmq endif call diagonalize_CI call save_wavefunction + ! chk = 0_8 + ! do i=1, N_det + ! do k=1, N_int + ! chk = xor(psi_det(k,1,i), chk) + ! chk = xor(psi_det(k,2,i), chk) + ! end do + ! end do + ! print *, "CHK ", chk print *, 'N_det = ', N_det print *, 'N_states = ', N_states @@ -128,18 +124,20 @@ subroutine ZMQ_selection(N, pt2) integer :: i integer, external :: omp_get_thread_num double precision, intent(out) :: pt2(N_states) - !call flip_generators() - call new_parallel_job(zmq_to_qp_run_socket,'selection') + + provide nproc + provide ci_electronic_energy + call new_parallel_job(zmq_to_qp_run_socket,"selection") + call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) + call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(N, N*2, b) do i= N_det_generators, 1, -1 write(task,*) i, N call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do - provide nproc - provide ci_electronic_energy - !$OMP PARALLEL DEFAULT(none) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) + !$OMP PARALLEL DEFAULT(none) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1) shared(ci_electronic_energy_is_built, n_det_generators_is_built, n_states_is_built, n_int_is_built, nproc_is_built) i = omp_get_thread_num() if (i==0) then call selection_collector(b, pt2) @@ -148,125 +146,15 @@ subroutine ZMQ_selection(N, pt2) endif !$OMP END PARALLEL call end_parallel_job(zmq_to_qp_run_socket, 'selection') - !call flip_generators() call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN call copy_H_apply_buffer_to_wf() end subroutine -subroutine selection_dressing_slave_tcp(i) - implicit none - integer, intent(in) :: i - - call selection_slave(0,i) -end - - subroutine selection_dressing_slave_inproc(i) implicit none integer, intent(in) :: i - call selection_slave(1,i) + call selection_slaved(1,i) end - - -! subroutine ZMQ_selection() -! use f77_zmq -! implicit none -! BEGIN_DOC -! ! Massively parallel Full-CI -! END_DOC -! -! integer :: i,ithread -! integer(ZMQ_PTR) :: zmq_socket_push -! integer(ZMQ_PTR), external :: new_zmq_push_socket -! zmq_context = f77_zmq_ctx_new () -! PROVIDE H_apply_buffer_allocated -! -! PROVIDE ci_electronic_energy -! PROVIDE nproc -! !$OMP PARALLEL PRIVATE(i,ithread,zmq_socket_push) num_threads(nproc+1) -! ithread = omp_get_thread_num() -! if (ithread == 0) then -! call receive_selected_determinants() -! else -! zmq_socket_push = new_zmq_push_socket(1) -! -! do i=ithread,N_det_generators,nproc -! print *, i, "/", N_det_generators -! call select_connected(i, max(100, N_det), ci_electronic_energy,zmq_socket_push) -! enddo -! -! if (ithread == 1) then -! integer :: rc -! rc = f77_zmq_send(zmq_socket_push,0,1,0) -! if (rc /= 1) then -! stop 'Error sending termination signal' -! endif -! endif -! call end_zmq_push_socket(zmq_socket_push, 1) -! endif -! !$OMP END PARALLEL -! call copy_H_apply_buffer_to_wf() -! end - - - - - - - - - - - - - -! program Full_CI_ZMQ -! use f77_zmq -! implicit none -! BEGIN_DOC -! ! Massively parallel Full-CI -! END_DOC -! -! integer :: i,ithread -! -! integer(ZMQ_PTR) :: zmq_socket_push -! integer(ZMQ_PTR), external :: new_zmq_push_socket -! zmq_context = f77_zmq_ctx_new () -! PROVIDE H_apply_buffer_allocated -! -! do while (N_det < N_det_max) -! -! PROVIDE ci_electronic_energy -! PROVIDE nproc -! !$OMP PARALLEL PRIVATE(i,ithread,zmq_socket_push) num_threads(nproc+1) -! ithread = omp_get_thread_num() -! if (ithread == 0) then -! call receive_selected_determinants() -! else -! zmq_socket_push = new_zmq_push_socket(0) -! -! do i=ithread,N_det_generators,nproc -! print *, i , "/", N_det_generators -! call select_connected(i, 1.d-7, ci_electronic_energy,zmq_socket_push) -! enddo -! print *, "END .... " -! -! if (ithread == 1) then -! integer :: rc -! rc = f77_zmq_send(zmq_socket_push,0,1,0) -! if (rc /= 1) then -! stop 'Error sending termination signal' -! endif -! endif -! call end_zmq_push_socket(zmq_socket_push, 0) -! endif -! !$OMP END PARALLEL -! call copy_H_apply_buffer_to_wf() -! call diagonalize_CI() -! call save_wavefunction() -! end do -! -! end diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 436a6946..0cf7e576 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ double precision, integral8, (mo_tot_num, mo_tot_num, mo_tot_n END_PROVIDER -subroutine selection_slave(thread,iproc) +subroutine selection_slaved(thread,iproc) use f77_zmq use selection_types implicit none @@ -37,7 +37,13 @@ subroutine selection_slave(thread,iproc) zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() zmq_socket_push = new_zmq_push_socket(thread) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) - + if(worker_id == -1) then + print *, "WORKER -1" + !call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push,thread) + return + end if buf%N = 0 ctask = 1 pt2 = 0d0 @@ -53,7 +59,9 @@ subroutine selection_slave(thread,iproc) else if(N /= buf%N) stop "N changed... wtf man??" end if - call select_connected(i_generator,ci_electronic_energy,pt2,buf) !! ci_electronic_energy ?? + !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) + !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) + call select_connected(i_generator,ci_electronic_energy,pt2,buf) end if if(done) ctask = ctask - 1 @@ -160,7 +168,6 @@ subroutine select_connected(i_generator,E0,pt2,b) integer(bit_kind) :: hole_mask(N_int,2), particle_mask(N_int,2) double precision :: fock_diag_tmp(2,mo_tot_num+1) - call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) do l=1,N_generators_bitmask diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f new file mode 100644 index 00000000..fd7728f7 --- /dev/null +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -0,0 +1,83 @@ +program selection_slave + implicit none + BEGIN_DOC +! Helper program to compute the PT2 in distributed mode. + END_DOC + + read_wf = .False. + SOFT_TOUCH read_wf + call provide_everything + call switch_qp_run_to_master + call run_wf + +end + +subroutine provide_everything + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context +! PROVIDE ci_electronic_energy mo_tot_num N_int +end + +subroutine run_wf + use f77_zmq + implicit none + + integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + double precision :: energy(N_states_diag) + character*(64) :: state + integer :: oki + + oki = 0 + call provide_everything + + zmq_context = f77_zmq_ctx_new () + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + do + call wait_for_state("selection", zmq_state) + if(trim(zmq_state) /= "selection") exit + if(oki < 0) then + oki += 1 + cycle + end if + oki = 0 + print *, 'Getting wave function' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,size(energy)) + integer :: j,k + do j=1,N_states_diag + do k=1,N_det + CI_eigenvectors(k,j) = psi_coef(k,j) + enddo + call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),CI_eigenvectors_s2(j)) + enddo + if (.True.) then + do k=1,size(ci_electronic_energy) + ci_electronic_energy(k) = energy(k) + enddo + SOFT_TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors + endif + + call write_double(6,ci_energy,'Energy') + !zmq_state = 'selection' + + + integer :: rc, i + + print *, 'Selection slave running' + + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call selection_dressing_slave_tcp(i) + !$OMP END PARALLEL + end do +end + + +subroutine selection_dressing_slave_tcp(i) + implicit none + integer, intent(in) :: i + + call selection_slaved(0,i) +end + diff --git a/plugins/Selectors_full/zmq.irp.f b/plugins/Selectors_full/zmq.irp.f index cbdddb82..dfa94884 100644 --- a/plugins/Selectors_full/zmq.irp.f +++ b/plugins/Selectors_full/zmq.irp.f @@ -79,7 +79,7 @@ subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy) integer :: N_states_read, N_det_read, psi_det_size_read integer :: N_det_selectors_read, N_det_generators_read read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, & - N_det_selectors_read, N_det_generators_read + N_det_generators_read, N_det_selectors_read if (rc /= worker_id) then print *, 'Wrong worker ID' stop 'error' diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index 8a5f485b..d59f2994 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -136,7 +136,7 @@ subroutine $subroutine_slave(thread, iproc) pt2 = 0.d0 norm_pert = 0.d0 - H_pert_diag = 0.d0 + H_pert_diag = 0.d0 ! Create bit masks for holes and particles do ispin=1,2 diff --git a/src/Integrals_Bielec/ao_bi_integrals.irp.f b/src/Integrals_Bielec/ao_bi_integrals.irp.f index 2c46d42d..1dcea81f 100644 --- a/src/Integrals_Bielec/ao_bi_integrals.irp.f +++ b/src/Integrals_Bielec/ao_bi_integrals.irp.f @@ -372,6 +372,8 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ] write(task,*) "triangle ", l call add_task_to_taskserver(zmq_to_qp_run_socket,task) enddo + + call zmq_set_running(zmq_to_qp_run_socket) PROVIDE nproc !$OMP PARALLEL DEFAULT(private) num_threads(nproc+1) diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index d3b76f4f..6b22158e 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -143,11 +143,6 @@ function new_zmq_to_qp_run_socket() stop 'Unable to create zmq req socket' endif - rc = f77_zmq_connect(new_zmq_to_qp_run_socket, trim(qp_run_address)//':'//trim(zmq_port(0))) - if (rc /= 0) then - stop 'Unable to connect new_zmq_to_qp_run_socket' - endif - rc = f77_zmq_setsockopt(new_zmq_to_qp_run_socket, ZMQ_SNDTIMEO, 120000, 4) if (rc /= 0) then stop 'Unable to set send timout in new_zmq_to_qp_run_socket' @@ -158,6 +153,11 @@ function new_zmq_to_qp_run_socket() stop 'Unable to set recv timout in new_zmq_to_qp_run_socket' endif + rc = f77_zmq_connect(new_zmq_to_qp_run_socket, trim(qp_run_address)//':'//trim(zmq_port(0))) + if (rc /= 0) then + stop 'Unable to connect new_zmq_to_qp_run_socket' + endif + end @@ -182,18 +182,6 @@ function new_zmq_pair_socket(bind) stop 'Unable to create zmq pair socket' endif - if (bind) then - rc = f77_zmq_bind(new_zmq_pair_socket,zmq_socket_pair_inproc_address) - if (rc /= 0) then - print *, 'f77_zmq_bind(new_zmq_pair_socket, zmq_socket_pair_inproc_address)' - stop 'error' - endif - else - rc = f77_zmq_connect(new_zmq_pair_socket,zmq_socket_pair_inproc_address) - if (rc /= 0) then - stop 'Unable to connect new_zmq_pair_socket' - endif - endif rc = f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_SNDHWM, 1, 4) if (rc /= 0) then @@ -215,6 +203,19 @@ function new_zmq_pair_socket(bind) stop 'f77_zmq_setsockopt(new_zmq_pair_socket, ZMQ_LINGER, 60000, 4)' endif + if (bind) then + rc = f77_zmq_bind(new_zmq_pair_socket,zmq_socket_pair_inproc_address) + if (rc /= 0) then + print *, 'f77_zmq_bind(new_zmq_pair_socket, zmq_socket_pair_inproc_address)' + stop 'error' + endif + else + rc = f77_zmq_connect(new_zmq_pair_socket,zmq_socket_pair_inproc_address) + if (rc /= 0) then + stop 'Unable to connect new_zmq_pair_socket' + endif + endif + end @@ -535,6 +536,34 @@ subroutine new_parallel_job(zmq_to_qp_run_socket,name_in) end +subroutine zmq_set_running(zmq_to_qp_run_socket) + use f77_zmq + implicit none + BEGIN_DOC + ! Set the job to Running in QP-run + END_DOC + + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + character*(512) :: message + integer :: rc, sze + + message = 'set_running' + sze = len(trim(message)) + rc = f77_zmq_send(zmq_to_qp_run_socket,message,sze,0) + if (rc /= sze) then + print *, irp_here, ':f77_zmq_send(zmq_to_qp_run_socket,message,sze,0)' + stop 'error' + endif + rc = f77_zmq_recv(zmq_to_qp_run_socket,message,510,0) + message = trim(message(1:rc)) + if (message(1:2) /= 'ok') then + print *, 'Unable to set qp_run to Running' + stop 1 + endif + + +end + subroutine end_parallel_job(zmq_to_qp_run_socket,name_in) use f77_zmq @@ -584,7 +613,6 @@ subroutine connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) character*(512) :: message character*(128) :: reply, state, address integer :: rc - if (thread == 1) then rc = f77_zmq_send(zmq_to_qp_run_socket, "connect inproc", 14, 0) if (rc /= 14) then @@ -601,6 +629,10 @@ subroutine connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) message = trim(message(1:rc)) + if(message(1:5) == "error") then + worker_id = -1 + return + end if read(message,*) reply, state, worker_id, address if ( (trim(reply) /= 'connect_reply') .and. & (trim(state) /= trim(zmq_state)) ) then @@ -609,7 +641,6 @@ subroutine connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) print *, 'Address: ', trim(address) stop -1 endif - end subroutine disconnect_from_taskserver(zmq_to_qp_run_socket, & @@ -842,7 +873,7 @@ subroutine wait_for_state(state_wait,state) zmq_socket_sub = new_zmq_sub_socket() state = "Waiting" - do while (state /= state_wait .and. state /= "Stopped") + do while (trim(state) /= trim(state_wait) .and. trim(state) /= "Stopped") rc = f77_zmq_recv( zmq_socket_sub, state, 64, 0) if (rc > 0) then state = trim(state(1:rc)) From 3bd401513dc7ac28b1b0ec0037d5096ece333ed9 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 1 Aug 2016 17:15:03 +0200 Subject: [PATCH 02/11] Bugs --- plugins/Full_CI_ZMQ/selection.irp.f | 7 +++---- plugins/Full_CI_ZMQ/selection_slave.irp.f | 12 ++++-------- src/ZMQ/utils.irp.f | 5 +++-- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 0cf7e576..b26b3e1b 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -62,9 +62,9 @@ subroutine selection_slaved(thread,iproc) !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) call select_connected(i_generator,ci_electronic_energy,pt2,buf) - end if - - if(done) ctask = ctask - 1 + else + ctask = ctask - 1 + endif if(done .or. ctask == size(task_id)) then if(buf%N == 0 .and. ctask > 0) stop "uninitialized selection_buffer" @@ -77,7 +77,6 @@ subroutine selection_slaved(thread,iproc) buf%cur = 0 end if - ctask = 0 end if diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index fd7728f7..db18ca59 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -31,17 +31,14 @@ subroutine run_wf call provide_everything zmq_context = f77_zmq_ctx_new () + zmq_state = 'selection' + state = 'Waiting' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() do - call wait_for_state("selection", zmq_state) - if(trim(zmq_state) /= "selection") exit - if(oki < 0) then - oki += 1 - cycle - end if - oki = 0 + call wait_for_state(zmq_state,state) + if(trim(state) /= 'selection') exit print *, 'Getting wave function' call zmq_get_psi(zmq_to_qp_run_socket,1,energy,size(energy)) integer :: j,k @@ -59,7 +56,6 @@ subroutine run_wf endif call write_double(6,ci_energy,'Energy') - !zmq_state = 'selection' integer :: rc, i diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 6b22158e..4a6085e2 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -872,8 +872,8 @@ subroutine wait_for_state(state_wait,state) integer :: rc zmq_socket_sub = new_zmq_sub_socket() - state = "Waiting" - do while (trim(state) /= trim(state_wait) .and. trim(state) /= "Stopped") + state = 'Waiting' + do while (trim(state) /= trim(state_wait) .and. trim(state) /= 'Stopped') rc = f77_zmq_recv( zmq_socket_sub, state, 64, 0) if (rc > 0) then state = trim(state(1:rc)) @@ -881,6 +881,7 @@ subroutine wait_for_state(state_wait,state) print *, 'Timeout reached. Stopping' state = "Stopped" endif +print *, '|'//trim(state(1:rc))//'|'//trim(state_wait)//'|' end do call end_zmq_sub_socket(zmq_socket_sub) end From eaee625df4fd571d43c7b590dcb4765c60e5b278 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Aug 2016 17:34:36 +0200 Subject: [PATCH 03/11] Almost working --- ocaml/Message.ml | 12 ++++++++---- ocaml/Queuing_system.ml | 5 ++++- ocaml/TaskServer.ml | 23 ++++++----------------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/ocaml/Message.ml b/ocaml/Message.ml index 98c4ef25..68b866d5 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -248,16 +248,20 @@ end (** GetTaskReply : Reply to the GetTask message *) module GetTaskReply_msg : sig type t - val create : task_id:Id.Task.t -> task:string -> t + val create : task_id:Id.Task.t option -> task:string option -> t val to_string : t -> string end = struct type t = - { task_id: Id.Task.t ; - task : string ; + { task_id: Id.Task.t option ; + task : string option ; } let create ~task_id ~task = { task_id ; task } let to_string x = - Printf.sprintf "get_task_reply %d %s" (Id.Task.to_int x.task_id) x.task + match x.task_id, x.task with + | Some task_id, Some task -> + Printf.sprintf "get_task_reply %d %s" (Id.Task.to_int task_id) task + | _ -> + Printf.sprintf "get_task_reply 0" end (** GetPsi : get the current variational wave function *) diff --git a/ocaml/Queuing_system.ml b/ocaml/Queuing_system.ml index acdfd439..78abb185 100644 --- a/ocaml/Queuing_system.ml +++ b/ocaml/Queuing_system.ml @@ -99,9 +99,12 @@ let del_task ~task_id q = -let number_of_queued q = +let number q = Map.length q.tasks +let number_of_queued q = + List.length q.queued + let number_of_running q = Map.length q.running diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index c02c8f8f..cb67ec26 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -305,8 +305,7 @@ let del_task msg program_state rep_socket = } in let more = - (Queuing_system.number_of_queued new_program_state.queue + - Queuing_system.number_of_running new_program_state.queue) > 0 + (Queuing_system.number new_program_state.queue > 0) in Message.DelTaskReply (Message.DelTaskReply_msg.create ~task_id ~more) |> Message.to_string @@ -427,21 +426,10 @@ let get_task msg program_state rep_socket pair_socket = } in - match (task, task_id) with - | Some task, Some task_id -> - begin - Message.GetTaskReply (Message.GetTaskReply_msg.create ~task ~task_id) - |> Message.to_string - |> ZMQ.Socket.send rep_socket ; - new_program_state - end - | _ -> - begin - Message.Terminate (Message.Terminate_msg.create ()) - |> Message.to_string - |> ZMQ.Socket.send rep_socket ; - program_state - end + Message.GetTaskReply (Message.GetTaskReply_msg.create ~task ~task_id) + |> Message.to_string + |> ZMQ.Socket.send rep_socket ; + new_program_state in @@ -601,6 +589,7 @@ let start_pub_thread ~port = else state in +print_endline (string_of_pub_state new_state); ZMQ.Socket.send pub_socket @@ string_of_pub_state new_state; match state with | Stopped -> () From 36a6bd01c61a4394450a318539de75e84f367803 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Aug 2016 20:03:46 +0200 Subject: [PATCH 04/11] Removed prints --- ocaml/TaskServer.ml | 1 - plugins/Full_CI_ZMQ/selection.irp.f | 18 ++++++++---------- plugins/Full_CI_ZMQ/selection_slave.irp.f | 2 +- src/ZMQ/utils.irp.f | 1 - 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index cb67ec26..75334adc 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -589,7 +589,6 @@ let start_pub_thread ~port = else state in -print_endline (string_of_pub_state new_state); ZMQ.Socket.send pub_socket @@ string_of_pub_state new_state; match state with | Stopped -> () diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index b26b3e1b..ba6c7c73 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -3,13 +3,11 @@ BEGIN_PROVIDER [ double precision, integral8, (mo_tot_num, mo_tot_num, mo_tot_num, mo_tot_num) ] integral8 = 0d0 integer :: h1, h2 - print *, "provide int" do h1=1, mo_tot_num - do h2=1, mo_tot_num - call get_mo_bielec_integrals_ij(h1, h2 ,mo_tot_num,integral8(1,1,h1,h2),mo_integrals_map) + do h2=1, mo_tot_num + call get_mo_bielec_integrals_ij(h1, h2 ,mo_tot_num,integral8(1,1,h1,h2),mo_integrals_map) + end do end do - end do - print *, "end provide int" END_PROVIDER @@ -21,7 +19,7 @@ subroutine selection_slaved(thread,iproc) integer, intent(in) :: thread, iproc integer :: rc, i - integer :: worker_id, task_id(100), ctask, ltask + integer :: worker_id, task_id(1), ctask, ltask character*(512) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -51,7 +49,9 @@ subroutine selection_slaved(thread,iproc) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) done = task_id(ctask) == 0 - if (.not. done) then + if (done) then + ctask = ctask - 1 + else integer :: i_generator, N read (task,*) i_generator, N if(buf%N == 0) then @@ -62,8 +62,6 @@ subroutine selection_slaved(thread,iproc) !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) call select_connected(i_generator,ci_electronic_energy,pt2,buf) - else - ctask = ctask - 1 endif if(done .or. ctask == size(task_id)) then @@ -300,7 +298,7 @@ subroutine selection_collector(b, pt2) end do done += ntask call CPU_TIME(time) - print *, "DONE" , done, time - time0 +! print *, "DONE" , done, time - time0 end do diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index db18ca59..a09e1bf2 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -52,7 +52,7 @@ subroutine run_wf do k=1,size(ci_electronic_energy) ci_electronic_energy(k) = energy(k) enddo - SOFT_TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors + TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors endif call write_double(6,ci_energy,'Energy') diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 4a6085e2..30dee670 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -881,7 +881,6 @@ subroutine wait_for_state(state_wait,state) print *, 'Timeout reached. Stopping' state = "Stopped" endif -print *, '|'//trim(state(1:rc))//'|'//trim(state_wait)//'|' end do call end_zmq_sub_socket(zmq_socket_sub) end From 2dd38c0bdbebab6ca766b0784475ef7e1be1e9bc Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Aug 2016 20:31:23 +0200 Subject: [PATCH 05/11] Corrected linger bug --- plugins/Full_CI_ZMQ/selection.irp.f | 4 ++-- src/ZMQ/utils.irp.f | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index ba6c7c73..e39ebd6b 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -19,7 +19,7 @@ subroutine selection_slaved(thread,iproc) integer, intent(in) :: thread, iproc integer :: rc, i - integer :: worker_id, task_id(1), ctask, ltask + integer :: worker_id, task_id(10), ctask, ltask character*(512) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -293,7 +293,7 @@ subroutine selection_collector(b, pt2) end do do i=1, ntask - if(task_id(i) == 0) stop "collector" + if(task_id(i) == 0) stop "Error in collector" call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do done += ntask diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 30dee670..9fa70d5e 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -394,10 +394,10 @@ subroutine end_zmq_pair_socket(zmq_socket_pair) ! stop 'error' ! endif - rc = f77_zmq_setsockopt(zmq_socket_pair,ZMQ_LINGER,0,4) - if (rc /= 0) then - stop 'Unable to set ZMQ_LINGER on zmq_socket_pair' - endif +! rc = f77_zmq_setsockopt(zmq_socket_pair,0ZMQ_LINGER,1000,4) +! if (rc /= 0) then +! stop 'Unable to set ZMQ_LINGER on zmq_socket_pair' +! endif rc = f77_zmq_close(zmq_socket_pair) if (rc /= 0) then @@ -433,10 +433,10 @@ subroutine end_zmq_pull_socket(zmq_socket_pull) call sleep(1) ! see https://github.com/zeromq/libzmq/issues/1922 - rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,0,4) - if (rc /= 0) then - stop 'Unable to set ZMQ_LINGER on zmq_socket_pull' - endif +! rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,10000,4) +! if (rc /= 0) then +! stop 'Unable to set ZMQ_LINGER on zmq_socket_pull' +! endif rc = f77_zmq_close(zmq_socket_pull) if (rc /= 0) then @@ -473,10 +473,10 @@ subroutine end_zmq_push_socket(zmq_socket_push,thread) endif - rc = f77_zmq_setsockopt(zmq_socket_push,ZMQ_LINGER,0,4) - if (rc /= 0) then - stop 'Unable to set ZMQ_LINGER on push socket' - endif +! rc = f77_zmq_setsockopt(zmq_socket_push,ZMQ_LINGER,20000,4) +! if (rc /= 0) then +! stop 'Unable to set ZMQ_LINGER on push socket' +! endif rc = f77_zmq_close(zmq_socket_push) if (rc /= 0) then @@ -805,7 +805,7 @@ subroutine end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) ! stop 'error' ! endif - rc = f77_zmq_setsockopt(zmq_to_qp_run_socket,ZMQ_LINGER,0,4) + rc = f77_zmq_setsockopt(zmq_to_qp_run_socket,ZMQ_LINGER,1000,4) if (rc /= 0) then stop 'Unable to set ZMQ_LINGER on zmq_to_qp_run_socket' endif From eb15a392befdcba32354a5c73f2b4a265b7934e8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 1 Aug 2016 23:08:22 +0200 Subject: [PATCH 06/11] No more davidson on clients --- ocaml/TaskServer.ml | 3 +- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 2 +- plugins/Full_CI_ZMQ/selection.irp.f | 55 ++++++++++++++--------- plugins/Full_CI_ZMQ/selection_slave.irp.f | 50 ++++++++++++--------- plugins/Selectors_full/selectors.irp.f | 18 +++++++- src/ZMQ/utils.irp.f | 25 ++++++++--- 6 files changed, 103 insertions(+), 50 deletions(-) diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 75334adc..927ac241 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -681,9 +681,10 @@ let run ~port = in (** Debug input *) - Printf.sprintf "%d %d : %s\n%!" + Printf.sprintf "q:%d r:%d n:%d : %s\n%!" (Queuing_system.number_of_queued program_state.queue) (Queuing_system.number_of_running program_state.queue) + (Queuing_system.number program_state.queue) (Message.to_string message) |> debug; diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index f45da233..02461ee3 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -155,6 +155,6 @@ subroutine selection_dressing_slave_inproc(i) implicit none integer, intent(in) :: i - call selection_slaved(1,i) + call selection_slaved(1,i,ci_electronic_energy) end diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index e39ebd6b..5af0e206 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -11,15 +11,16 @@ BEGIN_PROVIDER [ double precision, integral8, (mo_tot_num, mo_tot_num, mo_tot_n END_PROVIDER -subroutine selection_slaved(thread,iproc) +subroutine selection_slaved(thread,iproc,energy) use f77_zmq use selection_types implicit none + double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: thread, iproc integer :: rc, i - integer :: worker_id, task_id(10), ctask, ltask + integer :: worker_id, task_id(1), ctask, ltask character*(512) :: task integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket @@ -61,7 +62,7 @@ subroutine selection_slaved(thread,iproc) end if !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) - call select_connected(i_generator,ci_electronic_energy,pt2,buf) + call select_connected(i_generator,energy,pt2,buf) endif if(done .or. ctask == size(task_id)) then @@ -117,6 +118,8 @@ subroutine push_selection_results(zmq_socket_push, pt2, b, task_id, ntask) rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0) if(rc /= 4*ntask) stop "push" +! Activate is zmq_socket_push is a REQ +! rc = f77_zmq_recv( zmq_socket_push, task_id(1), ntask*4, 0) end subroutine @@ -131,23 +134,26 @@ subroutine pull_selection_results(zmq_socket_pull, pt2, val, det, N, task_id, nt integer, intent(out) :: N, ntask, task_id(*) integer :: rc, rn, i - rc = f77_zmq_recv( zmq_socket_pull, N, 4, ZMQ_SNDMORE) + rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0) if(rc /= 4) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, ZMQ_SNDMORE) + rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0) if(rc /= 8*N_states) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, ZMQ_SNDMORE) + rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0) if(rc /= 8*N) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, ZMQ_SNDMORE) + rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0) if(rc /= bit_kind*N_int*2*N) stop "pull" - rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, ZMQ_SNDMORE) + rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0) if(rc /= 4) stop "pull" rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0) if(rc /= 4*ntask) stop "pull" + +! Activate is zmq_socket_pull is a REP +! rc = f77_zmq_send( zmq_socket_pull, task_id(1), ntask*4, 0) end subroutine @@ -293,7 +299,9 @@ subroutine selection_collector(b, pt2) end do do i=1, ntask - if(task_id(i) == 0) stop "Error in collector" + if(task_id(i) == 0) then + print *, "Error in collector" + endif call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) end do done += ntask @@ -389,7 +397,7 @@ subroutine select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p call create_microlist_single(psi_selectors, i_generator, N_det_selectors, ion_det, microlist, idx_microlist, N_microlist, ptr_microlist, N_int) do j=1, ptr_microlist(mo_tot_num * 2 + 1) - 1 - psi_coef_microlist(j,:) = psi_selectors_coef(idx_microlist(j),:) + psi_coef_microlist(j,:) = psi_selectors_coef_transp(:,idx_microlist(j)) enddo if(ptr_microlist(mo_tot_num * 2 + 1) == 1) then @@ -568,10 +576,10 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do j=1, ptr_microlist(mo_tot_num * 2 + 1) - 1 - psi_coef_microlist(j,:) = psi_selectors_coef(idx_microlist(j),:) + psi_coef_microlist(j,:) = psi_selectors_coef_transp(:,idx_microlist(j)) enddo do j=1, ptr_tmicrolist(mo_tot_num * 2 + 1) - 1 - psi_coef_tmicrolist(j,:) = psi_selectors_coef(idx_tmicrolist(j),:) + psi_coef_tmicrolist(j,:) = psi_selectors_coef_transp(:,idx_tmicrolist(j)) enddo @@ -1042,7 +1050,7 @@ subroutine create_microlist_double(minilist, i_cur, N_minilist, key_mask, microl !!!! INTEGRAL DRIVEN ! !!!!!!!!!!!!!!!!!!!! call get_d0(minilist(1,1,idx(i)), banned, banned_pair, d0s, key_mask, 1+(nt2-1)/mo_tot_num, 1+(nt-1)/mo_tot_num, & - mod(nt2-1, mo_tot_num)+1, mod(nt-1, mo_tot_num)+1, psi_selectors_coef(idx(i), :)) + mod(nt2-1, mo_tot_num)+1, mod(nt-1, mo_tot_num)+1, psi_selectors_coef_transp(1,idx(i))) ! do j=1, N_states ! do nt2=1, mo_tot_num @@ -1062,7 +1070,7 @@ subroutine create_microlist_double(minilist, i_cur, N_minilist, key_mask, microl end do end do - call get_d1(minilist(1,1,idx(i)), banned, banned_pair, d0s, key_mask, pwen, psi_selectors_coef(idx(i), :)) + call get_d1(minilist(1,1,idx(i)), banned, banned_pair, d0s, key_mask, pwen, psi_selectors_coef_transp(1,idx(i))) ! do k=1, N_states ! do nt2=1, mo_tot_num @@ -1094,6 +1102,7 @@ subroutine get_d1(gen, banned, banned_pair, mat, mask, pwen, coefs) integer :: exc(0:2, 2, 2) logical :: lbanned(mo_tot_num*2) logical :: ok, mono, ab + integer :: tmp_array(4) lbanned = banned !mat = 0d0 @@ -1145,12 +1154,14 @@ subroutine get_d1(gen, banned, banned_pair, mat, mask, pwen, coefs) exc(1, 1, 2) = p(a1) exc(1, 2, sfix) = pfix - call apply_particle(mask, (/0, 0 ,s(i), p(i) /), deth, ok, N_int) + tmp_array = (/0, 0 ,s(i), p(i) /) + call apply_particle(mask, tmp_array, deth, ok, N_int) do j=1,mo_tot_num mwen = j + (sm-1)*mo_tot_num if(lbanned(mwen)) cycle - call apply_particle(deth, (/0,0,sm,j/), det, ok, N_int) + tmp_array = (/0,0,sm,j/) + call apply_particle(deth, tmp_array, det, ok, N_int) if(.not. ok) cycle mono = mwen == pwen(a1) .or. mwen == pwen(a2) @@ -1193,13 +1204,14 @@ subroutine get_d1(gen, banned, banned_pair, mat, mask, pwen, coefs) exc(1, 1, sp) = min(h1, h2) exc(2, 1, sp) = max(h1, h2) - call apply_particle(mask, (/0, 0 ,s(i), p(i) /), deth, ok, N_int) + tmp_array = (/0, 0 ,s(i), p(i) /) + call apply_particle(mask, tmp_array, deth, ok, N_int) do j=1,mo_tot_num if(j == pfix) inv = -inv mwen = j + (sm-1)*mo_tot_num if(lbanned(mwen)) cycle - call apply_particle(deth, (/0,0,sm,j/), det, ok, N_int) + call apply_particle(deth, tmp_array, det, ok, N_int) if(.not. ok) cycle mono = mwen == pwen(a1) .or. mwen == pwen(a2) @@ -1245,6 +1257,7 @@ subroutine get_d0(gen, banned, banned_pair, mat, mask, s1, s2, h1, h2, coefs) integer :: p1, p2, hmi, hma, ns1, ns2, st logical, external :: detEq integer :: exc(0:2, 2, 2), exc2(0:2,2,2) + integer :: tmp_array(4) exc = 0 ! mat_mwen = integral8(:,:,h1,h2) @@ -1268,7 +1281,8 @@ subroutine get_d0(gen, banned, banned_pair, mat, mask, s1, s2, h1, h2, coefs) if(banned(p1 + ns1)) cycle if(p1 == p2) cycle if(banned_pair(p1 + ns1, p2 + ns2)) cycle - call apply_particle(mask, (/s1,p1,s2,p2/), det2, ok, N_int) + tmp_array = (/s1,p1,s2,p2/) + call apply_particle(mask, tmp_array, det2, ok, N_int) if(.not. ok) cycle mono = (hmi == p1 .or. hma == p2 .or. hmi == p2 .or. hma == p1) if(mono) then @@ -1299,7 +1313,8 @@ subroutine get_d0(gen, banned, banned_pair, mat, mask, s1, s2, h1, h2, coefs) do p1=1, mo_tot_num if(banned(p1 + ns1)) cycle if(banned_pair(p1 + ns1, p2 + ns2)) cycle - call apply_particle(mask, (/s1,p1,s2,p2/), det2, ok, N_int) + tmp_array = (/s1,p1,s2,p2/) + call apply_particle(mask, tmp_array, det2, ok, N_int) if(.not. ok) cycle mono = (h1 == p1 .or. h2 == p2) if(mono) then diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index a09e1bf2..bdb76db4 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -40,22 +40,7 @@ subroutine run_wf call wait_for_state(zmq_state,state) if(trim(state) /= 'selection') exit print *, 'Getting wave function' - call zmq_get_psi(zmq_to_qp_run_socket,1,energy,size(energy)) - integer :: j,k - do j=1,N_states_diag - do k=1,N_det - CI_eigenvectors(k,j) = psi_coef(k,j) - enddo - call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),CI_eigenvectors_s2(j)) - enddo - if (.True.) then - do k=1,size(ci_electronic_energy) - ci_electronic_energy(k) = energy(k) - enddo - TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors - endif - - call write_double(6,ci_energy,'Energy') + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag) integer :: rc, i @@ -64,16 +49,39 @@ subroutine run_wf !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() - call selection_dressing_slave_tcp(i) + call selection_dressing_slave_tcp(i, energy) !$OMP END PARALLEL end do end - -subroutine selection_dressing_slave_tcp(i) +subroutine update_energy(energy) implicit none - integer, intent(in) :: i + double precision, intent(in) :: energy(N_states_diag) + BEGIN_DOC +! Update energy when it is received from ZMQ + END_DOC + integer :: j,k + do j=1,N_states_diag + do k=1,N_det + CI_eigenvectors(k,j) = psi_coef(k,j) + enddo + call get_s2_u0(psi_det,CI_eigenvectors(1,j),N_det,size(CI_eigenvectors,1),CI_eigenvectors_s2(j)) + enddo + if (.True.) then + do k=1,size(ci_electronic_energy) + ci_electronic_energy(k) = energy(k) + enddo + TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors + endif - call selection_slaved(0,i) + call write_double(6,ci_energy,'Energy') +end + +subroutine selection_dressing_slave_tcp(i,energy) + implicit none + double precision, intent(in) :: energy(N_states_diag) + integer, intent(in) :: i + + call selection_slaved(0,i,energy) end diff --git a/plugins/Selectors_full/selectors.irp.f b/plugins/Selectors_full/selectors.irp.f index 826dcc4b..6fbad9ec 100644 --- a/plugins/Selectors_full/selectors.irp.f +++ b/plugins/Selectors_full/selectors.irp.f @@ -48,7 +48,21 @@ END_PROVIDER enddo END_PROVIDER - BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] +BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ] + implicit none + BEGIN_DOC + ! Transposed psi_selectors + END_DOC + integer :: i,k + + do i=1,N_det_selectors + do k=1,N_states + psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ] implicit none BEGIN_DOC ! Diagonal elements of the H matrix for each selectors @@ -58,6 +72,6 @@ END_PROVIDER do i = 1, N_det_selectors psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int) enddo - END_PROVIDER +END_PROVIDER diff --git a/src/ZMQ/utils.irp.f b/src/ZMQ/utils.irp.f index 9fa70d5e..e5a9f8ef 100644 --- a/src/ZMQ/utils.irp.f +++ b/src/ZMQ/utils.irp.f @@ -145,12 +145,12 @@ function new_zmq_to_qp_run_socket() rc = f77_zmq_setsockopt(new_zmq_to_qp_run_socket, ZMQ_SNDTIMEO, 120000, 4) if (rc /= 0) then - stop 'Unable to set send timout in new_zmq_to_qp_run_socket' + stop 'Unable to set send timeout in new_zmq_to_qp_run_socket' endif rc = f77_zmq_setsockopt(new_zmq_to_qp_run_socket, ZMQ_RCVTIMEO, 120000, 4) if (rc /= 0) then - stop 'Unable to set recv timout in new_zmq_to_qp_run_socket' + stop 'Unable to set recv timeout in new_zmq_to_qp_run_socket' endif rc = f77_zmq_connect(new_zmq_to_qp_run_socket, trim(qp_run_address)//':'//trim(zmq_port(0))) @@ -247,7 +247,12 @@ function new_zmq_pull_socket() stop 'Unable to set ZMQ_LINGER on pull socket' endif - rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,1000,4) + rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVBUF,100000000,4) + if (rc /= 0) then + stop 'Unable to set ZMQ_RCVBUF on pull socket' + endif + + rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,1,4) if (rc /= 0) then stop 'Unable to set ZMQ_RCVHWM on pull socket' endif @@ -295,11 +300,16 @@ function new_zmq_push_socket(thread) stop 'Unable to set ZMQ_LINGER on push socket' endif - rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,1000,4) + rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,1,4) if (rc /= 0) then stop 'Unable to set ZMQ_SNDHWM on push socket' endif + rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDBUF,100000000,4) + if (rc /= 0) then + stop 'Unable to set ZMQ_RCVBUF on push socket' + endif + rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_IMMEDIATE,1,4) if (rc /= 0) then stop 'Unable to set ZMQ_IMMEDIATE on push socket' @@ -347,6 +357,11 @@ function new_zmq_sub_socket() stop 'Unable to set timeout in new_zmq_sub_socket' endif + rc = f77_zmq_setsockopt(new_zmq_sub_socket,ZMQ_CONFLATE,1,4) + if (rc /= 0) then + stop 'Unable to set conflate in new_zmq_sub_socket' + endif + rc = f77_zmq_setsockopt(new_zmq_sub_socket,ZMQ_SUBSCRIBE,"",0) if (rc /= 0) then stop 'Unable to subscribe new_zmq_sub_socket' @@ -431,7 +446,7 @@ subroutine end_zmq_pull_socket(zmq_socket_pull) ! stop 'error' ! endif - call sleep(1) ! see https://github.com/zeromq/libzmq/issues/1922 +! call sleep(1) ! see https://github.com/zeromq/libzmq/issues/1922 ! rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,10000,4) ! if (rc /= 0) then From 33f5f44fe57d6aa22eaa062fff79611016cfb33d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Aug 2016 13:45:17 +0200 Subject: [PATCH 07/11] Refactoring MRCC --- plugins/MRCC_Utils/mrcc_utils.irp.f | 392 ++++++++++++++-------------- 1 file changed, 200 insertions(+), 192 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 315006ff..85fd40e8 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -225,6 +225,7 @@ logical function is_generable(det1, det2, Nint) integer, external :: searchExc logical, external :: excEq double precision :: phase + integer*2 :: tmp_array(4) is_generable = .false. call get_excitation(det1, det2, exc, degree, phase, Nint) @@ -247,19 +248,20 @@ logical function is_generable(det1, det2, Nint) end if if(h1 + (s1-1)*mo_tot_num < h2 + (s2-1)*mo_tot_num) then - f = searchExc(hh_exists(1,1), (/s1, h1, s2, h2/), hh_shortcut(0)) + tmp_array = (/s1, h1, s2, h2/) else - f = searchExc(hh_exists(1,1), (/s2, h2, s1, h1/), hh_shortcut(0)) + tmp_array = (/s2, h2, s1, h1/) end if - if(f == -1) return - + f = searchExc(hh_exists(1,1), tmp_array, hh_shortcut(0)) + if(p1 + (s1-1)*mo_tot_num < p2 + (s2-1)*mo_tot_num) then - f = searchExc(pp_exists(1,hh_shortcut(f)), (/s1, p1, s2, p2/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s1, p1, s2, p2/) else - f = searchExc(pp_exists(1,hh_shortcut(f)), (/s2, p2, s1, p1/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s2, p2, s1, p1/) end if - - if(f /= -1) is_generable = .true. + f = searchExc(pp_exists(1,hh_shortcut(f)), tmp_array, hh_shortcut(f+1)-hh_shortcut(f)) + + is_generable = (f /= -1) end function @@ -542,187 +544,189 @@ END_PROVIDER END_PROVIDER -BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] - implicit none - logical :: ok - integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row - integer, external :: searchDet, unsortedSearchDet - integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) - integer :: N, INFO, AtA_size, r1, r2 - double precision , allocatable:: B(:), AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) - double precision :: t, norm, cx - integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) - - - - nex = hh_shortcut(hh_shortcut(0)+1)-1 - print *, "TI", nex, N_det_non_ref - allocate(A_ind(N_det_ref+1, nex), A_val(N_det_ref+1, nex)) - allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! - allocate(x(nex), AtB(nex)) - allocate(A_val_mwen(nex), A_ind_mwen(nex)) - allocate(N_col(nex), col_shortcut(nex), B(N_det_non_ref)) - allocate (x_new(nex)) - - do s = 1, N_states - - A_val = 0d0 - A_ind = 0 - AtA_ind = 0 - AtA_val = 0d0 - x = 0d0 - AtB = 0d0 - A_val_mwen = 0d0 - A_ind_mwen = 0 - N_col = 0 - col_shortcut = 0 - B = 0d0 - x_new = 0d0 - - !$OMP PARALLEL DO schedule(static,10) default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind) & - !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref) & - !$OMP private(lref, pp, II, ok, myMask, myDet, ind, wk) - do hh = 1, hh_shortcut(0) - do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 - allocate(lref(N_det_non_ref)) - lref = 0 - do II = 1, N_det_ref - call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) - if(.not. ok) cycle - call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int) - if(.not. ok) cycle - ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) - if(ind /= -1) then - lref(psi_non_ref_sorted_idx(ind)) = II - end if - end do - wk = 0 - do i=1, N_det_non_ref - if(lref(i) /= 0) then - wk += 1 - A_val(wk, pp) = psi_ref_coef(lref(i), s) - A_ind(wk, pp) = i - end if - end do - deallocate(lref) - end do - end do - !$OMP END PARALLEL DO - - AtB = 0d0 - AtA_size = 0 - wk = 0 - col_shortcut = 0 - N_col = 0 - !$OMP PARALLEL DO schedule(dynamic, 100) default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref) & - !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen) & - !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) - do at_row = 1, nex - wk = 0 - if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex - do i=1,N_det_ref - if(A_ind(i, at_row) == 0) exit - AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) - end do - do a_col = 1, nex - t = 0d0 - r1 = 1 - r2 = 1 - do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) - if(A_ind(r1, at_row) < A_ind(r2, a_col)) then - r1 += 1 - else if(A_ind(r1, at_row) > A_ind(r2, a_col)) then - r2 += 1 - else - t = t - A_val(r1, at_row) * A_val(r2, a_col) - r1 += 1 - r2 += 1 - end if - end do - - if(a_col == at_row) then - t = (t + 1d0) - end if - if(t /= 0d0) then - wk += 1 - A_ind_mwen(wk) = a_col - A_val_mwen(wk) = t - end if - end do - - if(wk /= 0) then - !$OMP CRITICAL - col_shortcut(at_row) = AtA_size+1 - N_col(at_row) = wk - AtA_ind(AtA_size+1:AtA_size+wk) = A_ind_mwen(:wk) - AtA_val(AtA_size+1:AtA_size+wk) = A_val_mwen(:wk) - AtA_size += wk - !$OMP END CRITICAL - end if - end do - - x = AtB - if(AtA_size > size(AtA_val)) stop "SIZA" - print *, "ATA SIZE", ata_size - integer :: iproc, omp_get_thread_num - iproc = omp_get_thread_num() - do i=1,nex - x_new(i) = 0.D0 - enddo - - do k=0,100000 - !$OMP PARALLEL DO default(shared) - do i=1,nex - x_new(i) = AtB(i) - enddo - - !$OMP PARALLEL DO default(shared) private(cx, i) - do a_col = 1, nex - cx = 0d0 - do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 - cx += x(AtA_ind(i)) * AtA_val(i) + BEGIN_PROVIDER [ double precision, dIj_unique, (hh_shortcut(hh_shortcut(0)+1)-1, N_states) ] +&BEGIN_PROVIDER [ double precision, rho_mrcc, (N_det_non_ref, N_states) ] + implicit none + logical :: ok + integer :: i, j, k, s, II, pp, hh, ind, wk, nex, a_col, at_row + integer, external :: searchDet, unsortedSearchDet + integer(bit_kind) :: myDet(N_int, 2), myMask(N_int, 2) + integer :: N, INFO, AtA_size, r1, r2 + double precision , allocatable :: AtB(:), AtA_val(:), A_val(:,:), x(:), x_new(:), A_val_mwen(:) + double precision :: t, norm, cx, res + integer, allocatable :: A_ind(:,:), lref(:), AtA_ind(:), A_ind_mwen(:), col_shortcut(:), N_col(:) + + + + nex = hh_shortcut(hh_shortcut(0)+1)-1 + print *, "TI", nex, N_det_non_ref + allocate(A_ind(N_det_ref+1, nex), A_val(N_det_ref+1, nex)) + allocate(AtA_ind(N_det_ref * nex), AtA_val(N_det_ref * nex)) !!!!! MAY BE TOO SMALL ? !!!!!!!! + allocate(x(nex), AtB(nex)) + allocate(A_val_mwen(nex), A_ind_mwen(nex)) + allocate(N_col(nex), col_shortcut(nex)) + allocate (x_new(nex)) + + do s = 1, N_states + + A_val = 0d0 + A_ind = 0 + AtA_ind = 0 + AtA_val = 0d0 + x = 0d0 + A_val_mwen = 0d0 + A_ind_mwen = 0 + N_col = 0 + col_shortcut = 0 + + !$OMP PARALLEL DO schedule(static,10) default(none) shared(psi_non_ref, hh_exists, pp_exists, N_int, A_val, A_ind)& + !$OMP shared(s, hh_shortcut, psi_ref_coef, N_det_non_ref, psi_non_ref_sorted, psi_non_ref_sorted_idx, psi_ref, N_det_ref)& + !$OMP private(lref, pp, II, ok, myMask, myDet, ind, wk) + do hh = 1, hh_shortcut(0) + do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1 + allocate(lref(N_det_non_ref)) + lref = 0 + do II = 1, N_det_ref + call apply_hole(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int) + if(.not. ok) cycle + call apply_particle(myMask, pp_exists(1, pp), myDet, ok, N_int) + if(.not. ok) cycle + ind = searchDet(psi_non_ref_sorted(1,1,1), myDet(1,1), N_det_non_ref, N_int) + if(ind /= -1) then + lref(psi_non_ref_sorted_idx(ind)) = II + end if + end do + wk = 0 + do i=1, N_det_non_ref + if(lref(i) /= 0) then + wk += 1 + A_val(wk, pp) = psi_ref_coef(lref(i), s) + A_ind(wk, pp) = i + end if + end do + deallocate(lref) + end do end do - x_new(a_col) += cx + !$OMP END PARALLEL DO + + AtB = 0d0 + AtA_size = 0 + wk = 0 + col_shortcut = 0 + N_col = 0 + !$OMP PARALLEL DO schedule(dynamic, 100) default(none) shared(k, psi_non_ref_coef, A_ind, A_val, x, N_det_ref, nex, N_det_non_ref)& + !$OMP private(at_row, a_col, t, i, r1, r2, wk, A_ind_mwen, A_val_mwen)& + !$OMP shared(col_shortcut, N_col, AtB, AtA_size, AtA_val, AtA_ind, s) + do at_row = 1, nex + wk = 0 + if(mod(at_row, 10000) == 0) print *, "AtA", at_row, "/", nex + do i=1,N_det_ref + if(A_ind(i, at_row) == 0) exit + AtB(at_row) = AtB(at_row) + psi_non_ref_coef(A_ind(i, at_row), s) * A_val(i, at_row) + end do + do a_col = 1, nex + t = 0d0 + r1 = 1 + r2 = 1 + do while ((A_ind(r1, at_row) /= 0).and.(A_ind(r2, a_col) /= 0)) + if(A_ind(r1, at_row) < A_ind(r2, a_col)) then + r1 += 1 + else if(A_ind(r1, at_row) > A_ind(r2, a_col)) then + r2 += 1 + else + t = t - A_val(r1, at_row) * A_val(r2, a_col) + r1 += 1 + r2 += 1 + end if + end do + + if(a_col == at_row) then + t = (t + 1d0) + end if + if(t /= 0d0) then + wk += 1 + A_ind_mwen(wk) = a_col + A_val_mwen(wk) = t + end if + end do + + if(wk /= 0) then + !$OMP CRITICAL + col_shortcut(at_row) = AtA_size+1 + N_col(at_row) = wk + AtA_ind(AtA_size+1:AtA_size+wk) = A_ind_mwen(:wk) + AtA_val(AtA_size+1:AtA_size+wk) = A_val_mwen(:wk) + AtA_size += wk + !$OMP END CRITICAL + end if + end do + + if(AtA_size > size(AtA_val)) stop "SIZA" + print *, "ATA SIZE", ata_size + integer :: iproc, omp_get_thread_num + iproc = omp_get_thread_num() + do i=1,nex + x(i) = AtB(i) + enddo + + do k=0,100000 + !$OMP PARALLEL default(shared) private(cx, i, j, a_col) + + !$OMP DO + do i=1,N_det_non_ref + rho_mrcc(i,s) = 0.d0 + enddo + !$OMP END DO + + !$OMP DO + do a_col = 1, nex + cx = 0d0 + do i=col_shortcut(a_col), col_shortcut(a_col) + N_col(a_col) - 1 + cx = cx + x(AtA_ind(i)) * AtA_val(i) + end do + x_new(a_col) = AtB(a_col) + cx + end do + !$OMP END DO + + !$OMP END PARALLEL + + res = 0.d0 + do a_col=1,nex + do j=1,N_det_non_ref + i = A_ind(j,a_col) + if (i==0) exit + rho_mrcc(i,s) = rho_mrcc(i,s) + A_val(j,a_col) * X_new(a_col) + enddo + res = res + (X_new(a_col) - X(a_col))**2 + X(a_col) = X_new(a_col) + end do + + if(mod(k, 100) == 0) then + print *, "residu ", k, res + end if + + if(res < 1d-16) exit + end do + + norm = 0.d0 + do i=1,N_det_non_ref + norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) + enddo + do i=1,N_det_ref + norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) + enddo + + print *, k, "res : ", res, "norm : ", sqrt(norm) + + dIj_unique(:size(X), s) = X(:) + + do i=1,N_det_non_ref + rho_mrcc(i,s) = psi_non_ref_coef(i,s) / rho_mrcc(i,s) + enddo + end do - !$OMP END PARALLEL DO - double precision :: norm_cas - norm_cas = 0d0 - do i = 1, N_det_ref - norm_cas += psi_ref_coef(i,s)**2 - end do - - norm = 0d0 - t = 0d0 - - do j=1, size(X) - t = t + X_new(j) * X_new(j) - end do - - - t = (1d0 - norm_cas ) / t - x_new = x_new * sqrt(t) - - do j=1, size(X) - norm += (X_new(j) - X(j))**2 - x(j) = x_new(j) - end do - - - if(mod(k, 100) == 0) then - print *, "residu ", k, norm, "norm t", sqrt(t) - end if - - if(norm < 1d-16) exit - end do - print *, "CONVERGENCE : ", norm - - dIj_unique(:size(X), s) = X(:) - - - end do - - - print *, "done" + + print *, "done" END_PROVIDER @@ -767,6 +771,7 @@ double precision function get_dij(det1, det2, s, Nint) integer, external :: searchExc logical, external :: excEq double precision :: phase + integer*2 :: tmp_array(4) get_dij = 0d0 call get_excitation(det1, det2, exc, degree, phase, Nint) @@ -787,18 +792,21 @@ double precision function get_dij(det1, det2, s, Nint) end if if(h1 + (s1-1)*mo_tot_num < h2 + (s2-1)*mo_tot_num) then - f = searchExc(hh_exists(1,1), (/s1, h1, s2, h2/), hh_shortcut(0)) + tmp_array = (/s1, h1, s2, h2/) else - f = searchExc(hh_exists(1,1), (/s2, h2, s1, h1/), hh_shortcut(0)) + tmp_array = (/s2, h2, s1, h1/) end if + f = searchExc(hh_exists(1,1), tmp_array, hh_shortcut(0)) + if(f == -1) return if(p1 + (s1-1)*mo_tot_num < p2 + (s2-1)*mo_tot_num) then - t = searchExc(pp_exists(1,hh_shortcut(f)), (/s1, p1, s2, p2/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s1, p1, s2, p2/) else - t = searchExc(pp_exists(1,hh_shortcut(f)), (/s2, p2, s1, p1/), hh_shortcut(f+1)-hh_shortcut(f)) + tmp_array = (/s2, p2, s1, p1/) end if - + t = searchExc(pp_exists(1,hh_shortcut(f)), tmp_array, hh_shortcut(f+1)-hh_shortcut(f)) + if(t /= -1) then get_dij = dIj_unique(t - 1 + hh_shortcut(f), s) end if From cfaf2e14e09c3436c7fda0e50528cce7755b3547 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Aug 2016 14:03:34 +0200 Subject: [PATCH 08/11] Added rho in MRCC-nolambda --- plugins/MRCC_Utils/mrcc_utils.irp.f | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 85fd40e8..4b6203cb 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -663,8 +663,6 @@ END_PROVIDER if(AtA_size > size(AtA_val)) stop "SIZA" print *, "ATA SIZE", ata_size - integer :: iproc, omp_get_thread_num - iproc = omp_get_thread_num() do i=1,nex x(i) = AtB(i) enddo @@ -705,7 +703,7 @@ END_PROVIDER print *, "residu ", k, res end if - if(res < 1d-16) exit + if(res < 1d-10) exit end do norm = 0.d0 @@ -754,6 +752,7 @@ double precision function get_dij_index(II, i, s, Nint) if(lambda_type == 0) then get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) + get_dij_index = get_dij_index * rho_mrcc(i,s) else call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) get_dij_index = HIi * lambda_mrcc(s, i) From 76f2953a02d433e8cea356b7429af11ef2a8e7e0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Aug 2016 17:23:39 +0200 Subject: [PATCH 09/11] Select doubles before singles in zmq --- plugins/Full_CI_ZMQ/selection.irp.f | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 5af0e206..13dcdc26 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -29,7 +29,7 @@ subroutine selection_slaved(thread,iproc,energy) integer(ZMQ_PTR), external :: new_zmq_push_socket integer(ZMQ_PTR) :: zmq_socket_push - type(selection_buffer) :: buf + type(selection_buffer) :: buf, buf2 logical :: done double precision :: pt2(N_states) @@ -56,7 +56,9 @@ subroutine selection_slaved(thread,iproc,energy) integer :: i_generator, N read (task,*) i_generator, N if(buf%N == 0) then + ! Only first time call create_selection_buffer(N, N*2, buf) + call create_selection_buffer(N, N*3, buf2) else if(N /= buf%N) stop "N changed... wtf man??" end if @@ -72,10 +74,14 @@ subroutine selection_slaved(thread,iproc,energy) end do if(ctask > 0) then call push_selection_results(zmq_socket_push, pt2, buf, task_id(1), ctask) + do i=1,buf%cur + call add_to_selection_buffer(buf2, buf%det(1,1,i), buf%val(i)) + enddo + call sort_selection_buffer(buf2) + buf%mini = buf2%mini pt2 = 0d0 buf%cur = 0 end if - ctask = 0 end if @@ -185,8 +191,8 @@ subroutine select_connected(i_generator,E0,pt2,b) particle_mask(k,:) = hole_mask(k,:) enddo - call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) call select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b) enddo end @@ -256,7 +262,7 @@ subroutine sort_selection_buffer(b) b%det(:,:,nmwen+1:) = 0_bit_kind b%val(:nmwen) = vals(:) b%val(nmwen+1:) = 0d0 - b%mini = dabs(b%val(b%N)) + b%mini = max(b%mini,dabs(b%val(b%N))) b%cur = nmwen end subroutine @@ -545,10 +551,13 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p do ispin1=1,2 do ispin2=1,ispin1 integer :: i_hole1, i_hole2, j_hole, k_hole - do i1=1, N_holes(ispin1) - ib = 1 - if(ispin1 == ispin2) ib = i1+1 - do i2=ib, N_holes(ispin2) + do i1=N_holes(ispin1),1,-1 ! Generate low excitations first + if(ispin1 == ispin2) then + ib = i1+1 + else + ib = 1 + endif + do i2=N_holes(ispin2),ib,-1 ! Generate low excitations first ion_det(:,:) = psi_det_generators(:,:,i_generator) i_hole1 = hole_list(i1,ispin1) @@ -721,7 +730,8 @@ subroutine select_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,p else e_pert(k) = 0.5d0 * ( dsqrt(delta_E * delta_E + 4.d0 * i_H_psi_value(k) * i_H_psi_value(k)) - delta_E) endif - if(dabs(e_pert(k)) > dabs(e_pertm)) e_pertm = e_pert(k) + e_pertm += dabs(e_pert(k)) +! if(dabs(e_pert(k)) > dabs(e_pertm)) e_pertm = e_pert(k) pt2(k) += e_pert(k) enddo if(dabs(e_pertm) > dabs(buf%mini)) then From a6fa9ddd888c5c765dbcfcab2246ba2740c1647f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 2 Aug 2016 17:44:41 +0200 Subject: [PATCH 10/11] Accelerated queue --- ocaml/Queuing_system.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ocaml/Queuing_system.ml b/ocaml/Queuing_system.ml index 78abb185..29a60538 100644 --- a/ocaml/Queuing_system.ml +++ b/ocaml/Queuing_system.ml @@ -9,6 +9,7 @@ type t = clients : Id.Client.t Set.Poly.t; next_client_id : Id.Client.t; next_task_id : Id.Task.t; + number_of_queued : int; } @@ -20,6 +21,7 @@ let create () = clients = Set.Poly.empty; next_client_id = Id.Client.of_int 1; next_task_id = Id.Task.of_int 1; + number_of_queued = 0; } @@ -33,6 +35,7 @@ let add_task ~task q = queued = task_id :: q.queued ; tasks = Map.add q.tasks ~key:task_id ~data:task ; next_task_id = Id.Task.increment task_id ; + number_of_queued = q.number_of_queued + 1; } @@ -59,6 +62,7 @@ let pop_task ~client_id q = { q with queued = new_queue ; running = Map.add running ~key:task_id ~data:client_id ; + number_of_queued = q.number_of_queued - 1; } in new_q, Some task_id, (Map.find q.tasks task_id) | [] -> q, None, None @@ -103,7 +107,7 @@ let number q = Map.length q.tasks let number_of_queued q = - List.length q.queued + q.number_of_queued let number_of_running q = Map.length q.running From ff2dcf069ecfb908b9e9ed72387906885f893bf6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 3 Aug 2016 00:18:53 +0200 Subject: [PATCH 11/11] Multiple generators per task --- plugins/Full_CI_ZMQ/fci_zmq.irp.f | 12 ++++++++-- plugins/Full_CI_ZMQ/selection.irp.f | 8 ++++--- plugins/Full_CI_ZMQ/selection_slave.irp.f | 2 -- plugins/MRCC_Utils/mrcc_utils.irp.f | 28 ++++++++++++++++++++--- plugins/mrcepa0/dressing.irp.f | 2 +- 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f index 02461ee3..0577a408 100644 --- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f +++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f @@ -132,8 +132,16 @@ subroutine ZMQ_selection(N, pt2) call zmq_put_psi(zmq_to_qp_run_socket,1,ci_electronic_energy,size(ci_electronic_energy)) call zmq_set_running(zmq_to_qp_run_socket) call create_selection_buffer(N, N*2, b) - do i= N_det_generators, 1, -1 - write(task,*) i, N + + integer :: i_generator, i_generator_start, i_generator_max, step +! step = int(max(1.,10*elec_num/mo_tot_num) + + step = int(10000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num )) + step = max(1,step) + do i= N_det_generators, 1, -step + i_generator_start = max(i-step+1,1) + i_generator_max = i + write(task,*) i_generator_start, i_generator_max, 1, N call add_task_to_taskserver(zmq_to_qp_run_socket,task) end do diff --git a/plugins/Full_CI_ZMQ/selection.irp.f b/plugins/Full_CI_ZMQ/selection.irp.f index 13dcdc26..b31e1977 100644 --- a/plugins/Full_CI_ZMQ/selection.irp.f +++ b/plugins/Full_CI_ZMQ/selection.irp.f @@ -53,8 +53,8 @@ subroutine selection_slaved(thread,iproc,energy) if (done) then ctask = ctask - 1 else - integer :: i_generator, N - read (task,*) i_generator, N + integer :: i_generator, i_generator_start, i_generator_max, step, N + read (task,*) i_generator_start, i_generator_max, step, N if(buf%N == 0) then ! Only first time call create_selection_buffer(N, N*2, buf) @@ -64,7 +64,9 @@ subroutine selection_slaved(thread,iproc,energy) end if !print *, "psi_selectors_coef ", psi_selectors_coef(N_det_selectors-5:N_det_selectors, 1) !call debug_det(psi_selectors(1,1,N_det_selectors), N_int) - call select_connected(i_generator,energy,pt2,buf) + do i_generator=i_generator_start,i_generator_max,step + call select_connected(i_generator,energy,pt2,buf) + enddo endif if(done .or. ctask == size(task_id)) then diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index bdb76db4..7eeae798 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -25,9 +25,7 @@ subroutine run_wf integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states_diag) character*(64) :: state - integer :: oki - oki = 0 call provide_everything zmq_context = f77_zmq_ctx_new () diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 4b6203cb..1d12d569 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -718,10 +718,32 @@ END_PROVIDER dIj_unique(:size(X), s) = X(:) - do i=1,N_det_non_ref - rho_mrcc(i,s) = psi_non_ref_coef(i,s) / rho_mrcc(i,s) + norm = 0.d0 + do i=1,N_det_ref + norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) enddo - + double precision :: f + do i=1,N_det_non_ref + f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) + ! Avoid numerical instabilities + f = min(f, 3.d0) + f = max(f,-3.d0) +! norm = norm + (psi_non_ref_coef(i,s) * f * rho_mrcc(i,s)) + norm = norm + ( f * rho_mrcc(i,s))**2 + rho_mrcc(i,s) = f + enddo + + print *, ' = ', norm + + f = 1.d0/norm + norm = 0.d0 + do i=1,N_det_ref + norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) + enddo + do i=1,N_det_non_ref + rho_mrcc(i,s) = rho_mrcc(i,s) * f + enddo + end do print *, "done" diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 3a91f42e..88cd2c85 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -23,7 +23,7 @@ use bitmasks do gen= 1, N_det_generators allocate(buf(N_int, 2, N_det_non_ref)) iproc = omp_get_thread_num() + 1 - if(mod(gen, 10) == 0) print *, "mrcc ", gen, "/", N_det_generators + if(mod(gen, 1000) == 0) print *, "mrcc ", gen, "/", N_det_generators do h=1, hh_shortcut(0) call apply_hole(psi_det_generators(1,1,gen), hh_exists(1, h), mask, ok, N_int) if(.not. ok) cycle