From feb9752ecb3da8bd771563d8ccf544218a6fa9d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 1 Mar 2017 01:19:17 +0100 Subject: [PATCH] Accelerated distributed Davidson --- ocaml/qptypes_generator.ml | 8 +- .../selection_davidson_slave.irp.f | 5 +- src/Davidson/davidson_parallel.irp.f | 82 +++++++++++++------ src/Davidson/davidson_slave.irp.f | 6 +- src/Davidson/diagonalization_hs2.irp.f | 7 +- src/Davidson/u0Hu0.irp.f | 8 +- 6 files changed, 76 insertions(+), 40 deletions(-) diff --git a/ocaml/qptypes_generator.ml b/ocaml/qptypes_generator.ml index ee988ccb..160a07d0 100644 --- a/ocaml/qptypes_generator.ml +++ b/ocaml/qptypes_generator.ml @@ -42,8 +42,8 @@ let input_data = " * Det_number_max : int assert (x > 0) ; - if (x > 100000000) then - warning \"More than 100 million determinants\"; + if (x > 10000000000) then + warning \"More than 10 billion determinants\"; * States_number : int assert (x > 0) ; @@ -140,8 +140,8 @@ let input_ezfio = " * Det_number : int determinants_n_det - 1 : 100000000 - More than 100 million of determinants + 1 : 10000000000 + More than 10 billion of determinants " ;; diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f index d56df13e..58f005bc 100644 --- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f @@ -25,6 +25,7 @@ subroutine run_wf double precision :: energy(N_states) character*(64) :: states(2) integer :: rc, i + logical :: force_update call provide_everything @@ -33,6 +34,7 @@ subroutine run_wf states(2) = 'davidson' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + force_update = .True. do @@ -62,7 +64,8 @@ subroutine run_wf ! -------- print *, 'Davidson' - call davidson_miniserver_get() + call davidson_miniserver_get(force_update) + force_update = .False. !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() call davidson_slave_tcp(i) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 4ff3af03..b4b79585 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -145,32 +145,35 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t) end subroutine -subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st) +subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st,update_dets) use f77_zmq implicit none integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket - integer, intent(in) :: n0,n, n_st + integer, intent(in) :: n0,n, n_st, update_dets double precision, intent(in) :: u(n0,n_st) integer :: i,k - dav_size = n - touch dav_size - - do i=1,n - do k=1,N_int - dav_det(k,1,i) = psi_det(k,1,i) - dav_det(k,2,i) = psi_det(k,2,i) + if (update_dets == 1) then + dav_size = n + touch dav_size + do i=1,dav_size + do k=1,N_int + dav_det(k,1,i) = psi_det(k,1,i) + dav_det(k,2,i) = psi_det(k,2,i) + enddo enddo - enddo + touch dav_det + endif + do i=1,n do k=1,n_st dav_ut(k,i) = u(i,k) enddo enddo - touch dav_det dav_ut + soft_touch dav_ut call new_parallel_job(zmq_to_qp_run_socket,"davidson") end subroutine @@ -454,9 +457,10 @@ end subroutine -subroutine davidson_miniserver_run() +subroutine davidson_miniserver_run(update_dets) use f77_zmq implicit none + integer update_dets integer(ZMQ_PTR) responder character*(64) address character(len=:), allocatable :: buffer @@ -465,18 +469,23 @@ subroutine davidson_miniserver_run() allocate (character(len=20) :: buffer) address = 'tcp://*:11223' + PROVIDE dav_det dav_ut dav_size + responder = f77_zmq_socket(zmq_context, ZMQ_REP) rc = f77_zmq_bind(responder,address) do rc = f77_zmq_recv(responder, buffer, 5, 0) - if (buffer(1:rc) /= 'end') then - rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE) - rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0) - else + if (buffer(1:rc) == 'end') then rc = f77_zmq_send (responder, "end", 3, 0) exit + else if (buffer(1:rc) == 'det') then + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, 0) + else if (buffer(1:rc) == 'ut') then + rc = f77_zmq_send (responder, update_dets, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE) + rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0) endif enddo @@ -503,34 +512,41 @@ subroutine davidson_miniserver_end() end subroutine -subroutine davidson_miniserver_get() +subroutine davidson_miniserver_get(force_update) implicit none use f77_zmq - + logical, intent(in) :: force_update integer(ZMQ_PTR) requester character*(64) address character*(20) buffer - integer rc + integer rc, update_dets address = trim(qp_run_address)//':11223' requester = f77_zmq_socket(zmq_context, ZMQ_REQ) rc = f77_zmq_connect(requester,address) - rc = f77_zmq_send(requester, "Hello", 5, 0) + rc = f77_zmq_send(requester, 'ut', 2, 0) + rc = f77_zmq_recv(requester, update_dets, 4, 0) rc = f77_zmq_recv(requester, dav_size, 4, 0) - TOUCH dav_size - rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) - rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) - TOUCH dav_det dav_ut - + if (update_dets == 1 .or. force_update) then + TOUCH dav_size + endif + rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0) + SOFT_TOUCH dav_ut + if (update_dets == 1 .or. force_update) then + rc = f77_zmq_send(requester, 'det', 3, 0) + rc = f77_zmq_recv(requester, dav_size, 4, 0) + rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0) + SOFT_TOUCH dav_det + endif + end subroutine BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ] -&BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ] use bitmasks implicit none BEGIN_DOC @@ -538,7 +554,19 @@ end subroutine ! ! Touched in davidson_miniserver_get END_DOC + integer :: i,k + dav_det = 0_bit_kind +END_PROVIDER + +BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ] + use bitmasks + implicit none + BEGIN_DOC +! Temporary arrays for parallel davidson +! +! Touched in davidson_miniserver_get + END_DOC dav_ut = -huge(1.d0) END_PROVIDER diff --git a/src/Davidson/davidson_slave.irp.f b/src/Davidson/davidson_slave.irp.f index e28712e2..4d0864e8 100644 --- a/src/Davidson/davidson_slave.irp.f +++ b/src/Davidson/davidson_slave.irp.f @@ -7,6 +7,7 @@ program davidson_slave integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states_diag) character*(64) :: state + logical :: force_update call provide_everything call switch_qp_run_to_master @@ -16,11 +17,12 @@ program davidson_slave state = 'Waiting' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() - + force_update = .True. do call wait_for_state(zmq_state,state) if(trim(state) /= "davidson") exit - call davidson_miniserver_get() + call davidson_miniserver_get(force_update) + force_update = .False. integer :: rc, i diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index 4b36e030..b50ede7c 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -110,7 +110,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2, itermax + integer :: shift, shift2, itermax, update_dets double precision :: r1, r2 logical :: state_ok(N_st_diag*davidson_sze_max) include 'constants.include.F' @@ -191,6 +191,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s ASSERT (Nint > 0) ASSERT (Nint == N_int) + update_dets = 1 + ! Davidson iterations ! =================== @@ -231,10 +233,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s if (distributed_davidson) then - call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8,update_dets) else call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) endif + update_dets = 0 ! Compute h_kl = = diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 2d1095cd..233919da 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -264,7 +264,7 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] END_PROVIDER -subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) +subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8,update_dets) use omp_lib use bitmasks use f77_zmq @@ -278,7 +278,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ ! ! S2_jj : array of END_DOC - integer, intent(in) :: N_st,n,Nint, sze_8 + integer, intent(in) :: N_st,n,Nint, sze_8, update_dets double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: H_jj(n), S2_jj(n) @@ -309,7 +309,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ v_0 = 0.d0 s_0 = 0.d0 - call davidson_init(handler,u_0,size(u_0,1),n,N_st) + call davidson_init(handler,u_0,size(u_0,1),n,N_st,update_dets) ave_workload = 0.d0 do sh=1,shortcut_(0,1) @@ -369,7 +369,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_ call davidson_run(handler, v_0, s_0, size(v_0,1)) else if (ithread == 1 ) then !$OMP BARRIER - call davidson_miniserver_run () + call davidson_miniserver_run (update_dets) else !$OMP BARRIER call davidson_slave_inproc(ithread)