From 70d52c126a208ed1f36fea0ebb05b9aeb8dd5bd5 Mon Sep 17 00:00:00 2001 From: scemama Date: Wed, 24 May 2017 15:24:20 +0200 Subject: [PATCH] Larges messages for 100M --- plugins/Full_CI_ZMQ/selection_slave.irp.f | 36 +++++++++++++++-------- plugins/Selectors_full/zmq.irp.f | 6 ++-- src/Davidson/davidson_parallel.irp.f | 32 +++++++++++++++----- src/Davidson/u0Hu0.irp.f | 4 +-- 4 files changed, 53 insertions(+), 25 deletions(-) diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f index 92c6b775..ba85ca82 100644 --- a/plugins/Full_CI_ZMQ/selection_slave.irp.f +++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f @@ -5,7 +5,8 @@ program selection_slave END_DOC read_wf = .False. - SOFT_TOUCH read_wf + distributed_davidson = .False. + SOFT_TOUCH read_wf distributed_davidson call provide_everything call switch_qp_run_to_master call run_wf @@ -13,7 +14,7 @@ end subroutine provide_everything PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context - PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count + PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count end subroutine run_wf @@ -23,19 +24,21 @@ subroutine run_wf integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision :: energy(N_states) - character*(64) :: states(1) + character*(64) :: states(4) integer :: rc, i call provide_everything zmq_context = f77_zmq_ctx_new () states(1) = 'selection' + states(2) = 'davidson' + states(3) = 'pt2' zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() do - call wait_for_states(states,zmq_state,1) + call wait_for_states(states,zmq_state,3) if(trim(zmq_state) == 'Stopped') then @@ -51,21 +54,30 @@ subroutine run_wf !$OMP PARALLEL PRIVATE(i) i = omp_get_thread_num() - call selection_slave_tcp(i, energy) + call run_selection_slave(0,i,energy) !$OMP END PARALLEL print *, 'Selection done' + else if (trim(zmq_state) == 'pt2') then + + ! PT2 + ! --- + + print *, 'PT2' + call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states) + + logical :: lstop + lstop = .False. + !$OMP PARALLEL PRIVATE(i) + i = omp_get_thread_num() + call run_pt2_slave(0,i,energy,lstop) + !$OMP END PARALLEL + print *, 'PT2 done' + endif end do end -subroutine selection_slave_tcp(i,energy) - implicit none - double precision, intent(in) :: energy(N_states) - integer, intent(in) :: i - - call run_selection_slave(0,i,energy) -end diff --git a/plugins/Selectors_full/zmq.irp.f b/plugins/Selectors_full/zmq.irp.f index ca3681cb..420f0376 100644 --- a/plugins/Selectors_full/zmq.irp.f +++ b/plugins/Selectors_full/zmq.irp.f @@ -14,19 +14,19 @@ subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy) write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,trim(msg),int(len(trim(msg)),8),ZMQ_SNDMORE) if (rc8 /= len(trim(msg))) then print *, 'f77_zmq_send8(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)' stop 'error' endif - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,int(N_int*2_8*N_det*bit_kind,8),ZMQ_SNDMORE) if (rc8 /= N_int*2_8*N_det*bit_kind) then print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)' stop 'error' endif - rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,int(psi_det_size*N_states*8,8),ZMQ_SNDMORE) if (rc8 /= psi_det_size*N_states*8_8) then print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)' stop 'error' diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 9af78b4f..ce8e4b72 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -317,32 +317,33 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) character*(512) :: task integer :: rc + integer*8 :: rc8 double precision :: energy(N_st) energy = 0.d0 task = ' ' write(task,*) 'put_psi ', 1, N_st, N_det, N_det - rc = f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,trim(task),int(len(trim(task)),8),ZMQ_SNDMORE) if (rc /= len(trim(task))) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE)' + print *, 'f77_zmq_send8(zmq_to_qp_run_socket,trim(task),int(len(trim(task)),8),ZMQ_SNDMORE)' stop 'error' endif - rc = f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,psi_det,int(N_int*2*N_det*bit_kind,8),ZMQ_SNDMORE) if (rc /= N_int*2*N_det*bit_kind) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)' + print *, 'f77_zmq_send8(zmq_to_qp_run_socket,psi_det,int(N_int*2*N_det*bit_kind,8),ZMQ_SNDMORE)' stop 'error' endif - rc = f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,u_t,int(size(u_t)*8,8),ZMQ_SNDMORE) if (rc /= size(u_t)*8) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE)' + print *, 'f77_zmq_send8(zmq_to_qp_run_socket,u_t,int(size(u_t)*8,8),ZMQ_SNDMORE)' stop 'error' endif - rc = f77_zmq_send(zmq_to_qp_run_socket,energy,N_st*8,0) + rc8 = f77_zmq_send8(zmq_to_qp_run_socket,energy,int(N_st*8,8),0) if (rc /= N_st*8) then - print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)' + print *, 'f77_zmq_send8(zmq_to_qp_run_socket,energy,int(size_energy*8,8),0)' stop 'error' endif @@ -410,3 +411,18 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) enddo end + +BEGIN_PROVIDER [ integer, nthreads_davidson ] + implicit none + BEGIN_DOC + ! Number of threads for Davdison + END_DOC + nthreads_davidson = nproc + character*(32) :: env + call getenv('NTHREADS_DAVIDSON',env) + if (trim(env) /= '') then + read(env,*) nthreads_davidson + endif + call write_int(6,nthreads_davidson,'Number of threads for Diagonalization') +END_PROVIDER + diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 1fbf00e0..c67b1440 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -134,8 +134,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend, ! Prepare the array of all alpha single excitations ! ------------------------------------------------- - PROVIDE N_int - !$OMP PARALLEL DEFAULT(NONE) & + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & !$OMP psi_bilinear_matrix_columns, & !$OMP psi_det_alpha_unique, psi_det_beta_unique, &