From cbf8c54d707dac50d54c7ecc5f9262df1b79f53c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Apr 2017 19:45:18 +0200 Subject: [PATCH] Parallelization of Davidson --- src/Davidson/davidson_parallel.irp.f | 38 +++++++++++++++++++++---- src/Davidson/u0Hu0.irp.f | 7 ++--- src/Determinants/spindeterminants.irp.f | 2 +- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 402cc561..6c16a1fa 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -271,6 +271,11 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) double precision, allocatable :: u_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique + PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc + PROVIDE ref_bitmask_energy nproc + + allocate(u_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) @@ -290,7 +295,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) ASSERT (Nint > 0) ASSERT (Nint == N_int) ASSERT (n>0) - PROVIDE ref_bitmask_energy nproc call new_parallel_job(zmq_to_qp_run_socket,'davidson') @@ -335,15 +339,39 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) deallocate(u_t) + ! Create tasks + ! ============ + integer :: istep, imin, imax, ishift - istep=2 - do imin=1,N_det, 1048576 + double precision :: w, max_workload, N_det_inv, di + max_workload = N_det_beta_unique+N_det_alpha_unique + w = 0.d0 + istep=4 + ishift=0 + imin=1 + N_det_inv = 1.d0/dble(N_det) + di = dble(N_det) + do imax=1,N_det + di = di-1.d0 + w = w + (di*N_det_inv)**2 + if (w > max_workload) then + do ishift=0,istep-1 + write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' + call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) + enddo + istep = max(istep-1,1) + imin = imax+1 + w = 0.d0 + endif + enddo + if (w > 0.d0) then + imax = N_det do ishift=0,istep-1 - imax = min(N_det, imin+1048576-1) write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) enddo - enddo + endif + v_0 = 0.d0 s_0 = 0.d0 diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index cc1ba224..4f68f85a 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -86,7 +86,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift, double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) - PROVIDE ref_bitmask_energy + PROVIDE ref_bitmask_energy N_int select case (N_int) case (1) @@ -136,7 +136,6 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend, integer*8 :: k8 double precision, allocatable :: v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t - PROVIDE N_int maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 allocate(idx0(maxab)) @@ -148,6 +147,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend, ! Prepare the array of all alpha single excitations ! ------------------------------------------------- + PROVIDE N_int !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & !$OMP psi_bilinear_matrix_columns, & @@ -157,9 +157,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend, !$OMP psi_bilinear_matrix_transp_columns, & !$OMP psi_bilinear_matrix_transp_order, N_st, & !$OMP psi_bilinear_matrix_order_transp_reverse, & - !$OMP singles_alpha_csc, singles_alpha_csc_idx, & !$OMP psi_bilinear_matrix_columns_loc, & - !$OMP singles_alpha_size, istart, iend, istep, & + !$OMP istart, iend, istep, & !$OMP ishift, idx0, u_t, maxab, v_0, s_0) & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP lcol, lrow, l_a, l_b, nmax, & diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 03ae031c..aa7fde29 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -541,7 +541,7 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_uniq psi_bilinear_matrix_transp_rows_loc(l) = k endif enddo - psi_bilinear_matrix_transp_rows_loc(N_det_beta_unique+1) = N_det+1 + psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1 END_PROVIDER BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ]