From dd5933808341cbeeddb656c99d610db5fbcab17c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 19 Apr 2017 12:08:17 +0200 Subject: [PATCH] Working on davidson --- src/Davidson/davidson_parallel.irp.f | 53 ++++++++++++-------------- src/Davidson/diagonalization_hs2.irp.f | 4 +- src/Davidson/u0Hu0.irp.f | 14 +++++-- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f index 08f90639..aa761a38 100644 --- a/src/Davidson/davidson_parallel.irp.f +++ b/src/Davidson/davidson_parallel.irp.f @@ -70,8 +70,6 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_ double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0 - allocate(v_0(N_det,N_st), s_0(N_det,N_st),u_t(N_st,N_det)) - ! Get wave function (u_t) ! ----------------------- @@ -92,21 +90,27 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_ integer :: N_states_read, N_det_read, psi_det_size_read integer :: N_det_selectors_read, N_det_generators_read - double precision :: energy(N_states_diag) + double precision :: energy(N_st) - read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read,& + + allocate(v_0(sze_8,N_st), s_0(sze_8,N_st),u_t(N_st,N_det)) + + read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, & N_det_generators_read, N_det_selectors_read + if (rc /= worker_id) then print *, 'Wrong worker ID' stop 'error' endif if (N_states_read /= N_st) then + print *, N_st stop 'error : N_st' endif if (N_det_read /= N_det) then N_det = N_det_read + stop 'N_det_read /= N_det' TOUCH N_det endif @@ -123,9 +127,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_ stop 'error' endif - rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_states_diag*8,0) - if (rc /= N_states_diag*8) then - print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_states_diag*8,0)' + rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0) + if (rc /= N_st*8) then + print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)' stop 'error' endif @@ -142,6 +146,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_ call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call davidson_push_results(zmq_socket_push, v_0, s_0, task_id) end do + deallocate(v_0, s_0, u_t) end subroutine @@ -189,11 +194,11 @@ subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id) integer :: rc - rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*size(v_0), 0) - if(rc /= 8*size(s_0)) stop "davidson_push_results failed to pull v_0" + rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0) + if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0" - rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*size(s_0), 0) - if(rc /= 8*size(s_0)) stop "davidson_push_results failed to pull s_0" + rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0) + if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0" rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) if(rc /= 4) stop "davidson_pull_results failed to pull task_id" @@ -209,15 +214,15 @@ end subroutine -subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, LDA) +subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze_8, N_st) use f77_zmq implicit none - integer :: LDA + integer, intent(in) :: sze_8, N_st integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket - double precision ,intent(inout) :: v0(LDA, N_states_diag) - double precision ,intent(inout) :: s0(LDA, N_states_diag) + double precision ,intent(inout) :: v0(sze_8, N_st) + double precision ,intent(inout) :: s0(sze_8, N_st) integer :: more, task_id @@ -226,14 +231,14 @@ subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, LDA) integer(ZMQ_PTR), external :: new_zmq_pull_socket integer(ZMQ_PTR) :: zmq_socket_pull - allocate(v_0(N_det,N_states_diag), s_0(N_det,N_states_diag)) + allocate(v_0(N_det,N_st), s_0(N_det,N_st)) v0 = 0.d0 s0 = 0.d0 more = 1 zmq_socket_pull = new_zmq_pull_socket() do while (more == 1) call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id) - do j=1,N_states_diag + do j=1,N_st do i=1,N_det v0(i,j) = v0(i,j) + v_0(i,j) s0(i,j) = s0(i,j) + s_0(i,j) @@ -292,9 +297,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8) ASSERT (n>0) PROVIDE ref_bitmask_energy nproc - v_0 = 0.d0 - s_0 = 0.d0 - call new_parallel_job(zmq_to_qp_run_socket,'davidson') character*(512) :: task @@ -348,22 +350,15 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8) enddo enddo -! istep=2 -! imin=1 -! imax=N_det -! do ishift=0,istep-1 -! write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|' -! call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) -! enddo v_0 = 0.d0 s_0 = 0.d0 call omp_set_nested(.True.) + call zmq_set_running(zmq_to_qp_run_socket) !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) ithread = omp_get_thread_num() if (ithread == 0 ) then - call zmq_set_running(zmq_to_qp_run_socket) - call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, size(v_0,1)) + call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, sze_8, N_st) else call davidson_slave_inproc(1) endif diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index d68d8a68..2a8272da 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -118,7 +118,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ sze_8 = align_double(sze) itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) - PROVIDE nuclear_repulsion expected_s2 + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse call write_time(iunit) call wall_time(wall) @@ -223,7 +223,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if (distributed_davidson) then - call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) + call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) else call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) endif diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 6cb7de45..14b32da8 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -156,7 +156,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif s_t = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(static,64) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) @@ -206,8 +206,10 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif call get_s2(tmp_det,tmp_det2,N_int,sij) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) - v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) + !$OMP ATOMIC + v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) + !$OMP ATOMIC s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) enddo enddo @@ -215,9 +217,9 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(static,64) do k_a=istart+ishift,iend,istep @@ -283,6 +285,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif lrow = psi_bilinear_matrix_rows(l_a) call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), N_int, hij) do l=1,N_st + !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! same spin => sij = 0 @@ -339,6 +342,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) l_a = psi_bilinear_matrix_transp_order(l_b) do l=1,N_st + !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! single => sij = 0 @@ -354,6 +358,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), N_int, hij) l_a = psi_bilinear_matrix_transp_order(l_b) do l=1,N_st + !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! same spin => sij = 0 @@ -379,6 +384,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif hij = diag_H_mat_elem(tmp_det,N_int) sij = diag_S_mat_elem(tmp_det,N_int) do l=1,N_st + !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) enddo