10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-05 11:00:10 +01:00

Working on davidson

This commit is contained in:
Anthony Scemama 2017-04-19 12:08:17 +02:00
parent 0af043390c
commit dd59338083
3 changed files with 36 additions and 35 deletions

View File

@ -70,8 +70,6 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:) double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0 !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0
allocate(v_0(N_det,N_st), s_0(N_det,N_st),u_t(N_st,N_det))
! Get wave function (u_t) ! Get wave function (u_t)
! ----------------------- ! -----------------------
@ -92,21 +90,27 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
integer :: N_states_read, N_det_read, psi_det_size_read integer :: N_states_read, N_det_read, psi_det_size_read
integer :: N_det_selectors_read, N_det_generators_read integer :: N_det_selectors_read, N_det_generators_read
double precision :: energy(N_states_diag) double precision :: energy(N_st)
read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read,&
allocate(v_0(sze_8,N_st), s_0(sze_8,N_st),u_t(N_st,N_det))
read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, &
N_det_generators_read, N_det_selectors_read N_det_generators_read, N_det_selectors_read
if (rc /= worker_id) then if (rc /= worker_id) then
print *, 'Wrong worker ID' print *, 'Wrong worker ID'
stop 'error' stop 'error'
endif endif
if (N_states_read /= N_st) then if (N_states_read /= N_st) then
print *, N_st
stop 'error : N_st' stop 'error : N_st'
endif endif
if (N_det_read /= N_det) then if (N_det_read /= N_det) then
N_det = N_det_read N_det = N_det_read
stop 'N_det_read /= N_det'
TOUCH N_det TOUCH N_det
endif endif
@ -123,9 +127,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
stop 'error' stop 'error'
endif endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_states_diag*8,0) rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)
if (rc /= N_states_diag*8) then if (rc /= N_st*8) then
print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_states_diag*8,0)' print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)'
stop 'error' stop 'error'
endif endif
@ -142,6 +146,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, v_0, s_0, task_id) call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
end do end do
deallocate(v_0, s_0, u_t)
end subroutine end subroutine
@ -189,11 +194,11 @@ subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
integer :: rc integer :: rc
rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*size(v_0), 0) rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0)
if(rc /= 8*size(s_0)) stop "davidson_push_results failed to pull v_0" if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0"
rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*size(s_0), 0) rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0)
if(rc /= 8*size(s_0)) stop "davidson_push_results failed to pull s_0" if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id" if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
@ -209,15 +214,15 @@ end subroutine
subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, LDA) subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze_8, N_st)
use f77_zmq use f77_zmq
implicit none implicit none
integer :: LDA integer, intent(in) :: sze_8, N_st
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
double precision ,intent(inout) :: v0(LDA, N_states_diag) double precision ,intent(inout) :: v0(sze_8, N_st)
double precision ,intent(inout) :: s0(LDA, N_states_diag) double precision ,intent(inout) :: s0(sze_8, N_st)
integer :: more, task_id integer :: more, task_id
@ -226,14 +231,14 @@ subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, LDA)
integer(ZMQ_PTR), external :: new_zmq_pull_socket integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull integer(ZMQ_PTR) :: zmq_socket_pull
allocate(v_0(N_det,N_states_diag), s_0(N_det,N_states_diag)) allocate(v_0(N_det,N_st), s_0(N_det,N_st))
v0 = 0.d0 v0 = 0.d0
s0 = 0.d0 s0 = 0.d0
more = 1 more = 1
zmq_socket_pull = new_zmq_pull_socket() zmq_socket_pull = new_zmq_pull_socket()
do while (more == 1) do while (more == 1)
call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id) call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
do j=1,N_states_diag do j=1,N_st
do i=1,N_det do i=1,N_det
v0(i,j) = v0(i,j) + v_0(i,j) v0(i,j) = v0(i,j) + v_0(i,j)
s0(i,j) = s0(i,j) + s_0(i,j) s0(i,j) = s0(i,j) + s_0(i,j)
@ -292,9 +297,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8)
ASSERT (n>0) ASSERT (n>0)
PROVIDE ref_bitmask_energy nproc PROVIDE ref_bitmask_energy nproc
v_0 = 0.d0
s_0 = 0.d0
call new_parallel_job(zmq_to_qp_run_socket,'davidson') call new_parallel_job(zmq_to_qp_run_socket,'davidson')
character*(512) :: task character*(512) :: task
@ -348,22 +350,15 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8)
enddo enddo
enddo enddo
! istep=2
! imin=1
! imax=N_det
! do ishift=0,istep-1
! write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
! call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
! enddo
v_0 = 0.d0 v_0 = 0.d0
s_0 = 0.d0 s_0 = 0.d0
call omp_set_nested(.True.) call omp_set_nested(.True.)
call zmq_set_running(zmq_to_qp_run_socket)
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
if (ithread == 0 ) then if (ithread == 0 ) then
call zmq_set_running(zmq_to_qp_run_socket) call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, sze_8, N_st)
call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, size(v_0,1))
else else
call davidson_slave_inproc(1) call davidson_slave_inproc(1)
endif endif

View File

@ -118,7 +118,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
sze_8 = align_double(sze) sze_8 = align_double(sze)
itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
PROVIDE nuclear_repulsion expected_s2 PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
call write_time(iunit) call write_time(iunit)
call wall_time(wall) call wall_time(wall)
@ -223,7 +223,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
if (distributed_davidson) then if (distributed_davidson) then
call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8)
else else
call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8)
endif endif

View File

@ -156,7 +156,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
s_t = 0.d0 s_t = 0.d0
!$OMP DO SCHEDULE(dynamic,64) !$OMP DO SCHEDULE(static,64)
do k_a=istart+ishift,iend,istep do k_a=istart+ishift,iend,istep
krow = psi_bilinear_matrix_rows(k_a) krow = psi_bilinear_matrix_rows(k_a)
@ -206,8 +206,10 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
call get_s2(tmp_det,tmp_det2,N_int,sij) call get_s2(tmp_det,tmp_det2,N_int,sij)
do l=1,N_st do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
enddo enddo
enddo enddo
@ -215,9 +217,9 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
enddo enddo
enddo enddo
!$OMP END DO NOWAIT !$OMP END DO
!$OMP DO SCHEDULE(dynamic,64) !$OMP DO SCHEDULE(static,64)
do k_a=istart+ishift,iend,istep do k_a=istart+ishift,iend,istep
@ -283,6 +285,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
lrow = psi_bilinear_matrix_rows(l_a) lrow = psi_bilinear_matrix_rows(l_a)
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), N_int, hij) call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), N_int, hij)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0 ! same spin => sij = 0
@ -339,6 +342,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij)
l_a = psi_bilinear_matrix_transp_order(l_b) l_a = psi_bilinear_matrix_transp_order(l_b)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0 ! single => sij = 0
@ -354,6 +358,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), N_int, hij) call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), N_int, hij)
l_a = psi_bilinear_matrix_transp_order(l_b) l_a = psi_bilinear_matrix_transp_order(l_b)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0 ! same spin => sij = 0
@ -379,6 +384,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
hij = diag_H_mat_elem(tmp_det,N_int) hij = diag_H_mat_elem(tmp_det,N_int)
sij = diag_S_mat_elem(tmp_det,N_int) sij = diag_S_mat_elem(tmp_det,N_int)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
enddo enddo