10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-12 05:58:24 +01:00

Optimized Davidson

This commit is contained in:
Anthony Scemama 2018-10-04 00:29:30 +02:00
parent c688c057a8
commit 152ba01c17
5 changed files with 96 additions and 66 deletions

View File

@ -140,6 +140,16 @@ subroutine run_wf
call write_double(6,(t1-t0),'Broadcast time')
endif
call wall_time(t0)
if (.True.) then
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order
endif
call wall_time(t1)
call write_double(6,(t1-t0),'Sort time')
call omp_set_nested(.True.)
call davidson_slave_tcp(0)
call omp_set_nested(.False.)

View File

@ -84,6 +84,11 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
integer, external :: zmq_get_dvector
integer, external :: zmq_get_dmatrix
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det))
allocate (energy(N_st))
@ -96,11 +101,11 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
ni = 8388608
nj = int(size(u_t,kind=8)/8388608_8,4) + 1
endif
if (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
print *, irp_here, ': Unable to get u_t'
deallocate(u_t,energy)
return
endif
do while (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1)
call sleep(1)
print *, irp_here, ': waiting for u_t...'
enddo
if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'energy', energy, size(energy)) == -1) then
print *, irp_here, ': Unable to get energy'
@ -293,66 +298,19 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
integer :: ithread
double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
ASSERT (N_st == N_states_diag)
ASSERT (sze >= N_det)
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson')
character*(512) :: task
integer :: rc, ni, nj
integer*8 :: rc8
double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix
energy = 0.d0
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server'
endif
if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
stop 'Unable to put psi on ZMQ server'
endif
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server'
endif
energy = 0.d0
if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then
stop 'Unable to put energy on ZMQ server'
endif
if (size(u_t) < 8388608) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = size(u_t)/8388608 + 1
endif
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
stop 'Unable to put u_t on ZMQ server'
endif
deallocate(u_t)
! Create tasks
! ============
@ -390,14 +348,61 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
endif
v_0 = 0.d0
s_0 = 0.d0
integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running'
endif
if (.True.) then
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
endif
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
ASSERT (N_st == N_states_diag)
ASSERT (sze >= N_det)
character*(512) :: task
integer :: rc, ni, nj
integer*8 :: rc8
double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix
if (size(u_t) < 8388608) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = size(u_t)/8388608 + 1
endif
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
stop 'Unable to put u_t on ZMQ server'
endif
deallocate(u_t)
v_0 = 0.d0
s_0 = 0.d0
call omp_set_nested(.True.)
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num()

View File

@ -2,7 +2,7 @@ program print_energy
implicit none
read_wf = .true.
touch read_wf
provide mo_bielec_integrals_in_map
provide mo_bielec_integrals_in_map psi_coef psi_det psi_bilinear_matrix_transp_values
double precision :: time1, time0
call wall_time(time0)
call routine

View File

@ -519,7 +519,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_
do k=1,N_det
psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
enddo
!$OMP ENDDO
!$OMP ENDDO NOWAIT
enddo
!$OMP DO
do k=1,N_det

View File

@ -456,16 +456,16 @@ BEGIN_TEMPLATE
iorder(i) = iorder1(1_$int_type+i1-i)
enddo
endif
deallocate(x1,iorder1,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
stop
endif
if (i2>1_$int_type) then
call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
endif
deallocate(x1,iorder1,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
stop
endif
return
else if (iradix == -2) then ! Positive
@ -526,14 +526,24 @@ BEGIN_TEMPLATE
endif
!$OMP PARALLEL DEFAULT(SHARED) if (isize > 1000000)
!$OMP SINGLE
if (i3>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(i3 > 1000000)
call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
!$OMP END TASK
endif
if (isize-i3>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(isize-i3 > 1000000)
call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
!$OMP END TASK
endif
!$OMP TASKWAIT
!$OMP END SINGLE
!$OMP END PARALLEL
return
endif
@ -588,11 +598,16 @@ BEGIN_TEMPLATE
if (i1>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(i0,iradix,i1) SHARED(x,iorder) if(i1 >1000000)
call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
!$OMP END TASK
endif
if (i0>1) then
!$OMP TASK FIRSTPRIVATE(i0,iradix) SHARED(x,iorder) if(i0 >1000000)
call $Xradix_sort$big(x,iorder,i0,iradix-1)
!$OMP END TASK
endif
!$OMP TASKWAIT
end