10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-12 22:18:31 +01:00

Optimized Davidson

This commit is contained in:
Anthony Scemama 2018-10-04 00:29:30 +02:00
parent c688c057a8
commit 152ba01c17
5 changed files with 96 additions and 66 deletions

View File

@ -140,6 +140,16 @@ subroutine run_wf
call write_double(6,(t1-t0),'Broadcast time') call write_double(6,(t1-t0),'Broadcast time')
endif endif
call wall_time(t0)
if (.True.) then
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order
endif
call wall_time(t1)
call write_double(6,(t1-t0),'Sort time')
call omp_set_nested(.True.) call omp_set_nested(.True.)
call davidson_slave_tcp(0) call davidson_slave_tcp(0)
call omp_set_nested(.False.) call omp_set_nested(.False.)

View File

@ -84,6 +84,11 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
integer, external :: zmq_get_dvector integer, external :: zmq_get_dvector
integer, external :: zmq_get_dmatrix integer, external :: zmq_get_dmatrix
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det)) allocate(u_t(N_st,N_det))
allocate (energy(N_st)) allocate (energy(N_st))
@ -96,11 +101,11 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
ni = 8388608 ni = 8388608
nj = int(size(u_t,kind=8)/8388608_8,4) + 1 nj = int(size(u_t,kind=8)/8388608_8,4) + 1
endif endif
if (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
print *, irp_here, ': Unable to get u_t' do while (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1)
deallocate(u_t,energy) call sleep(1)
return print *, irp_here, ': waiting for u_t...'
endif enddo
if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'energy', energy, size(energy)) == -1) then if (zmq_get_dvector(zmq_to_qp_run_socket, worker_id, 'energy', energy, size(energy)) == -1) then
print *, irp_here, ': Unable to get energy' print *, irp_here, ': Unable to get energy'
@ -293,66 +298,19 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
integer :: ithread integer :: ithread
double precision, allocatable :: u_t(:,:) double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
ASSERT (N_st == N_states_diag)
ASSERT (sze >= N_det)
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson') call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson')
character*(512) :: task
integer :: rc, ni, nj
integer*8 :: rc8
double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix
energy = 0.d0
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server'
endif
if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
stop 'Unable to put psi on ZMQ server' stop 'Unable to put psi on ZMQ server'
endif endif
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server'
endif
energy = 0.d0
if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then
stop 'Unable to put energy on ZMQ server' stop 'Unable to put energy on ZMQ server'
endif endif
if (size(u_t) < 8388608) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = size(u_t)/8388608 + 1
endif
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
stop 'Unable to put u_t on ZMQ server'
endif
deallocate(u_t)
! Create tasks ! Create tasks
! ============ ! ============
@ -390,14 +348,61 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
endif endif
v_0 = 0.d0
s_0 = 0.d0
integer, external :: zmq_set_running integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running' print *, irp_here, ': Failed in zmq_set_running'
endif endif
if (.True.) then
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
endif
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
ASSERT (N_st == N_states_diag)
ASSERT (sze >= N_det)
character*(512) :: task
integer :: rc, ni, nj
integer*8 :: rc8
double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix
if (size(u_t) < 8388608) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = size(u_t)/8388608 + 1
endif
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
stop 'Unable to put u_t on ZMQ server'
endif
deallocate(u_t)
v_0 = 0.d0
s_0 = 0.d0
call omp_set_nested(.True.) call omp_set_nested(.True.)
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()

View File

@ -2,7 +2,7 @@ program print_energy
implicit none implicit none
read_wf = .true. read_wf = .true.
touch read_wf touch read_wf
provide mo_bielec_integrals_in_map provide mo_bielec_integrals_in_map psi_coef psi_det psi_bilinear_matrix_transp_values
double precision :: time1, time0 double precision :: time1, time0
call wall_time(time0) call wall_time(time0)
call routine call routine

View File

@ -519,7 +519,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_
do k=1,N_det do k=1,N_det
psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l) psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
enddo enddo
!$OMP ENDDO !$OMP ENDDO NOWAIT
enddo enddo
!$OMP DO !$OMP DO
do k=1,N_det do k=1,N_det

View File

@ -456,16 +456,16 @@ BEGIN_TEMPLATE
iorder(i) = iorder1(1_$int_type+i1-i) iorder(i) = iorder1(1_$int_type+i1-i)
enddo enddo
endif endif
deallocate(x1,iorder1,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
stop
endif
if (i2>1_$int_type) then if (i2>1_$int_type) then
call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
endif endif
deallocate(x1,iorder1,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
stop
endif
return return
else if (iradix == -2) then ! Positive else if (iradix == -2) then ! Positive
@ -526,14 +526,24 @@ BEGIN_TEMPLATE
endif endif
!$OMP PARALLEL DEFAULT(SHARED) if (isize > 1000000)
!$OMP SINGLE
if (i3>1_$int_type) then if (i3>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(i3 > 1000000)
call $Xradix_sort$big(x,iorder,i3,iradix_new-1) call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
!$OMP END TASK
endif endif
if (isize-i3>1_$int_type) then if (isize-i3>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(iradix_new,i3) SHARED(x,iorder) if(isize-i3 > 1000000)
call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
!$OMP END TASK
endif endif
!$OMP TASKWAIT
!$OMP END SINGLE
!$OMP END PARALLEL
return return
endif endif
@ -588,11 +598,16 @@ BEGIN_TEMPLATE
if (i1>1_$int_type) then if (i1>1_$int_type) then
!$OMP TASK FIRSTPRIVATE(i0,iradix,i1) SHARED(x,iorder) if(i1 >1000000)
call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
!$OMP END TASK
endif endif
if (i0>1) then if (i0>1) then
!$OMP TASK FIRSTPRIVATE(i0,iradix) SHARED(x,iorder) if(i0 >1000000)
call $Xradix_sort$big(x,iorder,i0,iradix-1) call $Xradix_sort$big(x,iorder,i0,iradix-1)
!$OMP END TASK
endif endif
!$OMP TASKWAIT
end end