9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-07-11 13:43:34 +02:00

Fixed OpenMP bug in selection (slave_large)

This commit is contained in:
Anthony Scemama 2021-12-08 01:47:18 +01:00
parent dc43924aa6
commit 41f9c8ea6b
5 changed files with 31 additions and 33 deletions

View File

@ -31,12 +31,11 @@ subroutine run_pt2_slave(thread,iproc,energy)
double precision, intent(in) :: energy(N_states_diag) double precision, intent(in) :: energy(N_states_diag)
integer, intent(in) :: thread, iproc integer, intent(in) :: thread, iproc
if (N_det > 100000 ) then
call run_pt2_slave_large(thread,iproc,energy) call run_pt2_slave_large(thread,iproc,energy)
! if (N_det > nproc*(elec_alpha_num * (mo_num-elec_alpha_num))**2) then else
! call run_pt2_slave_large(thread,iproc,energy) call run_pt2_slave_small(thread,iproc,energy)
! else endif
! call run_pt2_slave_small(thread,iproc,energy)
! endif
end end
subroutine run_pt2_slave_small(thread,iproc,energy) subroutine run_pt2_slave_small(thread,iproc,energy)
@ -67,7 +66,6 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
double precision, external :: memory_of_double, memory_of_int double precision, external :: memory_of_double, memory_of_int
integer :: bsize ! Size of selection buffers integer :: bsize ! Size of selection buffers
! logical :: sending
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
@ -85,7 +83,6 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
buffer_ready = .False. buffer_ready = .False.
n_tasks = 1 n_tasks = 1
! sending = .False.
done = .False. done = .False.
do while (.not.done) do while (.not.done)
@ -119,14 +116,13 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
do k=1,n_tasks do k=1,n_tasks
call pt2_alloc(pt2_data(k),N_states) call pt2_alloc(pt2_data(k),N_states)
b%cur = 0 b%cur = 0
!double precision :: time2 double precision :: time2
!call wall_time(time2) call wall_time(time2)
call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k))) call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
!call wall_time(time1) call wall_time(time1)
!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1)) ! print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
enddo enddo
call wall_time(time1) call wall_time(time1)
!print *, '-->', i_generator(1), time1-time0, n_tasks
integer, external :: tasks_done_to_taskserver integer, external :: tasks_done_to_taskserver
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
@ -164,6 +160,11 @@ end subroutine
subroutine run_pt2_slave_large(thread,iproc,energy) subroutine run_pt2_slave_large(thread,iproc,energy)
use selection_types use selection_types
use f77_zmq use f77_zmq
BEGIN_DOC
! This subroutine can miss important determinants when the PT2 is completely
! computed. It should be called only for large workloads where the PT2 is
! interrupted before the end
END_DOC
implicit none implicit none
double precision, intent(in) :: energy(N_states_diag) double precision, intent(in) :: energy(N_states_diag)
@ -234,30 +235,28 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
ASSERT (b%N == bsize) ASSERT (b%N == bsize)
endif endif
double precision :: time0, time1
call wall_time(time0)
call pt2_alloc(pt2_data,N_states) call pt2_alloc(pt2_data,N_states)
b%cur = 0 b%cur = 0
call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator)) call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
call wall_time(time1)
integer, external :: tasks_done_to_taskserver integer, external :: tasks_done_to_taskserver
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
done = .true. done = .true.
endif endif
call sort_selection_buffer(b) call sort_selection_buffer(b)
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
call omp_set_lock(global_selection_buffer_lock) call omp_set_lock(global_selection_buffer_lock)
global_selection_buffer%mini = b%mini global_selection_buffer%mini = b%mini
call merge_selection_buffers(b,global_selection_buffer) call merge_selection_buffers(b,global_selection_buffer)
b%cur=0 b%cur=0
call omp_unset_lock(global_selection_buffer_lock) call omp_unset_lock(global_selection_buffer_lock)
if ( iproc == 1 ) then if ( iproc == 1 .or. i_generator < 100 .or. done) then
call omp_set_lock(global_selection_buffer_lock) call omp_set_lock(global_selection_buffer_lock)
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
global_selection_buffer%cur = 0 global_selection_buffer%cur = 0
call omp_unset_lock(global_selection_buffer_lock) call omp_unset_lock(global_selection_buffer_lock)
else else
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending)
endif endif

View File

@ -87,8 +87,8 @@ subroutine merge_selection_buffers(b1, b2)
double precision :: rss double precision :: rss
double precision, external :: memory_of_double double precision, external :: memory_of_double
sze = max(size(b1%val), size(b2%val)) sze = max(size(b1%val), size(b2%val))
rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze) ! rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
call check_mem(rss,irp_here) ! call check_mem(rss,irp_here)
allocate(val(sze), detmp(N_int, 2, sze)) allocate(val(sze), detmp(N_int, 2, sze))
i1=1 i1=1
i2=1 i2=1

View File

@ -330,12 +330,12 @@ BEGIN_PROVIDER [ double precision, c0_weight, (N_states) ]
c = maxval(psi_coef(:,i) * psi_coef(:,i)) c = maxval(psi_coef(:,i) * psi_coef(:,i))
c0_weight(i) = 1.d0/(c+1.d-20) c0_weight(i) = 1.d0/(c+1.d-20)
enddo enddo
c = 1.d0/minval(c0_weight(:)) c = 1.d0/sum(c0_weight(:))
do i=1,N_states do i=1,N_states
c0_weight(i) = c0_weight(i) * c c0_weight(i) = c0_weight(i) * c
enddo enddo
else else
c0_weight = 1.d0 c0_weight(:) = 1.d0
endif endif
END_PROVIDER END_PROVIDER
@ -352,7 +352,7 @@ BEGIN_PROVIDER [ double precision, state_average_weight, (N_states) ]
if (weight_one_e_dm == 0) then if (weight_one_e_dm == 0) then
state_average_weight(:) = c0_weight(:) state_average_weight(:) = c0_weight(:)
else if (weight_one_e_dm == 1) then else if (weight_one_e_dm == 1) then
state_average_weight(:) = 1./N_states state_average_weight(:) = 1.d0/N_states
else else
call ezfio_has_determinants_state_average_weight(exists) call ezfio_has_determinants_state_average_weight(exists)
if (exists) then if (exists) then

View File

@ -84,7 +84,6 @@ BEGIN_PROVIDER [ integer, psi_det_size ]
else else
psi_det_size = 1 psi_det_size = 1
endif endif
psi_det_size = max(psi_det_size,100000)
call write_int(6,psi_det_size,'Dimension of the psi arrays') call write_int(6,psi_det_size,'Dimension of the psi arrays')
endif endif
IRP_IF MPI_DEBUG IRP_IF MPI_DEBUG