10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-06-26 07:02:14 +02:00

Debugging

This commit is contained in:
Anthony Scemama 2017-11-26 11:09:03 +01:00
parent 9a0654c17d
commit d6ac6e6e73
16 changed files with 190 additions and 97 deletions

View File

@ -30,8 +30,9 @@ OPENMP : 1 ; Append OpenMP flags
# -ip : Inter-procedural optimizations
# -ftz : Flushes denormal results to zero
#
#FCFLAGS : -xAVX -O2 -ip -ftz -g -traceback
[OPT]
FCFLAGS : -xAVX -O2 -ip -ftz -g -traceback
FCFLAGS : -xAVX -O2 -ip -ftz -g -traceback -C -fpe0
# Profiling flags
#################

View File

@ -1,35 +1,3 @@
BEGIN_TEMPLATE
subroutine broadcast_chunks_$double(A, LDA)
implicit none
integer, intent(in) :: LDA
$type, intent(inout) :: A(LDA)
use bitmasks
include 'mpif.h'
BEGIN_DOC
! Broadcast with chunks of ~2GB
END_DOC
integer :: i, sze, ierr
do i=1,LDA,200000000/$8
sze = min(LDA-i+1, 200000000/$8)
call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast chuks $double ', i
stop -1
endif
enddo
end
SUBST [ double, type, 8, DOUBLE_PRECISION ]
double ; double precision ; 8 ; DOUBLE_PRECISION ;;
bit_kind ; integer(bit_kind) ; bit_kind_size ; BIT_KIND ;;
integer ; integer ; 4 ; INTEGER4 ;;
integer8 ; integer*8 ; 8 ; INTEGER8 ;;
END_TEMPLATE
subroutine mpi_bcast_psi(energy, size_energy)
implicit none
BEGIN_DOC

View File

@ -20,7 +20,7 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
double precision, allocatable :: pt2_detail(:,:), comb(:)
logical, allocatable :: computed(:)
integer, allocatable :: tbc(:)
integer :: i, j, k, Ncomb, generator_per_task, i_generator_end
integer :: i, j, k, Ncomb, i_generator_end
integer, external :: pt2_find
double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
@ -56,7 +56,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
end do
pt2_detail = 0d0
generator_per_task = 1
print *, '========== ================= ================= ================='
print *, ' Samples Energy Stat. Error Seconds '
print *, '========== ================= ================= ================='
@ -192,7 +191,7 @@ subroutine pt2_collector(E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove,
integer, allocatable :: task_id(:)
integer :: Nindex
integer, allocatable :: index(:)
double precision, save :: time0 = -1.d0
double precision :: time0
double precision :: time, timeLast, Nabove_old
double precision, external :: omp_get_wtime
integer :: tooth, firstTBDcomb, orgTBDcomb
@ -227,9 +226,7 @@ subroutine pt2_collector(E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove,
zmq_socket_pull = new_zmq_pull_socket()
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1))
more = 1
if (time0 < 0.d0) then
call wall_time(time0)
endif
call wall_time(time0)
timeLast = time0
call get_first_tooth(actually_computed, tooth)

View File

@ -24,6 +24,7 @@ subroutine run_pt2_slave(thread,iproc,energy)
double precision,allocatable :: pt2_detail(:,:)
integer :: index
integer :: Nindex
logical :: buffer_ready
allocate(pt2_detail(N_states, N_det_generators))
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
@ -40,6 +41,7 @@ subroutine run_pt2_slave(thread,iproc,energy)
Nindex=1
pt2 = 0d0
pt2_detail = 0d0
buffer_ready = .False.
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task)
@ -53,6 +55,7 @@ subroutine run_pt2_slave(thread,iproc,energy)
if(buf%N == 0) then
! Only first time
call create_selection_buffer(1, 2, buf)
buffer_ready = .True.
end if
do i_i_generator=1, Nindex
i_generator = index
@ -81,7 +84,9 @@ subroutine run_pt2_slave(thread,iproc,energy)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
call delete_selection_buffer(buf)
if (buffer_ready) then
call delete_selection_buffer(buf)
endif
end subroutine

View File

@ -18,7 +18,7 @@ subroutine run_selection_slave(thread,iproc,energy)
integer(ZMQ_PTR) :: zmq_socket_push
type(selection_buffer) :: buf, buf2
logical :: done
logical :: done, buffer_ready
double precision :: pt2(N_states)
PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
@ -35,6 +35,7 @@ subroutine run_selection_slave(thread,iproc,energy)
return
end if
buf%N = 0
buffer_ready = .False.
ctask = 1
pt2(:) = 0d0
@ -50,6 +51,7 @@ subroutine run_selection_slave(thread,iproc,energy)
! Only first time
call create_selection_buffer(N, N*2, buf)
call create_selection_buffer(N, N*2, buf2)
buffer_ready = .True.
else
ASSERT (N == buf%N)
end if
@ -77,7 +79,7 @@ subroutine run_selection_slave(thread,iproc,energy)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
if (buf%N > 0) then
if (buffer_ready) then
call delete_selection_buffer(buf)
call delete_selection_buffer(buf2)
endif

View File

@ -1,11 +1,13 @@
subroutine create_selection_buffer(N, siz, res)
subroutine create_selection_buffer(N, siz_, res)
use selection_types
implicit none
integer, intent(in) :: N, siz
integer, intent(in) :: N, siz_
type(selection_buffer), intent(out) :: res
integer :: siz
siz = max(siz_,1)
allocate(res%det(N_int, 2, siz), res%val(siz))
res%val(:) = 0d0
@ -97,6 +99,10 @@ subroutine merge_selection_buffers(b1, b2)
endif
enddo
deallocate(b2%det, b2%val)
do i=nmwen+1,b2%N
val(i) = 0.d0
detmp(1:N_int,1:2,i) = 0_bit_kind
enddo
b2%det => detmp
b2%val => val
b2%mini = min(b2%mini,b2%val(b2%N))

View File

@ -20,8 +20,9 @@ end
subroutine run_wf
use f77_zmq
implicit none
include 'mpif.h'
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
@ -62,6 +63,12 @@ subroutine run_wf
call run_selection_slave(0,i,energy)
!$OMP END PARALLEL
print *, 'Selection done'
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
else if (trim(zmq_state) == 'davidson') then
@ -72,11 +79,23 @@ subroutine run_wf
if (mpi_master) then
call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
endif
double precision :: t0, t1
call wall_time(t0)
call mpi_bcast_psi(energy,N_states)
call wall_time(t1)
call write_double(6,(t1-t0),'Broadcast time')
call omp_set_nested(.True.)
call davidson_slave_tcp(0)
call omp_set_nested(.False.)
print *, 'Davidson done'
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
else if (trim(zmq_state) == 'pt2') then
@ -96,6 +115,12 @@ subroutine run_wf
call run_pt2_slave(0,i,energy,lstop)
!$OMP END PARALLEL
print *, 'PT2 done'
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
endif

View File

@ -46,6 +46,9 @@ subroutine ZMQ_selection(N_in, pt2)
endif
call zmq_set_running(zmq_to_qp_run_socket)
ASSERT (allocated(b%det))
ASSERT (allocated(b%val))
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
i = omp_get_thread_num()
if (i==0) then

View File

@ -1 +1 @@
Determinants Utils Bitmask
Utils

View File

@ -52,19 +52,21 @@ END_PROVIDER
print *, 'ierr = ', ierr
stop 'Unable to get MPI rank'
endif
call write_int(6,mpi_rank,'MPI rank')
call MPI_COMM_SIZE (MPI_COMM_WORLD, mpi_size, ierr)
if (ierr /= MPI_SUCCESS) then
print *, 'ierr = ', ierr
stop 'Unable to get MPI size'
endif
call write_int(6,mpi_size,'MPI size')
IRP_ELSE
mpi_rank = 0
mpi_size = 1
IRP_ENDIF
if (mpi_size > 1) then
call write_int(6,mpi_rank,'MPI rank')
call write_int(6,mpi_size,'MPI size')
endif
ASSERT (mpi_rank >= 0)
ASSERT (mpi_rank < mpi_size)
@ -80,3 +82,35 @@ BEGIN_PROVIDER [ logical, mpi_master ]
END_PROVIDER
BEGIN_TEMPLATE
subroutine broadcast_chunks_$double(A, LDA)
implicit none
integer, intent(in) :: LDA
$type, intent(inout) :: A(LDA)
use bitmasks
include 'mpif.h'
BEGIN_DOC
! Broadcast with chunks of ~2GB
END_DOC
integer :: i, sze, ierr
do i=1,LDA,200000000/$8
sze = min(LDA-i+1, 200000000/$8)
call MPI_BCAST (A(i), sze, MPI_$DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast chuks $double ', i
stop -1
endif
enddo
end
SUBST [ double, type, 8, DOUBLE_PRECISION ]
double ; double precision ; 8 ; DOUBLE_PRECISION ;;
bit_kind ; integer(bit_kind) ; bit_kind_size ; BIT_KIND ;;
integer ; integer ; 4 ; INTEGER4 ;;
integer8 ; integer*8 ; 8 ; INTEGER8 ;;
END_TEMPLATE

View File

@ -32,8 +32,6 @@ subroutine davidson_run_slave(thread,iproc)
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
@ -74,56 +72,93 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze,
integer*8 :: rc8
integer :: N_states_read, N_det_read, psi_det_size_read
integer :: N_det_selectors_read, N_det_generators_read
double precision :: energy(N_st)
double precision, allocatable :: energy(:)
write(msg, *) 'get_psi ', worker_id
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
if (rc /= len(trim(msg))) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
stop 'error'
allocate(u_t(N_st,N_det))
if (mpi_master) then
write(msg, *) 'get_psi ', worker_id
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
if (rc /= len(trim(msg))) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
if (msg(1:13) /= 'get_psi_reply') then
print *, rc, trim(msg)
print *, 'Error in get_psi_reply'
stop 'error'
endif
read(msg(14:rc),*) N_states_read, N_det_read, psi_det_size_read, &
N_det_generators_read, N_det_selectors_read
if (N_states_read /= N_st) then
print *, N_st
stop 'error : N_st'
endif
if (N_det_read /= N_det) then
print *, N_det
stop 'N_det /= N_det_read'
endif
rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det_read*bit_kind,0)
if (rc8 /= N_int*2_8*N_det_read*bit_kind) then
print *, 'f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det_read*bit_kind,0)'
stop 'error'
endif
rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,u_t,size(u_t)*8_8,0)
if (rc8 /= size(u_t)*8_8) then
print *, rc, size(u_t)*8
print *, 'f77_zmq_recv8(zmq_to_qp_run_socket,u_t,size(u_t)*8_8,0)'
stop 'error'
endif
allocate (energy(N_st))
rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)
if (rc /= N_st*8) then
print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)'
stop 'error'
endif
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
if (msg(1:13) /= 'get_psi_reply') then
print *, rc, trim(msg)
print *, 'Error in get_psi_reply'
stop 'error'
endif
IRP_IF MPI
include 'mpif.h'
integer :: ierr
read(msg(14:rc),*) N_states_read, N_det_read, psi_det_size_read, &
N_det_generators_read, N_det_selectors_read
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
print *, mpi_rank, size(u_t)
call sleep(1)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast N_st'
stop -1
endif
call broadcast_chunks_double(u_t,size(u_t))
if (N_states_read /= N_st) then
print *, N_st
stop 'error : N_st'
endif
call MPI_BCAST (N_st, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast N_st'
stop -1
endif
if (N_det_read /= N_det) then
N_det = N_det_read
TOUCH N_det
endif
if (.not.mpi_master) then
allocate (energy(N_st))
endif
allocate(u_t(N_st,N_det_read))
call MPI_BCAST (energy, N_st, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here//': Unable to broadcast energy'
stop -1
endif
rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det_read*bit_kind,0)
if (rc8 /= N_int*2_8*N_det_read*bit_kind) then
print *, 'f77_zmq_recv8(zmq_to_qp_run_socket,psi_det,N_int*2_8*N_det_read*bit_kind,0)'
stop 'error'
endif
rc8 = f77_zmq_recv8(zmq_to_qp_run_socket,u_t,size(u_t)*8_8,0)
if (rc8 /= size(u_t)*8_8) then
print *, rc, size(u_t)*8
print *, 'f77_zmq_recv8(zmq_to_qp_run_socket,u_t,size(u_t)*8_8,0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)
if (rc /= N_st*8) then
print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)'
stop 'error'
endif
IRP_ENDIF
! Run tasks
! ---------
@ -297,6 +332,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det))

View File

@ -472,8 +472,14 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
allocate (v_0(sze,N_st),s_0(sze,N_st))
call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
endif
double precision :: norm
do i=1,N_st
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
norm = u_dot_u(u_0(1,i),n)
if (norm /= 0.d0) then
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)
else
e_0(i) = 0.d0
endif
enddo
deallocate (s_0, v_0)
end

View File

@ -444,7 +444,7 @@ subroutine save_wavefunction
BEGIN_DOC
! Save the wave function into the EZFIO file
END_DOC
call save_wavefunction_general(N_det,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
call save_wavefunction_general(N_det,min(N_states,N_det),psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted)
end
@ -454,7 +454,7 @@ subroutine save_wavefunction_unsorted
BEGIN_DOC
! Save the wave function into the EZFIO file
END_DOC
call save_wavefunction_general(N_det,N_states,psi_det,size(psi_coef,1),psi_coef)
call save_wavefunction_general(N_det,min(N_states,N_det),psi_det,size(psi_coef,1),psi_coef)
end
subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
@ -497,6 +497,9 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
accu_norm(k) = accu_norm(k) + psicoef(i,k) * psicoef(i,k)
psi_coef_save(i,k) = psicoef(i,k)
enddo
if (accu_norm(k) == 0.d0) then
accu_norm(k) = 1.e-12
endif
enddo
do k = 1, nstates
accu_norm(k) = 1.d0/dsqrt(accu_norm(k))

View File

@ -101,6 +101,11 @@ recursive subroutine rec_occ_pattern_to_dets(list_todo,nt,list_a,na,d,nd,sze,am
if (na == amax) then
nd += 1
if (nd > sze) then
print *, irp_here, ': nd = ', nd
print *, irp_here, ': sze = ', sze
stop 'bug in rec_occ_pattern_to_dets'
endif
if (na > 0) then
call list_to_bitstring( d(1,1,nd), list_a, na, Nint)
endif
@ -259,7 +264,7 @@ subroutine make_s2_eigenfunction
!$OMP PRIVATE(s,ithread, d, det_buffer, smax, N_det_new,i,j,k)
N_det_new = 0
call occ_pattern_to_dets_size(psi_occ_pattern(1,1,1),s,elec_alpha_num,N_int)
allocate (d(N_int,2,s), det_buffer(N_int,2,bufsze) )
allocate (d(N_int,2,s+16), det_buffer(N_int,2,bufsze) )
smax = s
ithread=0
!$ ithread = omp_get_thread_num()
@ -269,7 +274,7 @@ subroutine make_s2_eigenfunction
s += 1
if (s > smax) then
deallocate(d)
allocate ( d(N_int,2,s) )
allocate ( d(N_int,2,s+16) )
smax = s
endif
call occ_pattern_to_dets(psi_occ_pattern(1,1,i),d,s,elec_alpha_num,N_int)

View File

@ -1 +1 @@
MPI

View File

@ -4,6 +4,8 @@ BEGIN_PROVIDER [ character*(128), ezfio_filename ]
! Name of EZFIO file. It is obtained from the QPACKAGE_INPUT environment
! variable if it is set, or as the 1st argument of the command line.
END_DOC
PROVIDE mpi_initialized
! Get the QPACKAGE_INPUT environment variable
call getenv('QPACKAGE_INPUT',ezfio_filename)