mirror of https://github.com/LCPQ/quantum_package synced 2024-07-23 03:07:34 +02:00

Fixed too large stack

Anthony Scemama 2018-09-14 16:23:25 +02:00
commit 0b64d752b7
10 changed files with 416 additions and 206 deletions
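
The recurring change behind the commit title: routines such as ZMQ_pt2 and ZMQ_dress declared a local buffer character(len=64000) :: task, an automatic variable that lives on the stack and can overflow it, especially when every OpenMP thread gets its own copy. The diffs below switch these buffers to deferred-length allocatable strings, which are heap-allocated. A minimal sketch of the pattern, assuming a hypothetical subroutine name and payload (only the declaration, the allocate length, and the 20-character chunk format mirror the diff):

    subroutine build_task_list
      implicit none
      ! Before: character(len=64000) :: task   ! 64 kB automatic variable, placed on the stack
      ! After : a deferred-length allocatable string, placed on the heap
      character(len=:), allocatable :: task
      integer :: ipos
      allocate(character(len=100000) :: task)
      task(:) = ' '     ! subscripted assignment blank-fills without reallocating the string
      ipos = 1
      write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 1, 1   ! fill one 20-character chunk
      ipos = ipos + 20
      ! ... in the real code, trim(task(1:ipos)) is sent to the task server
      !     whenever ipos > len(task)-20, then ipos is reset to 1 ...
      deallocate(task)
    end subroutine build_task_list

In the actual diffs the buffer is filled in 20-character chunks and flushed with add_task_to_taskserver whenever ipos exceeds len(task)-20, instead of the old hard-coded 63980 limit.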

View File

@@ -15,9 +15,9 @@ END_PROVIDER
  integer :: i
  integer :: e
  e = elec_num - n_core_orb * 2
- pt2_n_tasks_max = min(1+(e*(e-1))/2, int(dsqrt(dble(N_det_generators))))
+ pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10)
  do i=1,N_det_generators
- if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.0001d0) then
+ if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then
  pt2_F(i) = pt2_n_tasks_max
  else
  pt2_F(i) = 1
@@ -158,17 +158,28 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error)
  endif
  integer, external :: add_task_to_taskserver
- character(len=64000) :: task
+ character(len=:), allocatable :: task
+ allocate(character(len=100000) :: task)
  integer :: j,k,ipos
+ ipos=0
+ do i=1,N_det_generators
+   if (pt2_F(i) > 1) then
+     ipos += 1
+   endif
+ enddo
+ call write_int(6,ipos,'Number of fragmented tasks')
  ipos=1
- task = ' '
  do i= 1, N_det_generators
  do j=1,pt2_F(pt2_J(i))
  write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i)
  ipos += 20
- if (ipos > 63980) then
+ if (ipos > len(task)-20) then
  if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
  stop 'Unable to add task to task server'
  endif
@@ -328,7 +339,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error)
  print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E, eqt, time-time0, ''
  if( dabs(error(pt2_stoch_istate) / pt2(pt2_stoch_istate)) < relative_error) then
  if (zmq_abort(zmq_to_qp_run_socket) == -1) then
- call sleep(1)
+ call sleep(10)
  if (zmq_abort(zmq_to_qp_run_socket) == -1) then
  print *, irp_here, ': Error in sending abort signal (2)'
  endif
@@ -402,7 +413,16 @@ end function
  d(i) = .true.
  pt2_J(i) = i
  end do
- call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/))
+ integer, allocatable :: seed(:)
+ call random_seed(size=m)
+ allocate(seed(m))
+ do i=1,m
+   seed(i) = i
+ enddo
+ call random_seed(put=seed)
+ deallocate(seed)
  call RANDOM_NUMBER(pt2_u)
  call RANDOM_NUMBER(pt2_u)

View File

@@ -357,6 +357,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
  endif
  enddo
  enddo
  deallocate(exc_degree)
  nmax=k-1
@@ -404,16 +405,35 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
  end do
  deallocate(indices)
  allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det))
  allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2))
  allocate (mat(N_states, mo_tot_num, mo_tot_num))
  maskInd = -1
- integer :: nb_count, maskInd_save
+ integer :: nb_count, maskInd_save, monoBdo_save
  logical :: found
  do s1=1,2
  do i1=N_holes(s1),1,-1 ! Generate low excitations first
+ monoBdo_save = monoBdo
+ maskInd_save = maskInd
+ do s2=s1,2
+   ib = 1
+   if(s1 == s2) ib = i1+1
+   do i2=N_holes(s2),ib,-1
+     maskInd += 1
+     if(mod(maskInd, csubset) == (subset-1)) then
+       found = .True.
+     end if
+   enddo
+   if(s1 /= s2) monoBdo = .false.
+ enddo
+ if (.not.found) cycle
+ monoBdo = monoBdo_save
+ maskInd = maskInd_save
  h1 = hole_list(i1,s1)
  call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int)
@@ -526,8 +546,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
  enddo
  end if
  end do
  do s2=s1,2
  sp = s1

View File

@@ -64,11 +64,7 @@ subroutine run_wf
  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order
  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
  PROVIDE psi_bilinear_matrix_transp_order
- !!$OMP PARALLEL PRIVATE(i)
- !i = omp_get_thread_num()
- ! call dress_slave_tcp(i+1, energy)
  call dress_slave_tcp(0, energy)
- !!$OMP END PARALLEL
  endif
  end do
  end
@@ -77,8 +73,6 @@ subroutine dress_slave_tcp(i,energy)
  implicit none
  double precision, intent(in) :: energy(N_states_diag)
  integer, intent(in) :: i
- logical :: lstop
- lstop = .False.
  call run_dress_slave(0,i,energy)
  end

View File

@@ -13,20 +13,24 @@ END_PROVIDER
  implicit none
  logical, external :: testTeethBuilding
  integer :: i
- pt2_F(:) = 1
- pt2_n_tasks_max = 20
- ! do i=1,N_det_generators
- !   if (maxval(dabs(psi_coef_sorted_gen(i,:))) > 0.001d0) then
- !     pt2_F(i) = max(1,min( (elec_alpha_num-n_core_orb)**2, pt2_n_tasks_max))
- !   endif
- ! enddo
+ integer :: e
+ e = elec_num - n_core_orb * 2
+ pt2_n_tasks_max = 1 + min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10)
+ do i=1,N_det_generators
+   if (maxval(dabs(psi_coef_sorted_gen(i,1:N_states))) > 0.001d0) then
+     pt2_F(i) = pt2_n_tasks_max
+   else
+     pt2_F(i) = 1
+   endif
+ enddo
  if(N_det_generators < 1024) then
  pt2_minDetInFirstTeeth = 1
  pt2_N_teeth = 1
  else
  pt2_minDetInFirstTeeth = min(5, N_det_generators)
- do pt2_N_teeth=100,2,-1
+ do pt2_N_teeth=20,2,-1
  if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
  end do
  end if
@@ -156,7 +160,16 @@ END_PROVIDER
  d(i) = .true.
  pt2_J_(i) = i
  end do
- call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/))
+ integer, allocatable :: seed(:)
+ call random_seed(size=m)
+ allocate(seed(m))
+ do i=1,m
+   seed(i) = i
+ enddo
+ call random_seed(put=seed)
+ deallocate(seed)
  call RANDOM_NUMBER(pt2_u)
  call RANDOM_NUMBER(pt2_u)
@@ -219,7 +232,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
  implicit none
- character(len=64000) :: task
+ character(len=:), allocatable :: task
  integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
  integer, external :: omp_get_thread_num
  double precision, intent(in) :: E(N_states), relative_error
@@ -232,8 +245,9 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
  integer :: i, j, k, Ncp
- integer, external :: add_task_to_taskserver
  double precision :: state_average_weight_save(N_states)
+ allocate(character(len=100000) :: task)
+ PROVIDE Nproc
  task(:) = CHAR(0)
  allocate(delta(N_states,N_det), delta_s2(N_states, N_det))
  state_average_weight_save(:) = state_average_weight(:)
@@ -254,7 +268,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
  integer, external :: zmq_put_N_det_generators
  integer, external :: zmq_put_N_det_selectors
  integer, external :: zmq_put_dvector
- integer, external :: zmq_set_running
+ integer, external :: zmq_put_int
  if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
  stop 'Unable to put psi on ZMQ server'
@@ -271,25 +285,58 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
  if (zmq_put_dvector(zmq_to_qp_run_socket,1,"state_average_weight",state_average_weight,N_states) == -1) then
  stop 'Unable to put state_average_weight on ZMQ server'
  endif
- if (zmq_put_dvector(zmq_to_qp_run_socket,1,"dress_stoch_istate",real(dress_stoch_istate,8),1) == -1) then
+ if (zmq_put_int(zmq_to_qp_run_socket,1,"dress_stoch_istate",dress_stoch_istate) == -1) then
  stop 'Unable to put dress_stoch_istate on ZMQ server'
  endif
+ if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then
+   stop 'Unable to put threshold_selectors on ZMQ server'
+ endif
+ if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then
+   stop 'Unable to put threshold_generators on ZMQ server'
+ endif
+ integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+ call write_int(6,pt2_n_tasks_max,'Max number of task fragments')
- do i=1,N_det_generators
-   do j=1,pt2_F(pt2_J(i))
-     write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i)
-     if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then
-       stop 'Unable to add task to task server'
-     endif
-   end do
- end do
+ integer, external :: add_task_to_taskserver
+ integer :: ipos
+ ipos=0
+ do i=1,N_det_generators
+   if (pt2_F(i) > 1) then
+     ipos += 1
+   endif
+ enddo
+ call write_int(6,ipos,'Number of fragmented tasks')
+ ipos=1
+ do i= 1, N_det_generators
+   do j=1,pt2_F(pt2_J(i))
+     write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i)
+     ipos += 20
+     if (ipos > len(task)-20) then
+       if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+         stop 'Unable to add task to task server'
+       endif
+       ipos=1
+     endif
+   end do
+ enddo
+ if (ipos > 1) then
+   if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+     stop 'Unable to add task to task server'
+   endif
+ endif
- if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
-   print *, irp_here, ': Failed in zmq_set_running'
- endif
+ integer, external :: zmq_set_running
+ if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+   print *, irp_here, ': Failed in zmq_set_running'
+ endif
  integer :: nproc_target
  nproc_target = nproc
@@ -495,14 +542,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
  m += 1
  if(dabs(error / avg) <= relative_error) then
  integer, external :: zmq_put_dvector
- i= zmq_put_dvector(zmq_to_qp_run_socket, worker_id, "ending", dble(m-1), 1)
+ integer, external :: zmq_put_int
+ i= zmq_put_int(zmq_to_qp_run_socket, worker_id, "ending", (m-1))
  found = .true.
  end if
  else
  do
  call pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks)
  if(time0 == -1d0) then
- print *, "first pull", omp_get_wtime()-time
  time0 = omp_get_wtime()
  end if
  if(m_task == 0) then
@@ -516,14 +563,13 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
  end if
  end do
  do i=1,n_tasks
- if(edI(edI_index(i)) /= 0d0) stop "NIN M"
  edI(edI_index(i)) += edI_task(i)
  end do
  dot_f(m_task) -= f
  end if
  end do
  if (zmq_abort(zmq_to_qp_run_socket) == -1) then
- call sleep(1)
+ call sleep(10)
  if (zmq_abort(zmq_to_qp_run_socket) == -1) then
  print *, irp_here, ': Error in sending abort signal (2)'
  endif

View File

@@ -24,7 +24,7 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ]
  double precision :: E_CI_before(N_states)
  integer :: cnt = 0
- allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij))
+ allocate(dress(N_states), del(N_states, N_det), del_s2(N_states, N_det))
  delta_ij_tmp = 0d0
@@ -32,9 +32,9 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ]
  call write_double(6,dress_relative_error,"Convergence of the stochastic algorithm")
- call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(dress_relative_error), N_det_delta_ij)
- delta_ij_tmp(:,:,1) = del(:,:)
- delta_ij_tmp(:,:,2) = del_s2(:,:)
+ call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(dress_relative_error))
+ delta_ij_tmp(:,1:N_det_delta_ij,1) = del(:,1:N_det_delta_ij)
+ delta_ij_tmp(:,1:N_det_delta_ij,2) = del_s2(:,1:N_det_delta_ij)
  deallocate(dress, del, del_s2)

View File

@@ -33,15 +33,14 @@ subroutine run_dress_slave(thread,iproce,energy)
  integer :: cp_max(Nproc)
  integer :: will_send, task_id, purge_task_id, ntask_buf
  integer, allocatable :: task_buf(:)
- integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1)
- integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task
+ ! integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1)
+ ! integer(kind=OMP_LOCK_KIND) :: lck_sto(dress_N_cp)
  double precision :: fac
- double precision :: ending(1)
- integer, external :: zmq_get_dvector
+ integer :: ending
+ integer, external :: zmq_get_dvector, zmq_get_int
  ! double precision, external :: omp_get_wtime
  double precision :: time, time0
  integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc)
- ! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
  allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2))
  allocate(cp(N_states, N_det, dress_N_cp, 2))
@@ -53,14 +52,12 @@ subroutine run_dress_slave(thread,iproce,energy)
  cp = 0d0
  task = CHAR(0)
- call omp_init_lock(sending)
- call omp_init_lock(getting_task)
- do i=0,dress_N_cp+1
-   call omp_init_lock(lck_sto(i))
- end do
- do i=0,pt2_N_teeth+1
-   call omp_init_lock(lck_det(i))
- end do
+ ! do i=1,dress_N_cp
+ !   call omp_init_lock(lck_sto(i))
+ ! end do
+ ! do i=0,pt2_N_teeth+1
+ !   call omp_init_lock(lck_det(i))
+ ! end do
  cp_done = 0
  cp_sent = 0
@@ -69,7 +66,7 @@ subroutine run_dress_slave(thread,iproce,energy)
  double precision :: hij, sij, tmp
  purge_task_id = 0
  provide psi_energy
- ending(1) = dble(dress_N_cp+1)
+ ending = dress_N_cp+1
  ntask_tbd = 0
  !$OMP PARALLEL DEFAULT(SHARED) &
  !$OMP PRIVATE(breve_delta_m, task_id) &
@@ -86,7 +83,7 @@ subroutine run_dress_slave(thread,iproce,energy)
  stop "WORKER -1"
  end if
  iproc = omp_get_thread_num()+1
  allocate(breve_delta_m(N_states,N_det,2))
  allocate(task_buf(pt2_n_tasks_max))
  ntask_buf = 0
@@ -94,8 +91,9 @@ subroutine run_dress_slave(thread,iproce,energy)
  call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf)
  end if
+ cp_max(:) = 0
  do while(cp_done > cp_sent .or. m /= dress_N_cp+1)
- call omp_set_lock(getting_task)
+ !$OMP CRITICAL (send)
  if(ntask_tbd == 0) then
  ntask_tbd = size(task_tbd)
  call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd)
@@ -113,13 +111,13 @@ subroutine run_dress_slave(thread,iproce,energy)
  ntask_tbd -= 1
  else
  m = dress_N_cp + 1
- i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1)
+ i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending)
  end if
- call omp_unset_lock(getting_task)
  will_send = 0
- !$OMP CRITICAL
  cp_max(iproc) = m
+ ! print *, cp_max(:)
+ ! print *, ''
  cp_done = minval(cp_max)-1
  if(cp_done > cp_sent) then
  will_send = cp_sent + 1
@@ -132,10 +130,8 @@ subroutine run_dress_slave(thread,iproce,energy)
  ntask_buf += 1
  task_buf(ntask_buf) = task_id
  end if
- !$OMP END CRITICAL
- if(will_send /= 0 .and. will_send <= int(ending(1))) then
+ if(will_send /= 0 .and. will_send <= ending) then
- call omp_set_lock(sending)
  n_tasks = 0
  sum_f = 0
  do i=1,N_det_generators
@@ -146,9 +142,10 @@ subroutine run_dress_slave(thread,iproce,energy)
  edI_index(n_tasks) = i
  end if
  end do
- call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks)
- call omp_unset_lock(sending)
+ call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, &
+     breve_delta_m, 0, n_tasks)
  end if
+ !$OMP END CRITICAL (send)
  if(m /= dress_N_cp+1) then
  !UPDATE i_generator
@@ -158,29 +155,29 @@ subroutine run_dress_slave(thread,iproce,energy)
  time0 = omp_get_wtime()
  call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc)
  time = omp_get_wtime()
  !print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0
  t = dress_T(i_generator)
- call omp_set_lock(lck_det(t))
+ !$OMP CRITICAL(t_crit)
  do j=1,N_det
  do i=1,N_states
  delta_det(i,j,t, 1) = delta_det(i,j,t, 1) + breve_delta_m(i,j,1)
  delta_det(i,j,t, 2) = delta_det(i,j,t, 2) + breve_delta_m(i,j,2)
  enddo
  enddo
- call omp_unset_lock(lck_det(t))
+ !$OMP END CRITICAL(t_crit)
  do p=1,dress_N_cp
  if(dress_e(i_generator, p) /= 0d0) then
  fac = dress_e(i_generator, p)
- call omp_set_lock(lck_sto(p))
+ !$OMP CRITICAL(p_crit)
  do j=1,N_det
  do i=1,N_states
  cp(i,j,p,1) = cp(i,j,p,1) + breve_delta_m(i,j,1) * fac
  cp(i,j,p,2) = cp(i,j,p,2) + breve_delta_m(i,j,2) * fac
  enddo
  enddo
- call omp_unset_lock(lck_sto(p))
+ !$OMP END CRITICAL(p_crit)
  end if
  end do
@@ -198,7 +195,9 @@ subroutine run_dress_slave(thread,iproce,energy)
  ntask_buf = 0
  end if
  end if
+ !$OMP FLUSH
  end do
  !$OMP BARRIER
  if(ntask_buf /= 0) then
  call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf)
@@ -206,12 +205,12 @@ subroutine run_dress_slave(thread,iproce,energy)
  end if
  !$OMP SINGLE
  if(purge_task_id /= 0) then
- do while(int(ending(1)) == dress_N_cp+1)
+ do while(ending == dress_N_cp+1)
  call sleep(1)
- i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1)
+ i= zmq_get_int(zmq_to_qp_run_socket, worker_id, "ending", ending)
  end do
- will_send = int(ending(1))
+ will_send = ending
  breve_delta_m = 0d0
  do l=will_send, 1,-1
@@ -238,12 +237,12 @@ subroutine run_dress_slave(thread,iproce,energy)
  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
  call end_zmq_push_socket(zmq_socket_push,thread)
  !$OMP END PARALLEL
- do i=0,dress_N_cp+1
-   call omp_destroy_lock(lck_sto(i))
- end do
- do i=0,pt2_N_teeth+1
-   call omp_destroy_lock(lck_det(i))
- end do
+ ! do i=0,dress_N_cp+1
+ !   call omp_destroy_lock(lck_sto(i))
+ ! end do
+ ! do i=0,pt2_N_teeth+1
+ !   call omp_destroy_lock(lck_det(i))
+ ! end do
  end subroutine

View File

@@ -1,15 +1,178 @@
- program shifted_bk
+ program shifted_bk_slave
  implicit none
  BEGIN_DOC
- ! Helper subroutine to compute the dress in distributed mode.
+ ! Helper program to compute the dress in distributed mode.
  END_DOC
- PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
- PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order
- PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
- PROVIDE psi_bilinear_matrix_transp_order
- !call diagonalize_CI()
- call dress_slave()
+ read_wf = .False.
+ distributed_davidson = .False.
+ SOFT_TOUCH read_wf distributed_davidson
+ call provide_all
+ call switch_qp_run_to_master
+ call run_w
  end
+ subroutine provide_all
+ PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+ PROVIDE dress_e0_denominator mo_tot_num N_int ci_energy mpi_master zmq_state zmq_context
+ PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight
+ PROVIDE N_det_selectors dress_stoch_istate N_det
+ end
+ subroutine run_w
+ use f77_zmq
+ implicit none
+ IRP_IF MPI
+ include 'mpif.h'
+ IRP_ENDIF
+ integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+ integer(ZMQ_PTR) :: zmq_to_qp_run_socket
+ double precision :: energy(N_states)
+ character*(64) :: states(3)
+ character*(64) :: old_state
+ integer :: rc, i, ierr
+ double precision :: t0, t1
+ integer, external :: zmq_get_dvector, zmq_get_N_det_generators
+ integer, external :: zmq_get_ivector
+ integer, external :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_int
+ integer, external :: zmq_get_N_states_diag
+ zmq_context = f77_zmq_ctx_new ()
+ states(1) = 'selection'
+ states(2) = 'davidson'
+ states(3) = 'dress'
+ old_state = 'Waiting'
+ zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+ PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master
+ PROVIDE zmq_state N_det_selectors dress_stoch_istate N_det dress_e0_denominator
+ PROVIDE N_det_generators N_states N_states_diag
+ IRP_IF MPI
+ call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+ IRP_ENDIF
+ do
+   if (mpi_master) then
+     call wait_for_states(states,zmq_state,size(states))
+     if (zmq_state(1:64) == old_state(1:64)) then
+       call sleep(1)
+       cycle
+     else
+       old_state(1:64) = zmq_state(1:64)
+     endif
+     print *, trim(zmq_state)
+   endif
+   IRP_IF MPI_DEBUG
+   print *, irp_here, mpi_rank
+   call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+   IRP_ENDIF
+   IRP_IF MPI
+   call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+   if (ierr /= MPI_SUCCESS) then
+     print *, irp_here, 'error in broadcast of zmq_state'
+   endif
+   IRP_ENDIF
+   if(zmq_state(1:7) == 'Stopped') then
+     exit
+   endif
+   if (zmq_state(1:8) == 'davidson') then
+     ! Davidson
+     ! --------
+     call wall_time(t0)
+     if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+     if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
+     if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle
+     call wall_time(t1)
+     if (mpi_master) then
+       call write_double(6,(t1-t0),'Broadcast time')
+     endif
+     call omp_set_nested(.True.)
+     call davidson_slave_tcp(0)
+     call omp_set_nested(.False.)
+     print *, 'Davidson done'
+     IRP_IF MPI
+     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+     if (ierr /= MPI_SUCCESS) then
+       print *, irp_here, 'error in barrier'
+     endif
+     IRP_ENDIF
+     print *, 'All Davidson done'
+   else if (zmq_state(1:5) == 'dress') then
+     ! Dress
+     ! ---
+     call wall_time(t0)
+     if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+     print *, 'if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle', mpi_rank
+     if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+     print *, 'if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank
+     if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+     print *, 'if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle', mpi_rank
+     if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle
+     print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_generators,threshold_generators,1) == -1) cycle', mpi_rank
+     if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle
+     print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,threshold_selectors,threshold_selectors,1) == -1) cycle', mpi_rank
+     if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+     print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,energy,energy,N_states) == -1) cycle', mpi_rank
+     if (zmq_get_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) cycle
+     print *, 'if (zmq_get_int(zmq_to_qp_run_socket,1,dress_stoch_istate,dress_stoch_istate) == -1) cycle', mpi_rank
+     if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+     print *, 'if (zmq_get_dvector(zmq_to_qp_run_socket,1,state_average_weight,state_average_weight,N_states) == -1) cycle', mpi_rank
+     psi_energy(1:N_states) = energy(1:N_states)
+     TOUCH psi_energy state_average_weight dress_stoch_istate threshold_selectors threshold_generators
+     if (mpi_master) then
+       print *, 'N_det', N_det
+       print *, 'N_det_generators', N_det_generators
+       print *, 'N_det_selectors', N_det_selectors
+       print *, 'psi_energy', psi_energy
+       print *, 'dress_stoch_istate', dress_stoch_istate
+       print *, 'state_average_weight', state_average_weight
+     endif
+     call wall_time(t1)
+     call write_double(6,(t1-t0),'Broadcast time')
+     call dress_slave_tcp(0, energy)
+     IRP_IF MPI
+     call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+     if (ierr /= MPI_SUCCESS) then
+       print *, irp_here, 'error in barrier'
+     endif
+     IRP_ENDIF
+     print *, 'All dress done'
+   endif
+ end do
+ IRP_IF MPI
+ call MPI_finalize(ierr)
+ IRP_ENDIF
+ end

View File

@@ -36,8 +36,6 @@ subroutine davidson_run_slave(thread,iproc)
  integer, external :: connect_to_taskserver
- include 'mpif.h'
- integer ierr
  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)

View File

@@ -167,7 +167,7 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
  end select
  end
- subroutine get_double_excitation_ref(det1,det2,exc,phase,Nint)
+ subroutine get_double_excitation(det1,det2,exc,phase,Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
@@ -335,112 +335,6 @@ subroutine get_phasemask_bit(det1, pm, Nint)
  end subroutine
- subroutine get_double_excitation(det1,det2,exc,phase,Nint)
- use bitmasks
- implicit none
- BEGIN_DOC
- ! Returns the two excitation operators between two doubly excited determinants and the phase
- END_DOC
- integer, intent(in) :: Nint
- integer(bit_kind), intent(in) :: det1(Nint,2)
- integer(bit_kind), intent(in) :: det2(Nint,2)
- integer, intent(out) :: exc(0:2,2,2)
- double precision, intent(out) :: phase
- integer :: tz
- integer :: l, ispin, idx_hole, idx_particle, ishift
- integer :: nperm
- integer :: i,j,k,m,n
- integer :: high, low
- integer :: a,b,c,d
- integer(bit_kind) :: hole, particle, tmp, pm(Nint,2)
- double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
- double precision :: refaz
- logical :: ok
- ASSERT (Nint > 0)
- !do ispin=1,2
- !tmp = 0_8
- !do i=1,Nint
- ! pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1))
- ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2))
- ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4))
- ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8))
- ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16))
- ! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32))
- ! pm(i,ispin) = xor(pm(i,ispin), tmp)
- ! if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp)
- !end do
- !end do
- call get_phasemask_bit(det1, pm, Nint)
- nperm = 0
- exc(0,1,1) = 0
- exc(0,2,1) = 0
- exc(0,1,2) = 0
- exc(0,2,2) = 0
- do ispin = 1,2
-   idx_particle = 0
-   idx_hole = 0
-   ishift = 1-bit_kind_size
-   !par = 0
-   do l=1,Nint
-     ishift = ishift + bit_kind_size
-     if (det1(l,ispin) == det2(l,ispin)) then
-       cycle
-     endif
-     tmp = xor( det1(l,ispin), det2(l,ispin) )
-     particle = iand(tmp, det2(l,ispin))
-     hole = iand(tmp, det1(l,ispin))
-     do while (particle /= 0_bit_kind)
-       tz = trailz(particle)
-       nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1)
-       idx_particle = idx_particle + 1
-       exc(0,2,ispin) = exc(0,2,ispin) + 1
-       exc(idx_particle,2,ispin) = tz+ishift
-       particle = iand(particle,particle-1_bit_kind)
-     enddo
-     if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)==2
-       exit
-     endif
-     do while (hole /= 0_bit_kind)
-       tz = trailz(hole)
-       nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1)
-       idx_hole = idx_hole + 1
-       exc(0,1,ispin) = exc(0,1,ispin) + 1
-       exc(idx_hole,1,ispin) = tz+ishift
-       hole = iand(hole,hole-1_bit_kind)
-     enddo
-     if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)
-       exit
-     endif
-   enddo
-   select case (exc(0,1,ispin))
-     case(0)
-       cycle
-     case(1)
-       if(exc(1,1,ispin) < exc(1,2,ispin)) nperm = nperm+1
-     case (2)
-       a = exc(1,1,ispin)
-       b = exc(1,2,ispin)
-       c = exc(2,1,ispin)
-       d = exc(2,2,ispin)
-       if(min(a,c) > max(b,d) .or. min(b,d) > max(a,c) .or. (a-b)*(c-d)<0) then
-         nperm = nperm + 1
-       end if
-       exit
-   end select
- enddo
- phase = phase_dble(iand(nperm,1))
- !call get_double_excitation_ref(det1,det2,exc,refaz,Nint)
- !if(phase == refaz) then
- ! print *, "phase", phase, refaz, n, exc(0,1,1)
- !end if
- end
  subroutine get_mono_excitation(det1,det2,exc,phase,Nint)
  use bitmasks

View File

@@ -8,7 +8,7 @@ integer function zmq_put_dvector(zmq_to_qp_run_socket, worker_id, name, x, size_
  integer, intent(in) :: worker_id
  character*(*) :: name
  integer, intent(in) :: size_x
- double precision, intent(out) :: x(size_x)
+ double precision, intent(in) :: x(size_x)
  integer :: rc
  character*(256) :: msg
@@ -111,7 +111,7 @@ integer function zmq_put_ivector(zmq_to_qp_run_socket, worker_id, name, x, size_
  integer, intent(in) :: worker_id
  character*(*) :: name
  integer, intent(in) :: size_x
- integer, intent(out) :: x(size_x)
+ integer, intent(in) :: x(size_x)
  integer :: rc
  character*(256) :: msg
@@ -201,3 +201,81 @@ end
+ integer function zmq_put_int(zmq_to_qp_run_socket, worker_id, name, x)
+ use f77_zmq
+ implicit none
+ BEGIN_DOC
+ ! Put a vector of integers on the qp_run scheduler
+ END_DOC
+ integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
+ integer, intent(in) :: worker_id
+ character*(*) :: name
+ integer, intent(in) :: x
+ integer :: rc
+ character*(256) :: msg
+ zmq_put_int = 0
+ write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, name
+ rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+ if (rc /= len(trim(msg))) then
+   zmq_put_int = -1
+   return
+ endif
+ rc = f77_zmq_send(zmq_to_qp_run_socket,x,4,0)
+ if (rc /= 4) then
+   zmq_put_int = -1
+   return
+ endif
+ rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+ if (msg(1:rc) /= 'put_data_reply ok') then
+   zmq_put_int = -1
+   return
+ endif
+ end
+ integer function zmq_get_int(zmq_to_qp_run_socket, worker_id, name, x)
+ use f77_zmq
+ implicit none
+ BEGIN_DOC
+ ! Get a vector of integers from the qp_run scheduler
+ END_DOC
+ integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
+ integer, intent(in) :: worker_id
+ character*(*), intent(in) :: name
+ integer, intent(out) :: x
+ integer :: rc
+ character*(256) :: msg
+ PROVIDE zmq_state
+ ! Success
+ zmq_get_int = 0
+ if (mpi_master) then
+   write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, name
+   rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+   if (rc /= len(trim(msg))) then
+     zmq_get_int = -1
+     go to 10
+   endif
+   rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+   if (msg(1:14) /= 'get_data_reply') then
+     zmq_get_int = -1
+     go to 10
+   endif
+   rc = f77_zmq_recv(zmq_to_qp_run_socket,x,4,0)
+   if (rc /= 4) then
+     zmq_get_int = -1
+     go to 10
+   endif
+ endif
+ 10 continue
end