10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-12 05:58:24 +01:00

Merge branch 'master' of github.com:scemama/quantum_package

This commit is contained in:
Anthony Scemama 2018-10-15 11:20:27 +02:00
commit 665ece19e5
15 changed files with 323 additions and 148 deletions

View File

@ -156,9 +156,10 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error)
integer, external :: add_task_to_taskserver integer, external :: add_task_to_taskserver
character(100000) :: task character(400000) :: task
integer :: j,k,ipos integer :: j,k,ipos,ifirst
ifirst=0
ipos=0 ipos=0
do i=1,N_det_generators do i=1,N_det_generators
@ -166,19 +167,24 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error)
ipos += 1 ipos += 1
endif endif
enddo enddo
call write_int(6,sum(pt2_F),'Number of tasks')
call write_int(6,ipos,'Number of fragmented tasks') call write_int(6,ipos,'Number of fragmented tasks')
ipos=1 ipos=1
do i= 1, N_det_generators do i= 1, N_det_generators
do j=1,pt2_F(pt2_J(i)) do j=1,pt2_F(pt2_J(i))
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i) write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i)
ipos += 20 ipos += 20
if (ipos > 100000-20) then if (ipos > 400000-20) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task to task server' stop 'Unable to add task to task server'
endif endif
ipos=1 ipos=1
if (ifirst == 0) then
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running'
endif
endif
endif endif
end do end do
enddo enddo
@ -204,14 +210,16 @@ subroutine ZMQ_pt2(E, pt2,relative_error, error)
nproc_target = min(nproc_target,nproc) nproc_target = min(nproc_target,nproc)
endif endif
call omp_set_nested(.true.) call omp_set_nested(.false.)
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) & !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1) &
!$OMP PRIVATE(i) !$OMP PRIVATE(i)
i = omp_get_thread_num() i = omp_get_thread_num()
if (i==0) then if (i==0) then
call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w, error) call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w, error)
pt2(pt2_stoch_istate) = w(pt2_stoch_istate) pt2(pt2_stoch_istate) = w(pt2_stoch_istate)
else else
call pt2_slave_inproc(i) call pt2_slave_inproc(i)
endif endif
@ -259,7 +267,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error)
integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer, external :: zmq_delete_tasks integer, external :: zmq_delete_tasks
integer, external :: zmq_abort integer, external :: zmq_abort
integer, external :: pt2_find_sample integer, external :: pt2_find_sample_lr
integer :: more, n, i, p, c, t, n_tasks, U integer :: more, n, i, p, c, t, n_tasks, U
integer, allocatable :: task_id(:) integer, allocatable :: task_id(:)
@ -321,10 +329,10 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error)
x = 0d0 x = 0d0
do p=pt2_N_teeth, 1, -1 do p=pt2_N_teeth, 1, -1
v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1)) v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
i = pt2_find_sample(v, pt2_cW) i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i) x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
S(p) += x S(p) += x
S2(p) += x**2 S2(p) += x*x
end do end do
avg = E0 + S(t) / dble(c) avg = E0 + S(t) / dble(c)
if ((avg /= 0.d0) .or. (n == N_det_generators) ) then if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
@ -371,13 +379,23 @@ end subroutine
integer function pt2_find_sample(v, w) integer function pt2_find_sample(v, w)
implicit none implicit none
double precision, intent(in) :: v, w(0:N_det_generators) double precision, intent(in) :: v, w(0:N_det_generators)
integer, external :: pt2_find_sample_lr
pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators)
end function
integer function pt2_find_sample_lr(v, w, l_in, r_in)
implicit none
double precision, intent(in) :: v, w(0:N_det_generators)
integer, intent(in) :: l_in,r_in
integer :: i,l,r integer :: i,l,r
l = 0 l=l_in
r = N_det_generators r=r_in
do while(r-l > 1) do while(r-l > 1)
i = (r+l) / 2 i = ishft(r+l,-1)
if(w(i) < v) then if(w(i) < v) then
l = i l = i
else else
@ -390,33 +408,22 @@ integer function pt2_find_sample(v, w)
exit exit
endif endif
enddo enddo
pt2_find_sample = r-1 pt2_find_sample_lr = r-1
end function end function
BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)] BEGIN_PROVIDER [ integer, pt2_n_tasks ]
&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)] implicit none
&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)] BEGIN_DOC
! Number of parallel tasks for the Monte Carlo
END_DOC
pt2_n_tasks = N_det_generators
END_PROVIDER
BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
implicit none implicit none
integer :: N_c, N_j, U, t, i
double precision :: v
logical, allocatable :: d(:)
integer, external :: pt2_find_sample
allocate(d(N_det_generators))
pt2_R(:) = 0
N_c = 0
N_j = pt2_n_0(1)
d(:) = .false.
do i=1,N_j
d(i) = .true.
pt2_J(i) = i
end do
integer :: m
integer, allocatable :: seed(:) integer, allocatable :: seed(:)
integer :: m,i
call random_seed(size=m) call random_seed(size=m)
allocate(seed(m)) allocate(seed(m))
do i=1,m do i=1,m
@ -426,42 +433,86 @@ end function
deallocate(seed) deallocate(seed)
call RANDOM_NUMBER(pt2_u) call RANDOM_NUMBER(pt2_u)
END_PROVIDER
BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
implicit none
integer :: N_c, N_j
integer :: U, t, i
double precision :: v
integer, external :: pt2_find_sample_lr
logical, allocatable :: pt2_d(:)
integer :: m,l,r,k
integer, parameter :: ncache=10000
integer, allocatable :: ii(:,:)
double precision :: dt
allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators))
pt2_R(:) = 0
pt2_d(:) = .false.
N_c = 0
N_j = pt2_n_0(1)
do i=1,N_j
pt2_d(i) = .true.
pt2_J(i) = i
end do
U = 0 U = 0
do while(N_j < pt2_n_tasks)
do while(N_j < N_det_generators) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k)
!ADD_COMB do k=1, ncache
N_c += 1 dt = pt2_u_0
do t=0, pt2_N_teeth-1 do t=1, pt2_N_teeth
v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) v = dt + pt2_W_T *pt2_u(N_c+k)
i = pt2_find_sample(v, pt2_cW) dt = dt + pt2_W_T
if(.not. d(i)) then ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1))
N_j += 1 end do
pt2_J(N_j) = i enddo
d(i) = .true. !$OMP END PARALLEL DO
end if
end do
pt2_R(N_j) = N_c do k=1,ncache
!ADD_COMB
N_c = N_c+1
do t=1, pt2_N_teeth
i = ii(t,k)
if(.not. pt2_d(i)) then
N_j += 1
pt2_J(N_j) = i
pt2_d(i) = .true.
end if
end do
!FILL_TOOTH pt2_R(N_j) = N_c
do while(U < N_det_generators)
U += 1 !FILL_TOOTH
if(.not. d(U)) then do while(U < N_det_generators)
N_j += 1 U += 1
pt2_J(N_j) = U if(.not. pt2_d(U)) then
d(U) = .true. N_j += 1
exit; pt2_J(N_j) = U
end if pt2_d(U) = .true.
exit
end if
end do
if (N_j >= pt2_n_tasks) exit
end do end do
enddo enddo
if(N_det_generators > 1) then if(N_det_generators > 1) then
pt2_R(N_det_generators-1) = 0 pt2_R(N_det_generators-1) = 0
pt2_R(N_det_generators) = N_c pt2_R(N_det_generators) = N_c
end if end if
deallocate(ii,pt2_d)
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ] BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ] &BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_W_T ] &BEGIN_PROVIDER [ double precision, pt2_W_T ]

View File

@ -4,6 +4,7 @@ program selection_slave
! Helper program to compute the PT2 in distributed mode. ! Helper program to compute the PT2 in distributed mode.
END_DOC END_DOC
call omp_set_nested(.false.)
read_wf = .False. read_wf = .False.
distributed_davidson = .False. distributed_davidson = .False.
SOFT_TOUCH read_wf distributed_davidson SOFT_TOUCH read_wf distributed_davidson
@ -133,7 +134,7 @@ subroutine run_wf
call wall_time(t0) call wall_time(t0)
if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_psi_bilinear(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle
call wall_time(t1) call wall_time(t1)

View File

@ -374,22 +374,20 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned,
logical, intent(in) :: bannedOrb(mo_tot_num,2), banned(mo_tot_num, mo_tot_num) logical, intent(in) :: bannedOrb(mo_tot_num,2), banned(mo_tot_num, mo_tot_num)
integer(bit_kind), intent(in) :: mask(N_int, 2) integer(bit_kind), intent(in) :: mask(N_int, 2)
integer(bit_kind) :: alpha(N_int, 2) integer(bit_kind) :: alpha(N_int, 2)
integer, allocatable :: labuf(:), abuf(:) integer, allocatable :: labuf(:), abuf(:), iorder(:)
logical :: ok logical :: ok
integer :: i,j,k,s,st1,st2,st3,st4 integer :: i,j,k,s,st1,st2,st3,st4,t2
integer :: lindex(mo_tot_num,2), lindex_end(mo_tot_num, 2) integer :: lindex(mo_tot_num,2), lindex_end(mo_tot_num, 2)
integer :: s1, s2, stamo integer :: s1, s2, stamo
logical,allocatable :: putten(:)
integer(bit_kind), allocatable :: det_minilist(:,:,:) integer(bit_kind), allocatable :: det_minilist(:,:,:)
allocate(abuf(siz), labuf(N_det), putten(N_det), det_minilist(N_int, 2, N_det)) allocate(abuf(siz), labuf(N_det), iorder(siz), det_minilist(N_int, 2, N_det))
do i=1,siz do i=1,siz
abuf(i) = psi_from_sorted_gen(rabuf(i)) abuf(i) = psi_from_sorted_gen(rabuf(i))
end do end do
putten = .false.
st1 = indexes_end(0,0)-1 !! st1 = indexes_end(0,0)-1 !!
if(st1 > 0) then if(st1 > 0) then
@ -419,13 +417,21 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned,
lindex_end(:,1) = indexes_end(1:, 0)-1 lindex_end(:,1) = indexes_end(1:, 0)-1
end if end if
do i=1,mo_tot_num
do j=1,2
if(lindex(i,j) > 0 .and. lindex_end(i,j) > lindex(i,j)) then
call isort(abuf(lindex(i,j)), iorder, lindex_end(i,j)-lindex(i,j)+1)
end if
end do
end do
do i=1,mo_tot_num do i=1,mo_tot_num
if(bannedOrb(i,s1)) cycle if(bannedOrb(i,s1)) cycle
if(lindex(i,s1) /= 0) then if(lindex(i,s1) /= 0) then
st2 = st1 + 1 + lindex_end(i,s1)-lindex(i,s1) st2 = st1 + 1 + lindex_end(i,s1)-lindex(i,s1)
labuf(st1:st2-1) = abuf(lindex(i,s1):lindex_end(i,s1)) labuf(st1:st2-1) = abuf(lindex(i,s1):lindex_end(i,s1))
do j=st1,st2-1 do j=st1,st2-1
putten(labuf(j)) = .true.
det_minilist(:,:,j) = psi_det(:,:,labuf(j)) det_minilist(:,:,j) = psi_det(:,:,labuf(j))
end do end do
else else
@ -441,12 +447,25 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned,
do j=stamo,mo_tot_num do j=stamo,mo_tot_num
if(bannedOrb(j,s2) .or. banned(i,j)) cycle if(bannedOrb(j,s2) .or. banned(i,j)) cycle
if(lindex(j,s2) /= 0) then if(lindex(j,s2) /= 0) then
k = lindex(j,s2)
st3 = st2 st3 = st2
do k=lindex(j,s2), lindex_end(j,s2) t2 = st1
if(.not. putten(abuf(k))) then do while(k <= lindex_end(j,s2))
if(t2 >= st2) then
labuf(st3) = abuf(k) labuf(st3) = abuf(k)
det_minilist(:,:,st3) = psi_det(:,:,abuf(k)) det_minilist(:,:,st3) = psi_det(:,:,abuf(k))
st3 += 1 st3 += 1
k += 1
else if(abuf(k) > labuf(t2)) then
t2 += 1
else if(abuf(k) < labuf(t2)) then
labuf(st3) = abuf(k)
det_minilist(:,:,st3) = psi_det(:,:,abuf(k))
st3 += 1
k += 1
else
k += 1
t2 += 1
end if end if
end do end do
else else
@ -468,13 +487,6 @@ subroutine alpha_callback_mask(delta_ij_loc, i_gen, sp, mask, bannedOrb, banned,
call dress_with_alpha_buffer(N_states, N_det, N_int, delta_ij_loc, i_gen, labuf, det_minilist, st4-1, alpha, iproc) call dress_with_alpha_buffer(N_states, N_det, N_int, delta_ij_loc, i_gen, labuf, det_minilist, st4-1, alpha, iproc)
end if end if
end do end do
if(lindex(i,s1) /= 0) then
do j=st1,st2-1
putten(labuf(j)) = .false.
end do
end if
end do end do
end subroutine end subroutine

View File

@ -16,7 +16,8 @@ END_PROVIDER
integer :: e integer :: e
e = elec_num - n_core_orb * 2 e = elec_num - n_core_orb * 2
pt2_n_tasks_max = 1 + min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10) pt2_n_tasks_max = 1 + min((e*(e-1))/2, int(dsqrt(dble(N_det_generators)))/10)
do i=1,N_det_generators pt2_F(:) = 1
do i=1,min(10000,N_det_generators)
pt2_F(i) = 1 + dble(pt2_n_tasks_max)*maxval(dsqrt(dabs(psi_coef_sorted_gen(i,1:N_states)))) pt2_F(i) = 1 + dble(pt2_n_tasks_max)*maxval(dsqrt(dabs(psi_coef_sorted_gen(i,1:N_states))))
enddo enddo
@ -25,7 +26,7 @@ END_PROVIDER
pt2_N_teeth = 1 pt2_N_teeth = 1
else else
pt2_minDetInFirstTeeth = min(5, N_det_generators) pt2_minDetInFirstTeeth = min(5, N_det_generators)
do pt2_N_teeth=100,2,-1 do pt2_N_teeth=50,2,-1
if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
end do end do
end if end if
@ -127,7 +128,7 @@ END_PROVIDER
&BEGIN_PROVIDER [ integer, dress_N_cp ] &BEGIN_PROVIDER [ integer, dress_N_cp ]
implicit none implicit none
integer :: N_c, N_j, U, t, i, m integer :: N_c, N_j, U, t, i, m
double precision :: v double precision :: v, dt
double precision, allocatable :: tilde_M(:) double precision, allocatable :: tilde_M(:)
logical, allocatable :: d(:) logical, allocatable :: d(:)
integer, external :: dress_find_sample integer, external :: dress_find_sample
@ -141,7 +142,7 @@ END_PROVIDER
N_j = pt2_n_0(1) N_j = pt2_n_0(1)
d(:) = .false. d(:) = .false.
! Set here the positions of the checkpoints ! Set here the positions of the checkpoints
! U = N_det_generators/((dress_N_cp_max**2+dress_N_cp_max)/2)+1 ! U = N_det_generators/((dress_N_cp_max**2+dress_N_cp_max)/2)+1
! do i=1, dress_N_cp_max-1 ! do i=1, dress_N_cp_max-1
! dress_M_m(i) = U * (((i*i)+i)/2) + 10 ! dress_M_m(i) = U * (((i*i)+i)/2) + 10
@ -172,11 +173,13 @@ END_PROVIDER
U = 0 U = 0
m = 1 m = 1
! TODO Slow loop : to optimize
do while(N_j < N_det_generators) do while(N_j < N_det_generators)
!ADD_COMB !ADD_COMB
N_c += 1 N_c += 1
dt = 0.d0
do t=0, pt2_N_teeth-1 do t=0, pt2_N_teeth-1
v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c)) v = pt2_u_0 + pt2_W_T * (dt + pt2_u(N_c))
i = dress_find_sample(v, pt2_cW) i = dress_find_sample(v, pt2_cW)
tilde_M(i) += 1d0 tilde_M(i) += 1d0
if(.not. d(i)) then if(.not. d(i)) then
@ -184,6 +187,7 @@ END_PROVIDER
pt2_J_(N_j) = i pt2_J_(N_j) = i
d(i) = .true. d(i) = .true.
end if end if
dt = dt + 1.d0
end do end do
!FILL_TOOTH !FILL_TOOTH
@ -193,7 +197,7 @@ END_PROVIDER
N_j += 1 N_j += 1
pt2_J_(N_j) = U pt2_J_(N_j) = U
d(U) = .true. d(U) = .true.
exit; exit
end if end if
end do end do
@ -254,7 +258,7 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
state_average_weight(dress_stoch_istate) = 1.d0 state_average_weight(dress_stoch_istate) = 1.d0
TOUCH state_average_weight dress_stoch_istate TOUCH state_average_weight dress_stoch_istate
provide nproc mo_bielec_integrals_in_map mo_mono_elec_integral psi_selectors pt2_F provide nproc mo_bielec_integrals_in_map mo_mono_elec_integral psi_selectors pt2_F pt2_N_teeth dress_M_m
print *, '========== ================= ================= =================' print *, '========== ================= ================= ================='
print *, ' Samples Energy Stat. Error Seconds ' print *, ' Samples Energy Stat. Error Seconds '
@ -350,8 +354,6 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
nproc_target = min(nproc_target,nproc) nproc_target = min(nproc_target,nproc)
endif endif
call omp_set_nested(.true.)
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) & !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) &
!$OMP PRIVATE(i) !$OMP PRIVATE(i)
i = omp_get_thread_num() i = omp_get_thread_num()
@ -372,7 +374,6 @@ subroutine ZMQ_dress(E, dress, delta_out, delta_s2_out, relative_error)
enddo enddo
FREE dress_stoch_istate FREE dress_stoch_istate
state_average_weight(:) = state_average_weight_save(:) state_average_weight(:) = state_average_weight_save(:)
! call omp_set_nested(.false.)
TOUCH state_average_weight TOUCH state_average_weight
deallocate(delta,delta_s2) deallocate(delta,delta_s2)
@ -631,7 +632,7 @@ integer function dress_find_sample(v, w)
r = N_det_generators r = N_det_generators
do while(r-l > 1) do while(r-l > 1)
i = (r+l) / 2 i = ishft(r+l,-1)
if(w(i) < v) then if(w(i) < v) then
l = i l = i
else else

View File

@ -43,6 +43,7 @@ subroutine run_dress_slave(thread,iproce,energy)
integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc) integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc)
logical :: interesting logical :: interesting
PROVIDE dress_dot_F psi_coef dress_stoch_istate dress_e N_int
allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2)) allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2))
allocate(cp(N_states, N_det, dress_N_cp, 2)) allocate(cp(N_states, N_det, dress_N_cp, 2))
@ -71,6 +72,8 @@ subroutine run_dress_slave(thread,iproce,energy)
provide psi_energy provide psi_energy
ending = dress_N_cp+1 ending = dress_N_cp+1
ntask_tbd = 0 ntask_tbd = 0
call omp_set_nested(.true.)
!$OMP PARALLEL DEFAULT(SHARED) & !$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(interesting, breve_delta_m, task_id) & !$OMP PRIVATE(interesting, breve_delta_m, task_id) &
!$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) & !$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) &
@ -81,6 +84,7 @@ subroutine run_dress_slave(thread,iproce,energy)
zmq_socket_push = new_zmq_push_socket(thread) zmq_socket_push = new_zmq_push_socket(thread)
integer, external :: connect_to_taskserver integer, external :: connect_to_taskserver
!$OMP CRITICAL !$OMP CRITICAL
call omp_set_nested(.false.)
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
print *, irp_here, ': Unable to connect to task server' print *, irp_here, ': Unable to connect to task server'
stop -1 stop -1
@ -292,6 +296,7 @@ subroutine run_dress_slave(thread,iproce,energy)
!$OMP END CRITICAL !$OMP END CRITICAL
!$OMP END PARALLEL !$OMP END PARALLEL
call omp_set_nested(.false.)
! do i=0,dress_N_cp+1 ! do i=0,dress_N_cp+1
! call omp_destroy_lock(lck_sto(i)) ! call omp_destroy_lock(lck_sto(i))
! end do ! end do

View File

@ -8,6 +8,7 @@ program shifted_bk
PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order PROVIDE psi_bilinear_matrix_rows psi_det_sorted_gen_order psi_bilinear_matrix_order
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
PROVIDE psi_bilinear_matrix_transp_order PROVIDE psi_bilinear_matrix_transp_order
PROVIDE psi_energy
!call diagonalize_CI() !call diagonalize_CI()

View File

@ -50,7 +50,7 @@ subroutine run_w
PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master PROVIDE psi_det psi_coef threshold_generators threshold_selectors state_average_weight mpi_master
PROVIDE zmq_state N_det_selectors dress_stoch_istate N_det dress_e0_denominator PROVIDE zmq_state N_det_selectors dress_stoch_istate N_det dress_e0_denominator
PROVIDE N_det_generators N_states N_states_diag PROVIDE N_det_generators N_states N_states_diag psi_energy
IRP_IF MPI IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr) call MPI_BARRIER(MPI_COMM_WORLD, ierr)
IRP_ENDIF IRP_ENDIF
@ -89,8 +89,8 @@ subroutine run_w
! -------- ! --------
call wall_time(t0) call wall_time(t0)
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states_diag) == -1) cycle
call wall_time(t1) call wall_time(t1)
@ -115,23 +115,21 @@ subroutine run_w
! Dress ! Dress
! --- ! ---
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
call wall_time(t0) call wall_time(t0)
if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
if (zmq_get_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) cycle if (zmq_get_int(zmq_to_qp_run_socket,1,'dress_stoch_istate',dress_stoch_istate) == -1) cycle
if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
psi_energy(1:N_states) = energy(1:N_states) psi_energy(1:N_states) = energy(1:N_states)
TOUCH psi_energy state_average_weight dress_stoch_istate threshold_selectors threshold_generators TOUCH psi_energy state_average_weight dress_stoch_istate threshold_selectors threshold_generators
if (mpi_master) then if (mpi_master) then
@ -145,9 +143,19 @@ subroutine run_w
call wall_time(t1) call wall_time(t1)
call write_double(6,(t1-t0),'Broadcast time') call write_double(6,(t1-t0),'Broadcast time')
IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
print *, irp_here, 'error in barrier'
endif
IRP_ENDIF
call dress_slave_tcp(0, energy) if (.true.) then
call omp_set_nested(.True.)
call run_dress_slave(0,i,dress_e0_denominator)
endif
print *, 'PT2 done'
FREE state_average_weight
IRP_IF MPI IRP_IF MPI
call MPI_BARRIER(MPI_COMM_WORLD, ierr) call MPI_BARRIER(MPI_COMM_WORLD, ierr)

View File

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash -x
# #
# Creates a self-contained binary distribution in the form of a tar.gz file # Creates a self-contained binary distribution in the form of a tar.gz file
# #
@ -22,7 +22,7 @@ if [[ -f quantum_package.rc \
&& -d ocaml \ && -d ocaml \
&& -d scripts ]] && -d scripts ]]
then then
head -1 README.md | grep "Quantum package" > /dev/null head -1 README.md | grep "IMPORTANT" > /dev/null
if [[ $? -ne 0 ]] if [[ $? -ne 0 ]]
then then
echo "This doesn't look like a quantum_package directory (README.md)" echo "This doesn't look like a quantum_package directory (README.md)"
@ -35,25 +35,11 @@ fi
# Build all sources # Build all sources
for dir in ${QP_ROOT}/{src} ninja
do if [[ $? -ne 0 ]]
pushd $dir then
ninja echo "Error building ${dir}"
if [[ $? -ne 0 ]] fi
then
echo "Error building ${dir}"
fi
popd
done
for dir in ${QP_ROOT}/{ocaml}
do
make -C ${dir}
if [[ $? -ne 0 ]]
then
echo "Error building ${dir}"
fi
done
# Copy the files in the static directory # Copy the files in the static directory
@ -79,10 +65,10 @@ echo "Creating root of static directory"
# --------------------------------- # ---------------------------------
rm -rf -- ${QPACKAGE_STATIC} rm -rf -- ${QPACKAGE_STATIC}
mkdir -p -- ${QPACKAGE_STATIC}/{bin,lib,extra_lib,data} mkdir -p -- ${QPACKAGE_STATIC}/{bin,lib,extra_lib,data,install}
if [[ $? -ne 0 ]] ; if [[ $? -ne 0 ]] ;
then then
echo "Error creating ${QPACKAGE_STATIC}/{bin,lib,extra_lib,data}" echo "Error creating ${QPACKAGE_STATIC}/{bin,lib,extra_lib,data,install}"
exit 1 exit 1
fi fi
@ -108,7 +94,7 @@ fi
cp -- ${FORTRAN_EXEC} ${OCAML_EXEC} ${QPACKAGE_STATIC}/bin cp -- ${FORTRAN_EXEC} ${OCAML_EXEC} ${QPACKAGE_STATIC}/bin
if [[ $? -ne 0 ]] ; if [[ $? -ne 0 ]] ;
then then
echo 'cp -- ${FORTRAN_EXEC} ${OCAML_EXEC} ${QPACKAGE_STATIC}/bin' cp -- ${FORTRAN_EXEC} ${OCAML_EXEC} ${QPACKAGE_STATIC}/bin
exit 1 exit 1
fi fi
@ -154,10 +140,10 @@ cp -- ${QPACKAGE_STATIC}/extra_lib/lib{[gi]omp*,mkl*,lapack*,blas*,z*} ${QPACKAG
echo "Copying EMSL_Basis directory" echo "Copying EMSL_Basis directory"
# ---------------------------- # ----------------------------
cp -r -- ${QP_ROOT}/EMSL_Basis ${QPACKAGE_STATIC}/ cp -r -- ${QP_ROOT}/install/emsl ${QPACKAGE_STATIC}/install
if [[ $? -ne 0 ]] ; if [[ $? -ne 0 ]] ;
then then
echo 'cp -r -- ${QP_ROOT}/EMSL_Basis ${QPACKAGE_STATIC}/' echo 'cp -r -- ${QP_ROOT}/install/emsl ${QPACKAGE_STATIC}/'
exit 1 exit 1
fi fi
@ -180,12 +166,27 @@ echo "Creating quantum_package.rc"
cat << EOF > ${QPACKAGE_STATIC}/quantum_package.rc cat << EOF > ${QPACKAGE_STATIC}/quantum_package.rc
export QP_ROOT=\$( cd \$(dirname "\${BASH_SOURCE}") ; pwd -P ) export QP_ROOT=\$( cd \$(dirname "\${BASH_SOURCE}") ; pwd -P )
export LD_LIBRARY_PATH="\${QP_ROOT}"/lib:\${LD_LIBRARY_PATH}
export LIBRARY_PATH="\${QP_ROOT}"/lib:\${LIBRARY_PATH} export QP_EZFIO=\${QP_ROOT}/install/EZFIO
export PYTHONPATH="\${QP_ROOT}"/scripts:\${PYTHONPATH} export QP_PYTHON=\${QP_ROOT}/scripts:\${QP_ROOT}/scripts/ezfio_interface:\${QP_ROOT}/scripts/utility:\${QP_ROOT}/scripts/module:\${QP_ROOT}/scripts/pseudo:\${QP_ROOT}/scripts/compilation:\${QP_ROOT}/install/bats:\${QP_ROOT}/install/Downloads:\${QP_ROOT}/install/eigen:\${QP_ROOT}/install/p_graphviz:\${QP_ROOT}/install/gmp:\${QP_ROOT}/install/resultsFile:\${QP_ROOT}/install/_build:\${QP_ROOT}/install/emsl:\${QP_ROOT}/install/scripts:\${QP_ROOT}/install/docopt:\${QP_ROOT}/install/irpf90:\${QP_ROOT}/install/zlib:\${QP_ROOT}/install/EZFIO
export PATH="\${QP_ROOT}"/scripts:\${PATH}
export PATH="\${QP_ROOT}"/bin:\${PATH} export IRPF90=\${QP_ROOT}/bin/irpf90
export PATH="\${QP_ROOT}"/ocaml:\${PATH} export NINJA=\${QP_ROOT}/bin/ninja
function qp_prepend_export () {
eval "value_1="\\\${\$1}""
if [[ -z \$value_1 ]] ; then
echo "\${2}:"
else
echo "\${2}:\${value_1}"
fi
}
export PYTHONPATH=\$(qp_prepend_export "PYTHONPATH" "\${QP_EZFIO}/Python":"\${QP_PYTHON}")
export PATH=\$(qp_prepend_export "PATH" "\${QP_PYTHON}":"\${QP_ROOT}"/bin:"\${QP_ROOT}"/ocaml)
export LD_LIBRARY_PATH=\$(qp_prepend_export "LD_LIBRARY_PATH" "\${QP_ROOT}"/lib:"\${QP_ROOT}"/extra_lib:"\${QP_ROOT}"/lib64)
export LIBRARY_PATH=\$(qp_prepend_export "LIBRARY_PATH" "\${QP_ROOT}"/lib:"\${QP_ROOT}"/extra_lib:"\${QP_ROOT}"/lib64)
export C_INCLUDE_PATH=\$(qp_prepend_export "C_INCLUDE_PATH" "\${QP_ROOT}"/include)
# export QP_NIC=ib0
EOF EOF
#exit 0 #exit 0

View File

@ -309,7 +309,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server' stop 'Unable to put N_states_diag on ZMQ server'
endif endif
if (zmq_put_psi_bilinear(zmq_to_qp_run_socket,1) == -1) then if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
stop 'Unable to put psi on ZMQ server' stop 'Unable to put psi on ZMQ server'
endif endif
energy = 0.d0 energy = 0.d0
@ -323,7 +323,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
integer :: istep, imin, imax, ishift, ipos integer :: istep, imin, imax, ishift, ipos
integer, external :: add_task_to_taskserver integer, external :: add_task_to_taskserver
integer, parameter :: tasksize=10000 integer, parameter :: tasksize=40000
character*(100000) :: task character*(100000) :: task
istep=1 istep=1
ishift=0 ishift=0
@ -331,7 +331,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
ipos=1 ipos=1
do imin=1,N_det,10000 do imin=1,N_det,tasksize
imax = min(N_det,imin-1+tasksize) imax = min(N_det,imin-1+tasksize)
do ishift=0,istep-1 do ishift=0,istep-1
write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|' write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|'
@ -352,12 +352,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
ipos=1 ipos=1
endif endif
integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running'
endif
allocate(u_t(N_st,N_det)) allocate(u_t(N_st,N_det))
do k=1,N_st do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
@ -378,7 +372,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
integer*8 :: rc8 integer*8 :: rc8
double precision :: energy(N_st) double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag, zmq_put_psi_bilinear integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix integer, external :: zmq_put_dmatrix
if (size(u_t) < 8388608) then if (size(u_t) < 8388608) then
@ -396,6 +390,10 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
deallocate(u_t) deallocate(u_t)
integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running'
endif
v_0 = 0.d0 v_0 = 0.d0
s_0 = 0.d0 s_0 = 0.d0
@ -411,11 +409,22 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
!$OMP END PARALLEL !$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson') call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson')
!$OMP PARALLEL
!$OMP SINGLE
do k=1,N_st do k=1,N_st
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
!$OMP END TASK
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
!$OMP END TASK
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
!$OMP END TASK
enddo enddo
!$OMP END SINGLE
!$OMP TASKWAIT
!$OMP END PARALLEL
end end

View File

@ -491,6 +491,7 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
deallocate(u_1) deallocate(u_1)
endif endif
double precision :: norm double precision :: norm
!$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED)
do i=1,N_st do i=1,N_st
norm = u_dot_u(u_0(1,i),n) norm = u_dot_u(u_0(1,i),n)
if (norm /= 0.d0) then if (norm /= 0.d0) then
@ -499,6 +500,7 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
e_0(i) = 0.d0 e_0(i) = 0.d0
endif endif
enddo enddo
!$OMP END PARALLEL DO
deallocate (s_0, v_0) deallocate (s_0, v_0)
end end

View File

@ -338,6 +338,7 @@ end subroutine
call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, & call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, &
psi_det_sorted_bit, psi_coef_sorted_bit, N_states) psi_det_sorted_bit, psi_coef_sorted_bit, N_states)
END_PROVIDER END_PROVIDER
subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out, N_st) subroutine sort_dets_by_det_search_key(Ndet, det_in, coef_in, det_out, coef_out, N_st)

View File

@ -36,6 +36,7 @@ BEGIN_PROVIDER [ integer(bit_kind), psi_det_alpha, (N_int,psi_det_size) ]
psi_det_alpha(k,i) = psi_det(k,1,i) psi_det_alpha(k,i) = psi_det(k,1,i)
enddo enddo
enddo enddo
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ integer(bit_kind), psi_det_beta, (N_int,psi_det_size) ] BEGIN_PROVIDER [ integer(bit_kind), psi_det_beta, (N_int,psi_det_size) ]
@ -71,10 +72,13 @@ BEGIN_TEMPLATE
allocate ( iorder(N_det), bit_tmp(N_det), duplicate(N_det) ) allocate ( iorder(N_det), bit_tmp(N_det), duplicate(N_det) )
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
do i=1,N_det do i=1,N_det
iorder(i) = i iorder(i) = i
bit_tmp(i) = spin_det_search_key(psi_det_$alpha(1,i),N_int) bit_tmp(i) = spin_det_search_key(psi_det_$alpha(1,i),N_int)
enddo enddo
!$OMP END PARALLEL DO
call i8sort(bit_tmp,iorder,N_det) call i8sort(bit_tmp,iorder,N_det)
@ -126,6 +130,7 @@ BEGIN_TEMPLATE
N_det_$alpha_unique = j N_det_$alpha_unique = j
deallocate (iorder, bit_tmp, duplicate) deallocate (iorder, bit_tmp, duplicate)
END_PROVIDER END_PROVIDER
SUBST [ alpha ] SUBST [ alpha ]
@ -430,11 +435,19 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states)
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
call i8sort(to_sort, psi_bilinear_matrix_order, N_det) call i8sort(to_sort, psi_bilinear_matrix_order, N_det)
!$OMP PARALLEL
!$OMP SINGLE
call iset_order(psi_bilinear_matrix_rows,psi_bilinear_matrix_order,N_det) call iset_order(psi_bilinear_matrix_rows,psi_bilinear_matrix_order,N_det)
!$OMP END SINGLE
!$OMP SINGLE
call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det) call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det)
!$OMP END SINGLE
!$OMP DO
do l=1,N_states do l=1,N_states
call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det) call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det)
enddo enddo
!$OMP END DO
!$OMP END PARALLEL
deallocate(to_sort) deallocate(to_sort)
ASSERT (minval(psi_bilinear_matrix_rows) == 1) ASSERT (minval(psi_bilinear_matrix_rows) == 1)
ASSERT (minval(psi_bilinear_matrix_columns) == 1) ASSERT (minval(psi_bilinear_matrix_columns) == 1)
@ -442,6 +455,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states)
ASSERT (maxval(psi_bilinear_matrix_rows) == N_det_alpha_unique) ASSERT (maxval(psi_bilinear_matrix_rows) == N_det_alpha_unique)
ASSERT (maxval(psi_bilinear_matrix_columns) == N_det_beta_unique) ASSERT (maxval(psi_bilinear_matrix_columns) == N_det_beta_unique)
ASSERT (maxval(psi_bilinear_matrix_order) == N_det) ASSERT (maxval(psi_bilinear_matrix_order) == N_det)
END_PROVIDER END_PROVIDER
@ -477,6 +491,7 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1)
l = psi_bilinear_matrix_columns(1) l = psi_bilinear_matrix_columns(1)
psi_bilinear_matrix_columns_loc(l) = 1 psi_bilinear_matrix_columns_loc(l) = 1
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k,l)
do k=2,N_det do k=2,N_det
if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then
cycle cycle
@ -488,9 +503,11 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1)
stop '(psi_bilinear_matrix_columns(k) < 1)' stop '(psi_bilinear_matrix_columns(k) < 1)'
endif endif
enddo enddo
!$OMP END PARALLEL DO
psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1 psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1
ASSERT (minval(psi_bilinear_matrix_columns_loc) == 1) ASSERT (minval(psi_bilinear_matrix_columns_loc) == 1)
ASSERT (maxval(psi_bilinear_matrix_columns_loc) == N_det+1) ASSERT (maxval(psi_bilinear_matrix_columns_loc) == N_det+1)
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ] BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ]
@ -508,7 +525,6 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_
END_DOC END_DOC
integer :: i,j,k,l integer :: i,j,k,l
PROVIDE psi_coef_sorted_bit PROVIDE psi_coef_sorted_bit
integer*8, allocatable :: to_sort(:) integer*8, allocatable :: to_sort(:)
@ -542,9 +558,11 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_
call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1) call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1)
call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det) call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det)
call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det) call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(l)
do l=1,N_states do l=1,N_states
call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det) call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det)
enddo enddo
!$OMP END PARALLEL DO
deallocate(to_sort) deallocate(to_sort)
ASSERT (minval(psi_bilinear_matrix_transp_columns) == 1) ASSERT (minval(psi_bilinear_matrix_transp_columns) == 1)
ASSERT (minval(psi_bilinear_matrix_transp_rows) == 1) ASSERT (minval(psi_bilinear_matrix_transp_rows) == 1)
@ -552,6 +570,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_
ASSERT (maxval(psi_bilinear_matrix_transp_columns) == N_det_beta_unique) ASSERT (maxval(psi_bilinear_matrix_transp_columns) == N_det_beta_unique)
ASSERT (maxval(psi_bilinear_matrix_transp_rows) == N_det_alpha_unique) ASSERT (maxval(psi_bilinear_matrix_transp_rows) == N_det_alpha_unique)
ASSERT (maxval(psi_bilinear_matrix_transp_order) == N_det) ASSERT (maxval(psi_bilinear_matrix_transp_order) == N_det)
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_unique+1) ] BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_unique+1) ]
@ -564,6 +583,7 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_uniq
l = psi_bilinear_matrix_transp_rows(1) l = psi_bilinear_matrix_transp_rows(1)
psi_bilinear_matrix_transp_rows_loc(l) = 1 psi_bilinear_matrix_transp_rows_loc(l) = 1
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k,l)
do k=2,N_det do k=2,N_det
if (psi_bilinear_matrix_transp_rows(k) == psi_bilinear_matrix_transp_rows(k-1)) then if (psi_bilinear_matrix_transp_rows(k) == psi_bilinear_matrix_transp_rows(k-1)) then
cycle cycle
@ -572,9 +592,11 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_uniq
psi_bilinear_matrix_transp_rows_loc(l) = k psi_bilinear_matrix_transp_rows_loc(l) = k
endif endif
enddo enddo
!$OMP END PARALLEL DO
psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1 psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1
ASSERT (minval(psi_bilinear_matrix_transp_rows_loc) == 1) ASSERT (minval(psi_bilinear_matrix_transp_rows_loc) == 1)
ASSERT (maxval(psi_bilinear_matrix_transp_rows_loc) == N_det+1) ASSERT (maxval(psi_bilinear_matrix_transp_rows_loc) == N_det+1)
END_PROVIDER END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ] BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ]
@ -584,7 +606,6 @@ BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ]
! Order which allows to go from psi_bilinear_matrix_order_transp to psi_bilinear_matrix ! Order which allows to go from psi_bilinear_matrix_order_transp to psi_bilinear_matrix
END_DOC END_DOC
integer :: k integer :: k
psi_bilinear_matrix_order_transp_reverse = -1 psi_bilinear_matrix_order_transp_reverse = -1
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k)
do k=1,N_det do k=1,N_det

View File

@ -61,6 +61,20 @@ program bench_maps
cpu = (cpu1 - cpu0)/count_rate cpu = (cpu1 - cpu0)/count_rate
print *, 'loop ijkl : ', cpu/dble(ii) print *, 'loop ijkl : ', cpu/dble(ii)
call system_clock(cpu0, count_rate, count_max)
do ii=1,100000_8
call random_number(r)
i = int(r*mo_tot_num)+1
call random_number(r)
j = int(r*mo_tot_num)+1
call random_number(r)
k = int(r*mo_tot_num)+1
call random_number(r)
l = int(r*mo_tot_num)+1
call get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
enddo
call system_clock(cpu1, count_rate, count_max)
ii=0 ii=0
call system_clock(cpu0, count_rate, count_max) call system_clock(cpu0, count_rate, count_max)
do jj=1,10 do jj=1,10
@ -79,6 +93,20 @@ program bench_maps
cpu = (cpu1 - cpu0)/count_rate cpu = (cpu1 - cpu0)/count_rate
print *, 'loop ikjl : ', cpu/dble(ii) print *, 'loop ikjl : ', cpu/dble(ii)
call system_clock(cpu0, count_rate, count_max)
do ii=1,100000_8
call random_number(r)
i = int(r*mo_tot_num)+1
call random_number(r)
j = int(r*mo_tot_num)+1
call random_number(r)
k = int(r*mo_tot_num)+1
call random_number(r)
l = int(r*mo_tot_num)+1
call get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
enddo
call system_clock(cpu1, count_rate, count_max)
ii=0 ii=0
call system_clock(cpu0, count_rate, count_max) call system_clock(cpu0, count_rate, count_max)
do jj=1,10 do jj=1,10
@ -97,6 +125,20 @@ program bench_maps
cpu = (cpu1 - cpu0)/count_rate cpu = (cpu1 - cpu0)/count_rate
print *, 'loop ijlk : ', cpu/dble(ii) print *, 'loop ijlk : ', cpu/dble(ii)
call system_clock(cpu0, count_rate, count_max)
do ii=1,100000_8
call random_number(r)
i = int(r*mo_tot_num)+1
call random_number(r)
j = int(r*mo_tot_num)+1
call random_number(r)
k = int(r*mo_tot_num)+1
call random_number(r)
l = int(r*mo_tot_num)+1
call get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
enddo
call system_clock(cpu1, count_rate, count_max)
ii=0 ii=0
call system_clock(cpu0, count_rate, count_max) call system_clock(cpu0, count_rate, count_max)
do jj=1,10 do jj=1,10

View File

@ -37,7 +37,11 @@ BEGIN_TEMPLATE
integer,intent(in) :: isize integer,intent(in) :: isize
$type,intent(inout) :: x(isize) $type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize) integer,intent(inout) :: iorder(isize)
!$OMP PARALLEL
!$OMP SINGLE
call rec_$X_quicksort(x,iorder,isize,1,isize) call rec_$X_quicksort(x,iorder,isize,1,isize)
!$OMP END SINGLE
!$OMP END PARALLEL
end end
recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last) recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last)
@ -70,11 +74,16 @@ BEGIN_TEMPLATE
j=j-1 j=j-1
enddo enddo
if (first < i-1) then if (first < i-1) then
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,first,i) if (i-first > 100000)
call rec_$X_quicksort(x, iorder, isize, first, i-1) call rec_$X_quicksort(x, iorder, isize, first, i-1)
!$OMP END TASK
endif endif
if (j+1 < last) then if (j+1 < last) then
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(isize,last,j) if (last-j > 100000)
call rec_$X_quicksort(x, iorder, isize, j+1, last) call rec_$X_quicksort(x, iorder, isize, j+1, last)
!$OMP END TASK
endif endif
!$OMP TASKWAIT
end end
subroutine heap_$Xsort(x,iorder,isize) subroutine heap_$Xsort(x,iorder,isize)
@ -281,7 +290,8 @@ BEGIN_TEMPLATE
$type,intent(inout) :: x(isize) $type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize) integer,intent(inout) :: iorder(isize)
integer :: n integer :: n
call $Xradix_sort(x,iorder,isize,-1) ! call $Xradix_sort(x,iorder,isize,-1)
call quick_$Xsort(x,iorder,isize)
end subroutine $Xsort end subroutine $Xsort
SUBST [ X, type ] SUBST [ X, type ]

View File

@ -246,7 +246,7 @@ IRP_ENDIF
! stop 'Unable to set ZMQ_RCVBUF on pull socket' ! stop 'Unable to set ZMQ_RCVBUF on pull socket'
! endif ! endif
rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,2,4) rc = f77_zmq_setsockopt(new_zmq_pull_socket,ZMQ_RCVHWM,10,4)
if (rc /= 0) then if (rc /= 0) then
stop 'Unable to set ZMQ_RCVHWM on pull socket' stop 'Unable to set ZMQ_RCVHWM on pull socket'
endif endif
@ -323,7 +323,7 @@ IRP_ENDIF
stop 'Unable to set ZMQ_LINGER on push socket' stop 'Unable to set ZMQ_LINGER on push socket'
endif endif
rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,2,4) rc = f77_zmq_setsockopt(new_zmq_push_socket,ZMQ_SNDHWM,10,4)
if (rc /= 0) then if (rc /= 0) then
stop 'Unable to set ZMQ_SNDHWM on push socket' stop 'Unable to set ZMQ_SNDHWM on push socket'
endif endif
@ -783,21 +783,31 @@ integer function zmq_abort(zmq_to_qp_run_socket)
! Aborts a running parallel computation ! Aborts a running parallel computation
END_DOC END_DOC
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer :: rc, sze integer :: rc, sze, i
integer, parameter :: count_max=60
character*(512) :: message character*(512) :: message
zmq_abort = 0 zmq_abort = 0
write(message,*) 'abort ' write(message,*) 'abort '
sze = len(trim(message)) sze = len(trim(message))
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0) do i=1,count_max
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)
if (rc == sze) exit
call sleep(1)
enddo
if (rc /= sze) then if (rc /= sze) then
print *, 'zmq_abort: rc /= sze', rc, sze print *, 'zmq_abort: rc /= sze', rc, sze
zmq_abort = -1 zmq_abort = -1
return return
endif endif
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0) do i=1,count_max
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
if (trim(message(1:rc)) == 'ok') exit
call sleep(1)
enddo
if (trim(message(1:rc)) /= 'ok') then if (trim(message(1:rc)) /= 'ok') then
print *, 'zmq_abort: ', rc, ':', trim(message(1:rc)) print *, 'zmq_abort: ', rc, ':', trim(message(1:rc))
zmq_abort = -1 zmq_abort = -1