10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-25 05:43:47 +01:00

sparse vectors

This commit is contained in:
Yann Garniron 2018-04-29 17:09:46 +02:00
parent 274bb043c2
commit 12e527157c
3 changed files with 141 additions and 148 deletions

View File

@ -32,7 +32,7 @@ OPENMP : 1 ; Append OpenMP flags
# #
[OPT] [OPT]
FC : -traceback FC : -traceback
FCFLAGS : -xAVX -O2 -ip -ftz -g FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g
# Profiling flags # Profiling flags
################# #################

View File

@ -66,7 +66,7 @@ subroutine ZMQ_dress(E, dress, delta, delta_s2, relative_error)
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer :: ipos, sz integer :: ipos, sz
integer :: block(50), block_i, cur_tooth_reduce, ntas integer :: block(8), block_i, cur_tooth_reduce, ntas
logical :: flushme logical :: flushme
block = 0 block = 0
block_i = 0 block_i = 0
@ -167,8 +167,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
implicit none implicit none
integer, parameter :: delta_loc_N = 1
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(in) :: istate integer, intent(in) :: istate
@ -178,7 +176,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
double precision, intent(out) :: delta(N_states, N_det) double precision, intent(out) :: delta(N_states, N_det)
double precision, intent(out) :: delta_s2(N_states, N_det) double precision, intent(out) :: delta_s2(N_states, N_det)
double precision, allocatable :: delta_loc(:,:,:,:), delta_det(:,:,:,:) double precision, allocatable :: delta_loc(:,:,:), delta_det(:,:,:,:)
double precision, allocatable :: dress_detail(:,:) double precision, allocatable :: dress_detail(:,:)
double precision :: dress_mwen(N_states) double precision :: dress_mwen(N_states)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
@ -188,7 +186,7 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
integer :: more integer :: more
integer :: i, j, k, i_state, N integer :: i, j, k, i_state, N
integer :: task_id, ind, inds(delta_loc_N) integer :: task_id, ind
double precision, save :: time0 = -1.d0 double precision, save :: time0 = -1.d0
double precision :: time, timeLast, old_tooth double precision :: time, timeLast, old_tooth
double precision, external :: omp_get_wtime double precision, external :: omp_get_wtime
@ -197,20 +195,20 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
logical, allocatable :: actually_computed(:) logical, allocatable :: actually_computed(:)
integer :: total_computed integer :: total_computed
integer :: delta_loc_cur, is, N_buf(3) integer :: delta_loc_cur, is, N_buf(3)
double precision :: fac(delta_loc_N) , wei(delta_loc_N) double precision :: fac , wei
logical :: ok logical :: ok
integer, allocatable :: int_buf(:) integer, allocatable :: int_buf(:)
double precision, allocatable :: double_buf(:) double precision, allocatable :: double_buf(:)
integer(bit_kind), allocatable :: det_buf(:,:,:) integer(bit_kind), allocatable :: det_buf(:,:,:)
allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer),det_buf(N_int,2,N_dress_det_buffer)) allocate(int_buf(N_dress_int_buffer), double_buf(N_dress_double_buffer), det_buf(N_int,2,N_dress_det_buffer))
delta_loc_cur = 1 delta_loc_cur = 1
delta = 0d0 delta = 0d0
delta_s2 = 0d0 delta_s2 = 0d0
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2)) allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det)) allocate(cp(N_states, N_det, N_cp, 2), dress_detail(N_states, N_det))
allocate(delta_loc(N_states, N_det, 2, delta_loc_N)) allocate(delta_loc(N_states, N_det, 2))
dress_detail = 0d0 dress_detail = 0d0
delta_det = 0d0 delta_det = 0d0
cp = 0d0 cp = 0d0
@ -219,7 +217,6 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators)) allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators))
dress_mwen =0.d0
parts_to_get(:) = 1 parts_to_get(:) = 1
if(fragment_first > 0) then if(fragment_first > 0) then
@ -239,16 +236,14 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
cur_cp = 0 cur_cp = 0
old_cur_cp = 0 old_cur_cp = 0
logical :: loop, last logical :: loop, last
integer :: felem(0:delta_loc_N), felem_loc integer, allocatable :: sparse(:)
allocate(sparse(0:N_det))
loop = .true. loop = .true.
felem = N_det+1
pullLoop : do while (loop) pullLoop : do while (loop)
call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1,delta_loc_cur), int_buf, double_buf, det_buf, N_buf, task_id, felem_loc) call pull_dress_results(zmq_socket_pull, ind, last, delta_loc(1,1,1), int_buf, double_buf, det_buf, N_buf, task_id, sparse, dress_mwen)
call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf) call dress_pulled(ind, int_buf, double_buf, det_buf, N_buf)
!print *, "felem", felem_loc, felem
felem(delta_loc_cur) = felem_loc
felem(0) = min(felem_loc, felem(0))
dress_mwen(:) = 0d0
integer, external :: zmq_delete_tasks integer, external :: zmq_delete_tasks
@ -259,94 +254,67 @@ subroutine dress_collector(zmq_socket_pull, E, relative_error, delta, delta_s2,
if(more == 0) loop = .false. if(more == 0) loop = .false.
end if end if
!dress_mwen = 0d0
do i_state=1,N_states
do i=felem_loc, N_det !do i_state=1,N_states
dress_mwen(i_state) += delta_loc(i_state, i, 1, delta_loc_cur) * psi_coef(i, i_state) ! do i=1,sparse(0)
end do ! dress_mwen(i_state) += delta_loc(i_state, i, 1) * psi_coef(sparse(i), i_state)
end do ! end do
!end do
dress_detail(:, ind) += dress_mwen(:) dress_detail(:, ind) += dress_mwen(:)
wei(delta_loc_cur) = dress_weight_inv(ind) wei = dress_weight_inv(ind)
inds(delta_loc_cur) = ind
if(delta_loc_cur == delta_loc_N .or. .not. loop) then do j=1,N_cp !! optimizable
do j=1,N_cp !! optimizable fac = 0d0
fac = 0d0 ok = .false.
ok = .false. !fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
do i=1,delta_loc_cur fac = cps(ind, j) * wei * comb_step
!fac(i) = cps(inds(i), j) / cps_N(j) * wei(i) * comb_step
fac(i) = cps(inds(i), j) * wei(i) * comb_step if(fac /= 0) then
if(fac(i) /= 0d0) then do i=1,sparse(0)
ok = .true. do is=1,N_states
end if cp(is,sparse(i),j,1) += delta_loc(is,i,1) * fac
end do
end do end do
if(ok .and. .false.) then
do i=felem(0),N_det_generators
do is=1,N_states
cp(is,i,j,1) += delta_loc(is,i,1,1) * fac(1)! &
!+ delta_loc(is,i,1,2) * fac(2) &
!+ delta_loc(is,i,1,3) * fac(3) &
!+ delta_loc(is,i,1,4) * fac(4) &
!+ delta_loc(is,i,1,5) * fac(5) &
!+ delta_loc(is,i,1,6) * fac(6) &
!+ delta_loc(is,i,1,7) * fac(7) &
!+ delta_loc(is,i,1,8) * fac(8)
end do
end do
do i=felem(0),N_det_generators do i=1,sparse(0)
do is=1,N_states do is=1,N_states
cp(is,i,j,2) += delta_loc(is,i,2,1) * fac(1)! & cp(is,sparse(i),j,2) += delta_loc(is,i,2) * fac
!+ delta_loc(is,i,2,2) * fac(2) & end do
!+ delta_loc(is,i,2,3) * fac(3) & end do
!+ delta_loc(is,i,2,4) * fac(4) & end if
!+ delta_loc(is,i,2,5) * fac(5) & end do
!+ delta_loc(is,i,2,6) * fac(6) &
!+ delta_loc(is,i,2,7) * fac(7) &
!+ delta_loc(is,i,2,8) * fac(8)
end do
end do
end if
end do
do i=1,delta_loc_cur ! do i=1,delta_loc_cur
logical :: fracted logical :: fracted
integer :: toothMwen integer :: toothMwen
ind = inds(i)
toothMwen = tooth_of_det(ind) toothMwen = tooth_of_det(ind)
fracted = (toothMwen /= 0) fracted = (toothMwen /= 0)
if(fracted) fracted = (ind == first_det_of_teeth(toothMwen)) if(fracted) fracted = (ind == first_det_of_teeth(toothMwen))
if(fracted .and. .false.) then if(fracted .and. .false.) then
delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (1d0-fractage(toothMwen)) do i=1,sparse(0)
delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) = delta_det(1:N_states,felem(i):N_det,toothMwen-1, 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (1d0-fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen-1, 1) += delta_loc(1:N_states,i,1) * (1d0-fractage(toothMwen))
delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i) * (fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen-1, 2) += delta_loc(1:N_states,i,2) * (1d0-fractage(toothMwen))
delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i) * (fractage(toothMwen)) delta_det(1:N_states,sparse(i),toothMwen , 1) += delta_loc(1:N_states,i,1) * (fractage(toothMwen))
else if(.false.) then delta_det(1:N_states,sparse(i),toothMwen , 2) += delta_loc(1:N_states,i,2) * (fractage(toothMwen))
delta_det(1:N_states,felem(i):N_det,toothMwen , 1) = delta_det(1:N_states,felem(i):N_det,toothMwen , 1) + delta_loc(1:N_states,felem(i):N_det,1,i)
delta_det(1:N_states,felem(i):N_det,toothMwen , 2) = delta_det(1:N_states,felem(i):N_det,toothMwen , 2) + delta_loc(1:N_states,felem(i):N_det,2,i)
end if
parts_to_get(ind) -= 1
if(parts_to_get(ind) == 0) then
actually_computed(ind) = .true.
total_computed += 1
end if
end do end do
felem = N_det+1 else if(.false.) then
delta_loc_cur = 1 do i=1,sparse(0)
else delta_det(1:N_states,sparse(i),toothMwen , 1) = delta_loc(1:N_states,i,1)
delta_loc_cur += 1 delta_det(1:N_states,sparse(i),toothMwen , 2) = delta_loc(1:N_states,i,2)
cycle end do
end if end if
parts_to_get(ind) -= 1
if(parts_to_get(ind) == 0) then
actually_computed(ind) = .true.
total_computed += 1
end if
!end do
time = omp_get_wtime() time = omp_get_wtime()
@ -492,6 +460,10 @@ END_PROVIDER
integer :: nfiller, lfiller, cfiller integer :: nfiller, lfiller, cfiller
logical :: fracted logical :: fracted
integer :: first_suspect
first_suspect = 1
allocate(filler(n_det_generators)) allocate(filler(n_det_generators))
allocate(iorder(N_det_generators), first_cp(N_cps_max+1)) allocate(iorder(N_det_generators), first_cp(N_cps_max+1))
allocate(computed(N_det_generators)) allocate(computed(N_det_generators))
@ -541,10 +513,10 @@ END_PROVIDER
if (N_dress_jobs == N_det_generators) exit if (N_dress_jobs == N_det_generators) exit
end if end if
do l=1,N_det_generators do l=first_suspect,N_det_generators
if((.not. computed(l)) .and. (.not. comp_filler(l))) exit if((.not. computed(l)) .and. (.not. comp_filler(l))) exit
end do end do
first_suspect = l
if(l > N_det_generators) exit if(l > N_det_generators) exit
cfiller = tooth_of_det(l) cfiller = tooth_of_det(l)

View File

@ -107,7 +107,7 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
implicit none implicit none
integer(ZMQ_PTR), intent(in) :: zmq_socket_push integer(ZMQ_PTR), intent(in) :: zmq_socket_push
double precision, intent(in) :: delta_loc(N_states, N_det, 2) double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
double precision, intent(in) :: double_buf(*) double precision, intent(in) :: double_buf(*)
logical, intent(in) :: last logical, intent(in) :: last
integer, intent(in) :: int_buf(*) integer, intent(in) :: int_buf(*)
@ -115,33 +115,14 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
integer, intent(in) :: N_bufi(3) integer, intent(in) :: N_bufi(3)
integer :: N_buf(3) integer :: N_buf(3)
integer, intent(in) :: ind, task_id integer, intent(in) :: ind, task_id
integer :: rc, i, j, felem integer :: rc, i, j
double precision :: vast_emptiness(N_states) double precision :: tmp(N_states,2)
integer :: fillness integer, allocatable :: sparse(:)
integer :: sparsei
double precision :: contrib(N_states)
vast_emptiness = 0d0 contrib = 0d0
felem = 1 allocate(sparse(N_det))
dloop : do i=1, N_det
do j=1,N_states
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
felem = i
exit dloop
end if
end do
end do dloop
if(last) then
fillness = 0
do i=felem,N_det
do j=1,N_states
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
fillness += 1
end if
end do
end do
!print *, "FILLNESS", float(fillness) / float((N_det-felem+1)*N_states)
end if
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
@ -151,27 +132,57 @@ subroutine push_dress_results(zmq_socket_push, ind, last, delta_loc, int_buf, do
if(rc /= 1) stop "push" if(rc /= 1) stop "push"
if(last) then if(last) then
rc = f77_zmq_send( zmq_socket_push, felem, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,1), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE)
if(rc /= 8*N_states*(N_det+1-felem)) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,felem,2), 8*N_states*(N_det-felem+1), ZMQ_SNDMORE) sparsei = 0
if(rc /= 8*N_states*(N_det+1-felem)) stop "push" do i=1,N_det
do j=1,N_states
if(delta_loc(j,i,1) /= 0d0 .or. delta_loc(j,i,2) /= 0d0) then
sparsei += 1
sparse(sparsei) = i
delta_loc(:,sparsei,:) = delta_loc(:,i,:)
contrib(:) += delta_loc(:,sparsei, 1) * psi_coef(i, :)
end if
end do
end do
rc = f77_zmq_send( zmq_socket_push, sparsei, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(sparsei /= 0) then
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
rc = f77_zmq_send( zmq_socket_push, sparse, 4*sparsei, ZMQ_SNDMORE)
if(rc /= 4*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,1), 8*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 8*N_states*sparsei) stop "push"
rc = f77_zmq_send( zmq_socket_push, delta_loc(1,1,2), 8*N_states*sparsei, ZMQ_SNDMORE)
if(rc /= 8*N_states*sparsei) stop "push"
do i=sparsei,1
tmp(:,:) = delta_loc(:,i,:)
delta_loc(:,i,:) = 0d0
delta_loc(:,sparse(i),:) = tmp(:,:)
end do
end if
else else
rc = f77_zmq_send( zmq_socket_push, N_det, 4, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, 0, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push" if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push" !if(rc /= 8*N_states) stop "push"
rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE) !rc = f77_zmq_send( zmq_socket_push, vast_emptiness, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push" !if(rc /= 8*N_states) stop "push"
end if end if
N_buf = N_bufi N_buf = N_bufi
N_buf = (/0, 1, 0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE) rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5" if(rc /= 4*3) stop "push5"
@ -209,38 +220,48 @@ IRP_ENDIF
end subroutine end subroutine
subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, felem) subroutine pull_dress_results(zmq_socket_pull, ind, last, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, sparse, contrib)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
logical, intent(out) :: last logical, intent(out) :: last
double precision, intent(inout) :: delta_loc(N_states, N_det, 2) double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
double precision, intent(out) :: double_buf(*) double precision, intent(out) :: double_buf(*), contrib(N_states)
integer, intent(out) :: int_buf(*) integer, intent(out) :: int_buf(*)
integer(bit_kind), intent(out) :: det_buf(N_int, 2, *) integer(bit_kind), intent(out) :: det_buf(N_int, 2, *)
integer, intent(out) :: felem integer, intent(out) :: sparse(0:N_det)
integer, intent(out) :: ind integer, intent(out) :: ind
integer, intent(out) :: task_id integer, intent(out) :: task_id
integer :: rc, i integer :: rc, i, sparsen
integer, intent(out) :: N_buf(3) integer, intent(out) :: N_buf(3)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pulla" if(rc /= 4) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0) rc = f77_zmq_recv( zmq_socket_pull, last, 1, 0)
if(rc /= 1) stop "pulla" if(rc /= 1) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, felem, 4, 0) rc = f77_zmq_recv( zmq_socket_pull, sparse(0), 4, 0)
if(rc /= 4) stop "pullb" if(rc /= 4) stop "pullb"
delta_loc(:,:felem,:) = 0d0 if(sparse(0) /= 0) then
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,1), N_states*8*(N_det+1-felem), 0) if(rc /= 8*N_states) stop "pullc"
if(rc /= 8*N_states*(N_det+1-felem)) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,felem,2), N_states*8*(N_det+1-felem), 0)
if(rc /= 8*N_states*(N_det+1-felem)) stop "pulld" rc = f77_zmq_recv( zmq_socket_pull, sparse(1), 4*sparse(0), 0)
if(rc /= 4*sparse(0)) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,1), N_states*8*sparse(0), 0)
if(rc /= 8*N_states*sparse(0)) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, delta_loc(1,1,2), N_states*8*sparse(0), 0)
if(rc /= 8*N_states*sparse(0)) stop "pulld"
else
contrib = 0d0
end if
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0) rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)