10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-07-03 18:05:59 +02:00

Squashed commit of the following:

commit 4b9c435dce0f3b3078d573e66fd32b40fca26497
Merge: 74e559c8 093e3fd0
Author: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date:   Tue Sep 4 16:58:51 2018 +0200

    Merge branch 'thesis' of git://github.com/garniron/quantum_package into garniron-thesis

commit 093e3fd021
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 16:13:00 2018 +0200

    removed ungodly hack

commit 8529a0f3f6
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 14:57:19 2018 +0200

    reduced prints in pt2_stoch

commit 03b8f353bd
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 14:41:46 2018 +0200

    teeth building check for pt2_stoch

commit 0d91b9310a
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 14:35:04 2018 +0200

    timestamp of first pull

commit 34d9fa0165
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 14:27:10 2018 +0200

    potential numerical precision bug

commit 9a0f900d8c
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Sep 4 14:09:51 2018 +0200

    tests if teeth can be built

commit dda0dc34df
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Mon Sep 3 17:48:04 2018 +0200

    corrected pt2_find_sample

commit a521f0cb82
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Mon Sep 3 16:08:02 2018 +0200

    tasks get by batches of Nproc

commit 997a5a1265
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Mon Sep 3 14:18:04 2018 +0200

    buffered task_id send

commit 99ea7948e0
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Mon Sep 3 12:29:12 2018 +0200

    unbalanced fragmentation

commit abb3b7e08b
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Sun Sep 2 17:18:44 2018 +0200

    overflow of pt2_J

commit 8df49f394b
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Sun Sep 2 15:58:48 2018 +0200

    removed useless computation of intermediate checkpoints

commit 4ba5b79eb3
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Sun Sep 2 15:50:14 2018 +0200

    dressing only sent for chosen checkpoint

commit a4a6a69459
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Sat Sep 1 17:01:56 2018 +0200

    cumulative dot_F

commit 6a7f04cb79
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Sat Sep 1 16:58:07 2018 +0200

    simpler purge

commit 168ca2f2e2
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Fri Aug 31 21:07:01 2018 +0200

    task list optimized

commit de4a0d0caf
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Fri Aug 31 18:57:03 2018 +0200

    removed print

commit fee31d4e3e
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Fri Aug 31 18:56:23 2018 +0200

    dress fragmentation

commit 02893a419d
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Fri Aug 31 15:52:16 2018 +0200

    bug in blocked search - replaced with thesis version

commit bb6e073cf1
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 21:24:45 2018 +0200

    ungodly hack to prevent double providing

commit 0609e8c627
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 20:52:05 2018 +0200

    debugging

commit a254fdd7cf
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 15:24:07 2018 +0200

    parallel bug

commit 2a6c1941d4
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 11:43:11 2018 +0200

    corrected when relative_error=0d0

commit bac039bdf1
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 10:58:17 2018 +0200

    relative error 1d-5

commit aae9d203ec
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Thu Aug 30 10:07:02 2018 +0200

    potential fragmentation bug

commit ad69f39f99
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Wed Aug 29 20:54:58 2018 +0200

    dress_zmq re-implemented

commit d78f64732a
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Wed Aug 29 11:30:19 2018 +0200

    pt2_stoch re-implemented

commit 4b9b54e19a
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Tue Aug 28 10:24:38 2018 +0200

    removed test for phase_mask_bit

commit 3abccca5e3
Author: Yann Garniron <yann.garniron@yahoo.fr>
Date:   Fri Aug 3 23:44:05 2018 +0200

    phasemask_bit
This commit is contained in:
Anthony Scemama 2018-09-04 17:31:45 +02:00
parent 74e559c85a
commit 873035e016
10 changed files with 1115 additions and 1348 deletions

View File

@ -1,8 +1,73 @@
BEGIN_PROVIDER [ integer, fragment_first ]
implicit none
fragment_first = first_det_of_teeth(1)
BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
implicit none
BEGIN_DOC
! State for stochatsic PT2
END_DOC
pt2_stoch_istate = 1
END_PROVIDER
BEGIN_PROVIDER [ integer, pt2_N_teeth ]
&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ]
&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ]
&BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ]
implicit none
logical, external :: testTeethBuilding
pt2_F(:) = 1
!pt2_F(:N_det_generators/1000*0+50) = 1
pt2_n_tasks_max = 16 ! N_det_generators/100 + 1
if(N_det_generators < 1024) then
pt2_minDetInFirstTeeth = 1
pt2_N_teeth = 1
else
do pt2_N_teeth=32,1,-1
pt2_minDetInFirstTeeth = min(5, N_det_generators)
if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
end do
end if
print *, pt2_N_teeth
END_PROVIDER
logical function testTeethBuilding(minF, N)
implicit none
integer, intent(in) :: minF, N
integer :: n0, i
double precision :: u0, Wt, r
double precision, allocatable :: tilde_w(:), tilde_cW(:)
integer, external :: dress_find_sample
allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
tilde_cW(0) = 0d0
do i=1,N_det_generators
tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2
tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
enddo
tilde_cW(N_det_generators) = 1d0
n0 = 0
do
u0 = tilde_cW(n0)
r = tilde_cW(n0 + minF)
Wt = (1d0 - u0) / dble(N)
if(Wt >= r - u0) then
testTeethBuilding = .true.
return
end if
n0 += 1
if(N_det_generators - n0 < minF * N) then
testTeethBuilding = .false.
return
end if
end do
stop "exited testTeethBuilding"
end function
subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
use f77_zmq
use selection_types
@ -11,22 +76,15 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
character(len=64000) :: task
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
type(selection_buffer) :: b
integer, external :: omp_get_thread_num
double precision, intent(in) :: relative_error, absolute_error, E(N_states)
double precision, intent(out) :: pt2(N_states),error(N_states)
double precision, allocatable :: pt2_detail(:,:), comb(:)
logical, allocatable :: computed(:)
integer, allocatable :: tbc(:)
integer :: i, j, k, Ncomb, i_generator_end
integer, external :: pt2_find
integer :: i, j, k
double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
double precision, external :: omp_get_wtime
double precision :: state_average_weight_save(N_states), w(N_states)
double precision :: time
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
if (N_det < max(10,N_states)) then
@ -41,25 +99,8 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
state_average_weight(pt2_stoch_istate) = 1.d0
TOUCH state_average_weight pt2_stoch_istate
allocate(pt2_detail(N_states,N_det_generators+1), comb(N_det_generators), computed(N_det_generators), tbc(0:size_tbc))
sumabove = 0d0
sum2above = 0d0
Nabove = 0d0
provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors
provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight psi_selectors
computed = .false.
tbc(0) = first_det_of_comb - 1
do i=1, tbc(0)
tbc(i) = i
computed(i) = .true.
end do
Ncomb=size(comb)
call get_carlo_workbatch(computed, comb, Ncomb, tbc)
pt2_detail = 0d0
print *, '========== ================= ================= ================='
print *, ' Samples Energy Stat. Error Seconds '
print *, '========== ================= ================= ================='
@ -97,41 +138,17 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
endif
call create_selection_buffer(1, 1*2, b)
integer :: ipos
ipos=1
integer, external :: add_task_to_taskserver
do i=1,tbc(0)
if(tbc(i) > fragment_first) then
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i)
ipos += 20
if (ipos > 63980) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task to task server'
endif
ipos=1
do i=1,N_det_generators
do j=1,pt2_F(i) !!!!!!!!!!!!
write(task(1:20),'(I9,1X,I9''|'')') j, pt2_J(i)
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:20))) == -1) then
stop 'Unable to add task to task server'
endif
else
do j=1,fragment_count
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i)
ipos += 20
if (ipos > 63980) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task to task server'
endif
ipos=1
endif
end do
end if
end do
end do
if (ipos > 1) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task to task server'
endif
endif
integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
@ -153,18 +170,16 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
!$OMP PRIVATE(i)
i = omp_get_thread_num()
if (i==0) then
call pt2_collector(zmq_socket_pull,E(pt2_stoch_istate), b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, w, error)
call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, absolute_error, w, error)
pt2(pt2_stoch_istate) = w(pt2_stoch_istate)
else
call pt2_slave_inproc(i)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
call delete_selection_buffer(b)
print *, '========== ================= ================= ================='
deallocate(pt2_detail, comb, computed, tbc)
enddo
FREE pt2_stoch_istate
state_average_weight(:) = state_average_weight_save(:)
@ -177,34 +192,6 @@ subroutine ZMQ_pt2(E, pt2,relative_error, absolute_error, error)
end subroutine
subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove)
integer, intent(in) :: tbc(0:size_tbc), Ncomb
logical, intent(in) :: computed(N_det_generators)
double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states,N_det_generators)
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
integer :: i, dets(comb_teeth)
double precision :: myVal, myVal2
mainLoop : do i=1,Ncomb
call get_comb(comb(i), dets, comb_teeth)
do j=1,comb_teeth
if(.not.(computed(dets(j)))) then
exit mainLoop
end if
end do
myVal = 0d0
myVal2 = 0d0
do j=comb_teeth,1,-1
myVal += pt2_detail(pt2_stoch_istate,dets(j)) * pt2_weight_inv(dets(j)) * comb_step
sumabove(j) += myVal
sum2above(j) += myVal*myVal
Nabove(j) += 1
end do
end do mainLoop
end subroutine
subroutine pt2_slave_inproc(i)
implicit none
integer, intent(in) :: i
@ -212,411 +199,249 @@ subroutine pt2_slave_inproc(i)
call run_pt2_slave(1,i,pt2_e0_denominator)
end
subroutine pt2_collector(zmq_socket_pull, E, b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, absolute_error, pt2,error)
subroutine pt2_collector(zmq_socket_pull, E, relative_error, absolute_error, pt2, error)
use f77_zmq
use selection_types
use bitmasks
implicit none
integer, intent(in) :: Ncomb
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
double precision, intent(inout) :: pt2_detail(N_states, N_det_generators)
double precision, intent(in) :: comb(Ncomb), relative_error, absolute_error, E
logical, intent(inout) :: computed(N_det_generators)
integer, intent(in) :: tbc(0:size_tbc)
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
double precision, intent(out) :: pt2(N_states),error(N_states)
double precision, intent(in) :: relative_error, absolute_error, E
double precision, intent(out) :: pt2(N_states), error(N_states)
type(selection_buffer), intent(inout) :: b
double precision, allocatable :: pt2_mwen(:,:)
double precision, allocatable :: eI(:,:), eI_task(:,:), S(:), S2(:)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer, external :: zmq_delete_tasks
integer, external :: pt2_find_sample
integer :: msg_size, rc, more
integer :: acc, i, j, robin, N, n_tasks
double precision, allocatable :: val(:)
integer(bit_kind), allocatable :: det(:,:,:)
integer :: more, n, i, p, c, t, n_tasks, U
integer, allocatable :: task_id(:)
integer, allocatable :: index(:)
double precision :: time0
double precision :: time, timeLast, Nabove_old
double precision, external :: omp_get_wtime
integer :: tooth, firstTBDcomb, orgTBDcomb, n_tasks_max
integer, allocatable :: parts_to_get(:)
logical, allocatable :: actually_computed(:)
double precision :: eqt
character*(512) :: task
Nabove_old = -1.d0
n_tasks_max = N_det_generators/100+1
double precision :: v, x, avg, eqt, E0
double precision :: time, time0
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), &
pt2_mwen(N_states, n_tasks_max) )
pt2_mwen(1:N_states, 1:n_tasks_max) = 0.d0
do i=1,N_det_generators
actually_computed(i) = computed(i)
enddo
parts_to_get(:) = 1
if(fragment_first > 0) then
do i=1,fragment_first
parts_to_get(i) = fragment_count
enddo
endif
do i=1,tbc(0)
actually_computed(tbc(i)) = .false.
end do
orgTBDcomb = int(Nabove(1))
firstTBDcomb = 1
integer, allocatable :: f(:)
logical, allocatable :: d(:)
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
allocate(val(b%N), det(N_int, 2, b%N), task_id(n_tasks_max), index(n_tasks_max))
allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
allocate(d(N_det_generators+1))
allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1))
S(:) = 0d0
S2(:) = 0d0
n = 1
t = 0
U = 0
eI(:,:) = 0d0
f(:) = pt2_F(:)
d(:) = .false.
n_tasks = 0
E0 = E
more = 1
call wall_time(time0)
timeLast = time0
time0 = omp_get_wtime()
call get_first_tooth(actually_computed, tooth)
Nabove_old = Nabove(tooth)
logical :: loop
loop = .True.
pullLoop : do while (loop)
call pull_pt2_results(zmq_socket_pull, index, pt2_mwen, task_id, n_tasks)
do i=1,n_tasks
pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i)
parts_to_get(index(i)) -= 1
if(parts_to_get(index(i)) < 0) then
print *, i, index(i), parts_to_get(index(i))
print *, parts_to_get
stop "PARTS ??"
end if
if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true.
enddo
integer, external :: zmq_delete_tasks
if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then
cycle
endif
if (more == 0) then
loop = .False.
endif
call wall_time(time)
if(time - timeLast > 5d0 .or. (.not.loop)) then
timeLast = time
do i=1, first_det_of_teeth(1)-1
if(.not.(actually_computed(i))) then
cycle pullLoop
do while (n <= N_det_generators)
if(f(pt2_J(n)) == 0) then
d(pt2_J(n)) = .true.
do while(d(U+1))
U += 1
end do
do while(t <= pt2_N_teeth)
if(U >= pt2_n_0(t+1)) then
t=t+1
E0 = E
do i=pt2_n_0(t),1,-1
E0 += eI(pt2_stoch_istate, i)
end do
else
exit
end if
end do
integer, external :: zmq_abort
double precision :: E0, avg, prop
call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove)
firstTBDcomb = int(Nabove(1)) - orgTBDcomb + 1
call get_first_tooth(actually_computed, tooth)
if (firstTBDcomb > Ncomb) then
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
call sleep(1)
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Error in sending abort signal (1)'
endif
endif
! exit pullLoop
endif
E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1))
if (tooth <= comb_teeth) then
prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1))
prop = prop * pt2_weight_inv(first_det_of_teeth(tooth))
E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop
avg = E0 + (sumabove(tooth) / Nabove(tooth))
eqt = sqrt(1d0 / (Nabove(tooth)-1.d0) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2))
else
eqt = 0.d0
tooth=comb_teeth
endif
call wall_time(time)
if ( ((dabs(eqt/avg) < relative_error) .or. (dabs(eqt) < absolute_error)) .and. Nabove(tooth) >= 10.d0) then
! Termination
pt2(pt2_stoch_istate) = avg
c = pt2_R(n)
if(c /= 0) then
x = 0d0
do p=pt2_N_teeth, 1, -1
v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
i = pt2_find_sample(v, pt2_cW)
x += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
S(p) += x
S2(p) += x**2
end do
avg = S(t) / dble(c)
eqt = (S2(t) / c) - (S(t)/c)**2
eqt = sqrt(eqt / dble(c-1))
pt2(pt2_stoch_istate) = E0-E+avg
error(pt2_stoch_istate) = eqt
print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, ''
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
call sleep(1)
if (zmq_abort(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Error in sending abort signal (2)'
endif
endif
else
if ( (Nabove(tooth) > 2.d0) .and. (Nabove(tooth) > Nabove_old) ) then
print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', Nabove(tooth), avg+E, eqt, time-time0, ''
Nabove_old = Nabove(tooth)
endif
time = omp_get_wtime()
if(mod(c,10)==1 .or. n==N_det_generators) print '(G10.3, 2X, F16.10, 2X, G16.3, 2X, F16.4, A20)', c, avg+E0, eqt, time-time0, ''
end if
n += 1
else if(more == 0) then
exit
else
call pull_pt2_results(zmq_socket_pull, index, eI_task, task_id, n_tasks)
if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then
stop 'Unable to delete tasks'
endif
do i=1,n_tasks
eI(:, index(i)) += eI_task(:, i)
f(index(i)) -= 1
end do
end if
end do pullLoop
if(tooth == comb_teeth+1) then
pt2(pt2_stoch_istate) = sum(pt2_detail(pt2_stoch_istate,:))
error(pt2_stoch_istate) = 0d0
else
E0 = sum(pt2_detail(pt2_stoch_istate,:first_det_of_teeth(tooth)-1))
prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1))
prop = prop * pt2_weight_inv(first_det_of_teeth(tooth))
E0 += pt2_detail(pt2_stoch_istate,first_det_of_teeth(tooth)) * prop
pt2(pt2_stoch_istate) = E0 + (sumabove(tooth) / Nabove(tooth))
error(pt2_stoch_istate) = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2))
end if
end do
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call sort_selection_buffer(b)
end subroutine
integer function pt2_find(v, w, sze, imin, imax)
integer function pt2_find_sample(v, w)
implicit none
integer, intent(in) :: sze, imin, imax
double precision, intent(in) :: v, w(sze)
integer :: i,l,h
integer, parameter :: block=64
double precision, intent(in) :: v, w(0:N_det_generators)
integer :: i,l,r
l = imin
h = imax-1
l = 0
r = N_det_generators
do while(h-l >= block)
i = ishft(h+l,-1)
if(w(i+1) > v) then
h = i-1
do while(r-l > 1)
i = (r+l) / 2
if(w(i) < v) then
l = i
else
l = i+1
end if
end do
!DIR$ LOOP COUNT (64)
do pt2_find=l,h
if(w(pt2_find) >= v) then
exit
r = i
end if
end do
pt2_find_sample = r
end function
BEGIN_PROVIDER [ integer, comb_teeth ]
BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
&BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
implicit none
BEGIN_DOC
! Number of teeth in the comb
END_DOC
comb_teeth = min(1+N_det/10,100)
integer :: N_c, N_j, U, t, i
double precision :: v
logical, allocatable :: d(:)
integer, external :: pt2_find_sample
allocate(d(N_det_generators))
pt2_R(:) = 0
N_c = 0
N_j = pt2_n_0(1)
d(:) = .false.
do i=1,N_j
d(i) = .true.
pt2_J(i) = i
end do
call random_seed(put=(/3211,64,6566,321,65,321,654,65,321,6321,654,65,321,621,654,65,321,65,654,65,321,65/))
call RANDOM_NUMBER(pt2_u)
call RANDOM_NUMBER(pt2_u)
U = 0
do while(N_j < N_det_generators)
!ADD_COMB
N_c += 1
do t=0, pt2_N_teeth-1
v = pt2_u_0 + pt2_W_T * (dble(t) + pt2_u(N_c))
i = pt2_find_sample(v, pt2_cW)
if(.not. d(i)) then
N_j += 1
pt2_J(N_j) = i
d(i) = .true.
end if
end do
pt2_R(N_j) = N_c
!FILL_TOOTH
do while(U < N_det_generators)
U += 1
if(.not. d(U)) then
N_j += 1
pt2_J(N_j) = U
d(U) = .true.
exit;
end if
end do
enddo
if(N_det_generators > 1) then
pt2_R(N_det_generators-1) = 0
pt2_R(N_det_generators) = N_c
end if
END_PROVIDER
subroutine get_first_tooth(computed, first_teeth)
BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_W_T ]
&BEGIN_PROVIDER [ double precision, pt2_u_0 ]
&BEGIN_PROVIDER [ integer, pt2_n_0, (pt2_N_teeth+1) ]
implicit none
logical, intent(in) :: computed(N_det_generators)
integer, intent(out) :: first_teeth
integer :: i, first_det
integer :: i, t
double precision, allocatable :: tilde_w(:), tilde_cW(:)
double precision :: r, tooth_width
integer, external :: pt2_find_sample
first_det = N_det_generators+1+1
first_teeth = 1
do i=first_det_of_comb, N_det_generators
if(.not.(computed(i))) then
first_det = i
allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
tilde_cW(0) = 0d0
do i=1,N_det_generators
tilde_w(i) = psi_coef_generators(i,pt2_stoch_istate)**2
tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
enddo
pt2_n_0(1) = 0
do
pt2_u_0 = tilde_cW(pt2_n_0(1))
r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)
if(pt2_W_T >= r - pt2_u_0) then
exit
end if
end do
do i=comb_teeth+1, 1, -1
if(first_det_of_teeth(i) < first_det) then
first_teeth = i
exit
pt2_n_0(1) += 1
if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then
stop "teeth building failed"
end if
end do
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
end subroutine
BEGIN_PROVIDER [ integer*8, size_tbc ]
implicit none
BEGIN_DOC
! Size of the tbc array
END_DOC
size_tbc = int((comb_teeth+1),8)*int(N_det_generators,8) + fragment_count*fragment_first
END_PROVIDER
subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
implicit none
integer, intent(inout) :: Ncomb
double precision, intent(out) :: comb(Ncomb)
integer, intent(inout) :: tbc(0:size_tbc)
logical, intent(inout) :: computed(N_det_generators)
integer :: i, j, last_full, dets(comb_teeth)
integer :: icount, n
integer :: k, l
l=first_det_of_comb
call RANDOM_NUMBER(comb)
do i=1,size(comb)
comb(i) = comb(i) * comb_step
!DIR$ FORCEINLINE
call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth)
Ncomb = i
if (tbc(0) == N_det_generators) return
do while (computed(l))
l=l+1
enddo
k=tbc(0)+1
tbc(k) = l
computed(l) = .True.
tbc(0) = k
enddo
end subroutine
subroutine get_comb(stato, dets, ct)
implicit none
integer, intent(in) :: ct
double precision, intent(in) :: stato
integer, intent(out) :: dets(ct)
double precision :: curs
integer :: j
integer, external :: pt2_find
curs = 1d0 - stato
do j = comb_teeth, 1, -1
!DIR$ FORCEINLINE
dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1))
curs -= comb_step
do t=2, pt2_N_teeth
r = pt2_u_0 + pt2_W_T * dble(t-1)
pt2_n_0(t) = pt2_find_sample(r, tilde_cW)
end do
end subroutine
pt2_n_0(pt2_N_teeth+1) = N_det_generators
subroutine add_comb(comb, computed, tbc, stbc, ct)
implicit none
integer*8, intent(in) :: stbc
integer, intent(in) :: ct
double precision, intent(in) :: comb
logical, intent(inout) :: computed(N_det_generators)
integer, intent(inout) :: tbc(0:stbc)
integer :: i, k, l, dets(ct)
!DIR$ FORCEINLINE
call get_comb(comb, dets, ct)
k=tbc(0)+1
do i = 1, ct
l = dets(i)
if(.not.(computed(l))) then
tbc(k) = l
k = k+1
computed(l) = .true.
end if
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1))
do t=1, pt2_N_teeth
tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
do i=pt2_n_0(t)+1, pt2_n_0(t+1)
pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
end do
end do
tbc(0) = k-1
end subroutine
BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
implicit none
BEGIN_DOC
! State for stochatsic PT2
END_DOC
pt2_stoch_istate = 1
END_PROVIDER
BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, comb_step ]
&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ]
&BEGIN_PROVIDER [ integer, first_det_of_comb ]
implicit none
integer :: i
double precision :: norm_left, stato
integer, external :: pt2_find
pt2_weight(1) = psi_coef_generators(1,pt2_stoch_istate)**2
pt2_cweight(1) = psi_coef_generators(1,pt2_stoch_istate)**2
pt2_cW(0) = 0d0
do i=1,N_det_generators
pt2_weight(i) = psi_coef_generators(i,pt2_stoch_istate)**2
enddo
! Important to loop backwards for numerical precision
pt2_cweight(N_det_generators) = pt2_weight(N_det_generators)
do i=N_det_generators-1,1,-1
pt2_cweight(i) = pt2_weight(i) + pt2_cweight(i+1)
pt2_cW(i) = pt2_cW(i-1) + pt2_w(i)
end do
do i=1,N_det_generators
pt2_weight(i) = pt2_weight(i) / pt2_cweight(1)
pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(1)
enddo
do i=1,N_det_generators-1
pt2_cweight(i) = 1.d0 - pt2_cweight(i+1)
end do
pt2_cweight(N_det_generators) = 1.d0
norm_left = 1d0
comb_step = 1d0/dfloat(comb_teeth)
first_det_of_comb = 1
do i=1,N_det_generators
if(pt2_weight(i)/norm_left < .5d0*comb_step) then
first_det_of_comb = i
exit
end if
norm_left -= pt2_weight(i)
end do
first_det_of_comb = max(2,first_det_of_comb)
call write_int(6, first_det_of_comb-1, 'Size of deterministic set')
comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step
stato = 1d0 - comb_step
iloc = N_det_generators
do i=comb_teeth, 1, -1
integer :: iloc
iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc)
first_det_of_teeth(i) = iloc
stato -= comb_step
end do
first_det_of_teeth(comb_teeth+1) = N_det_generators + 1
first_det_of_teeth(1) = first_det_of_comb
if(first_det_of_teeth(1) /= first_det_of_comb) then
print *, 'Error in ', irp_here
stop "comb provider"
endif
END_PROVIDER
BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ]
implicit none
BEGIN_DOC
! Inverse of pt2_weight array
END_DOC
integer :: i
do i=1,N_det_generators
pt2_weight_inv(i) = 1.d0/pt2_weight(i)
enddo
pt2_n_0(pt2_N_teeth+1) = N_det_generators
END_PROVIDER

View File

@ -22,12 +22,11 @@ subroutine run_pt2_slave(thread,iproc,energy)
logical :: done
double precision,allocatable :: pt2(:,:)
integer :: n_tasks, k, n_tasks_max
integer :: n_tasks, k
integer, allocatable :: i_generator(:), subset(:)
n_tasks_max = N_det_generators/100+1
allocate(task_id(n_tasks_max), task(n_tasks_max))
allocate(pt2(N_states,n_tasks_max), i_generator(n_tasks_max), subset(n_tasks_max))
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
@ -47,7 +46,7 @@ subroutine run_pt2_slave(thread,iproc,energy)
do while (.not.done)
n_tasks = max(1,n_tasks)
n_tasks = min(n_tasks,n_tasks_max)
n_tasks = min(n_tasks,pt2_n_tasks_max)
integer, external :: get_tasks_from_taskserver
if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
@ -66,7 +65,7 @@ subroutine run_pt2_slave(thread,iproc,energy)
do k=1,n_tasks
pt2(:,k) = 0.d0
buf%cur = 0
call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k))
call select_connected(i_generator(k),energy,pt2(1,k),buf,subset(k),pt2_F(i_generator(k)))
enddo
call wall_time(time1)
@ -202,11 +201,4 @@ IRP_ENDIF
end subroutine
BEGIN_PROVIDER [ double precision, pt2_workload, (N_det_generators) ]
integer :: i
do i=1,N_det_generators
pt2_workload(i) = dfloat(N_det_generators - i + 1)**2
end do
pt2_workload = pt2_workload / sum(pt2_workload)
END_PROVIDER

View File

@ -177,7 +177,7 @@ subroutine run_selection_slave_old(thread,iproc,energy)
else
ASSERT (N == buf%N)
end if
call select_connected(i_generator,energy,pt2,buf,subset)
call select_connected(i_generator,energy,pt2,buf,subset,fragment_count)
endif
integer, external :: task_done_to_taskserver

View File

@ -46,11 +46,11 @@ subroutine get_mask_phase(det, phasemask)
end subroutine
subroutine select_connected(i_generator,E0,pt2,b,subset)
subroutine select_connected(i_generator,E0,pt2,b,subset,csubset)
use bitmasks
use selection_types
implicit none
integer, intent(in) :: i_generator, subset
integer, intent(in) :: i_generator, subset, csubset
type(selection_buffer), intent(inout) :: b
double precision, intent(inout) :: pt2(N_states)
integer :: k,l
@ -71,7 +71,7 @@ subroutine select_connected(i_generator,E0,pt2,b,subset)
particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) )
particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) )
enddo
call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset)
call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,b,subset,csubset)
enddo
deallocate(fock_diag_tmp)
end subroutine
@ -266,7 +266,7 @@ subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
end
subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset)
subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,buf,subset,csubset)
use bitmasks
use selection_types
implicit none
@ -274,7 +274,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted
END_DOC
integer, intent(in) :: i_generator, subset
integer, intent(in) :: i_generator, subset, csubset
integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2)
double precision, intent(in) :: fock_diag_tmp(mo_tot_num)
double precision, intent(in) :: E0(N_states)
@ -298,7 +298,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
integer(bit_kind), allocatable:: preinteresting_det(:,:,:)
allocate (preinteresting_det(N_int,2,N_det))
PROVIDE fragment_count
!PROVIDE fragment_count
monoAdo = .true.
monoBdo = .true.
@ -571,7 +571,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
end if
maskInd += 1
if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then
if(mod(maskInd, csubset) == (subset-1)) then
call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
if(fullMatch) cycle

View File

@ -2,10 +2,10 @@ use bitmasks
subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc)
subroutine alpha_callback(delta_ij_loc, i_generator, subset, csubset, iproc)
use bitmasks
implicit none
integer, intent(in) :: i_generator, subset
integer, intent(in) :: i_generator, subset, csubset
double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2)
integer, intent(in) :: iproc
@ -15,7 +15,7 @@ subroutine alpha_callback(delta_ij_loc, i_generator, subset,iproc)
do l=1,N_generators_bitmask
call generate_singles_and_doubles(delta_ij_loc, i_generator,l,subset,iproc)
call generate_singles_and_doubles(delta_ij_loc,i_generator,l,subset,csubset,iproc)
enddo
end subroutine
@ -34,7 +34,7 @@ BEGIN_PROVIDER [ integer, psi_from_sorted_gen, (N_det) ]
END_PROVIDER
subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, iproc)
subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index, subset, csubset, iproc)
use bitmasks
implicit none
BEGIN_DOC
@ -42,7 +42,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index
END_DOC
double precision,intent(inout) :: delta_ij_loc(N_states,N_det,2)
integer, intent(in) :: i_generator, subset, bitmask_index
integer, intent(in) :: i_generator, subset, csubset, bitmask_index
integer, intent(in) :: iproc
@ -69,8 +69,8 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index
allocate(abuf(0:N_det*6), labuf(0:N_det))
allocate(preinteresting_det(N_int,2,N_det))
PROVIDE fragment_count
maskInd = -1
monoAdo = .true.
monoBdo = .true.
@ -193,7 +193,6 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index
allocate(counted(mo_tot_num, mo_tot_num), countedOrb(mo_tot_num, 2))
allocate (indexes(0:mo_tot_num, 0:mo_tot_num))
allocate (indexes_end(0:mo_tot_num, 0:mo_tot_num))
maskInd = -1
integer :: nb_count
do s1=1,2
do i1=N_holes(s1),1,-1 ! Generate low excitations first
@ -345,7 +344,7 @@ subroutine generate_singles_and_doubles(delta_ij_loc, i_generator, bitmask_index
end if
maskInd += 1
if(subset == 0 .or. mod(maskInd, fragment_count) == (subset-1)) then
if(mod(maskInd, csubset) == (subset-1)) then
call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
if(fullMatch) cycle

File diff suppressed because it is too large Load Diff

View File

@ -85,34 +85,21 @@ BEGIN_PROVIDER [ double precision, delta_ij_tmp, (N_states,N_det_delta_ij,2) ]
double precision, allocatable :: dress(:), del(:,:), del_s2(:,:)
double precision :: E_CI_before(N_states), relative_error
! prevents re-providing if delta_ij_tmp is
! just being copied
if(N_det_delta_ij == N_det) then
allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij))
allocate(dress(N_states), del(N_states, N_det_delta_ij), del_s2(N_states, N_det_delta_ij))
delta_ij_tmp = 0d0
delta_ij_tmp = 0d0
E_CI_before(:) = dress_E0_denominator(:) + nuclear_repulsion
relative_error = 1.d-5
E_CI_before(:) = psi_energy(:) + nuclear_repulsion
threshold_selectors = 1.d0
threshold_generators = 1.d0
SOFT_TOUCH threshold_selectors threshold_generators
! if(errr /= 0d0) then
! errr = errr / 2d0
! else
! errr = 1d-4
! end if
relative_error = 1.d-3
call write_double(6,relative_error,"Convergence of the stochastic algorithm")
call write_double(6,relative_error,"Relative error for the stochastic algorithm")
call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij)
delta_ij_tmp(:,:,1) = del(:,:)
delta_ij_tmp(:,:,2) = del_s2(:,:)
call ZMQ_dress(E_CI_before, dress, del, del_s2, abs(relative_error), N_det_delta_ij)
delta_ij_tmp(:,:,1) = del(:,:)
delta_ij_tmp(:,:,2) = del_s2(:,:)
deallocate(dress, del, del_s2)
end if
deallocate(dress, del, del_s2)
END_PROVIDER

View File

@ -1,13 +1,5 @@
use bitmasks
BEGIN_PROVIDER [ integer, fragment_count ]
implicit none
BEGIN_DOC
! Number of fragments for the deterministic part
END_DOC
fragment_count = 1
END_PROVIDER
subroutine run_dress_slave(thread,iproce,energy)
use f77_zmq
@ -18,8 +10,8 @@ subroutine run_dress_slave(thread,iproce,energy)
integer, intent(in) :: thread, iproce
integer :: rc, i, subset, i_generator
integer :: worker_id, task_id, ctask, ltask
character*(5120) :: task
integer :: worker_id, ctask, ltask
character*(512) :: task(Nproc)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
@ -27,70 +19,64 @@ subroutine run_dress_slave(thread,iproce,energy)
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
logical :: done
double precision,allocatable :: dress_detail(:)
integer :: ind
double precision,allocatable :: delta_ij_loc(:,:,:)
integer :: h,p,n,i_state
logical :: ok
integer, allocatable :: int_buf(:)
double precision, allocatable :: double_buf(:)
integer(bit_kind), allocatable :: det_buf(:,:,:)
integer :: N_buf(3)
logical :: last
double precision,allocatable :: breve_delta_m(:,:,:)
integer :: i_state,m,l,t,p,sum_f
!integer, external :: omp_get_thread_num
double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:)
integer :: toothMwen
logical :: fracted
double precision, allocatable :: delta_det(:,:,:,:), cp(:,:,:,:), edI(:)
double precision, allocatable :: edI_task(:)
integer, allocatable :: edI_index(:), edI_taskID(:)
integer :: n_tasks
integer :: iproc
integer, allocatable :: f(:)
integer :: cp_sent, cp_done
integer :: cp_max(Nproc)
integer :: will_send, task_id, purge_task_id, ntask_buf
integer, allocatable :: task_buf(:)
integer(kind=OMP_LOCK_KIND) :: lck_det(0:pt2_N_teeth+1)
integer(kind=OMP_LOCK_KIND) :: lck_sto(0:dress_N_cp+1), sending, getting_task
double precision :: fac
double precision :: ending(1)
integer, external :: zmq_get_dvector
! double precision, external :: omp_get_wtime
double precision :: time, time0
integer :: ntask_tbd, task_tbd(Nproc), i_gen_tbd(Nproc), subset_tbd(Nproc)
if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
allocate(delta_det(N_states, N_det, 0:pt2_N_teeth+1, 2))
allocate(cp(N_states, N_det, dress_N_cp, 2))
allocate(edI(N_det_generators), f(N_det_generators))
allocate(edI_index(N_det_generators), edI_task(N_det_generators))
edI = 0d0
f = 0
delta_det = 0d0
! if(iproce /= 0) stop "RUN DRESS SLAVE is OMP"
task = CHAR(0)
allocate(delta_det(N_states, N_det, 0:comb_teeth+1, 2))
allocate(cp(N_states, N_det, N_cp, 2))
delta_det = 0d9
cp = 0d0
task(:) = CHAR(0)
integer :: iproc, cur_cp, done_for(0:N_cp)
integer, allocatable :: tasks(:)
integer :: lastCp(Nproc)
integer :: lastSent, lastSendable
logical :: send
integer(kind=OMP_LOCK_KIND) :: lck_det(0:comb_teeth+1)
integer(kind=OMP_LOCK_KIND) :: lck_sto(0:N_cp+1)
do i=0,N_cp+1
call omp_init_lock(sending)
call omp_init_lock(getting_task)
do i=0,dress_N_cp+1
call omp_init_lock(lck_sto(i))
end do
do i=0,comb_teeth+1
do i=0,pt2_N_teeth+1
call omp_init_lock(lck_det(i))
end do
lastCp = 0
lastSent = 0
send = .false.
done_for = 0
double precision :: hij, sij
!call i_h_j_s2(psi_det(1,1,1),psi_det(1,1,2),N_int,hij, sij)
hij = dress_E0_denominator(1) !PROVIDE BEFORE OMP PARALLEL
cp_done = 0
cp_sent = 0
will_send = 0
double precision :: hij, sij, tmp
purge_task_id = 0
provide psi_energy
ending(1) = dble(dress_N_cp+1)
ntask_tbd = 0
!$OMP PARALLEL DEFAULT(SHARED) &
!$OMP PRIVATE(int_buf, double_buf, det_buf, delta_ij_loc, task, task_id) &
!$OMP PRIVATE(lastSendable, toothMwen, fracted, fac) &
!$OMP PRIVATE(i, cur_cp, send, i_generator, subset, iproc, N_buf) &
!$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
!$OMP PRIVATE(breve_delta_m, task_id) &
!$OMP PRIVATE(tmp,fac,m,l,t,sum_f,n_tasks) &
!$OMP PRIVATE(i,p,will_send, i_generator, subset, iproc) &
!$OMP PRIVATE(zmq_to_qp_run_socket, zmq_socket_push, worker_id) &
!$OMP PRIVATE(task_buf, ntask_buf,time, time0)
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_push = new_zmq_push_socket(thread)
call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
@ -99,276 +85,239 @@ subroutine run_dress_slave(thread,iproce,energy)
call end_zmq_push_socket(zmq_socket_push,thread)
stop "WORKER -1"
end if
iproc = omp_get_thread_num()+1
allocate(int_buf(N_dress_int_buffer))
allocate(double_buf(N_dress_double_buffer))
allocate(det_buf(N_int, 2, N_dress_det_buffer))
allocate(delta_ij_loc(N_states,N_det,2))
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
task = task//" 0"
if(task_id == 0) exit
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
if(task_id /= 0) then
read (task,*) subset, i_generator
allocate(breve_delta_m(N_states,N_det,2))
allocate(task_buf(pt2_n_tasks_max))
ntask_buf = 0
!$OMP ATOMIC
done_for(done_cp_at_det(i_generator)) += 1
! print *, "IGEN", i_generator, done_cp_at_det(i_generator)
delta_ij_loc(:,:,:) = 0d0
call generator_start(i_generator, iproc)
call alpha_callback(delta_ij_loc, i_generator, subset, iproc)
call generator_done(i_generator, int_buf, double_buf, det_buf, N_buf, iproc)
if(iproc==1) then
call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf)
end if
do i=1,N_cp
fac = cps(i_generator, i) * dress_weight_inv(i_generator) * comb_step
if(fac == 0d0) cycle
call omp_set_lock(lck_sto(i))
cp(:,:,i,1) += (delta_ij_loc(:,:,1) * fac)
cp(:,:,i,2) += (delta_ij_loc(:,:,2) * fac)
call omp_unset_lock(lck_sto(i))
end do
toothMwen = tooth_of_det(i_generator)
fracted = (toothMwen /= 0)
if(fracted) fracted = (i_generator == first_det_of_teeth(toothMwen))
if(fracted) then
call omp_set_lock(lck_det(toothMwen))
call omp_set_lock(lck_det(toothMwen-1))
delta_det(:,:,toothMwen-1, 1) += delta_ij_loc(:,:,1) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen-1, 2) += delta_ij_loc(:,:,2) * (1d0-fractage(toothMwen))
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1) * (fractage(toothMwen))
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2) * (fractage(toothMwen))
call omp_unset_lock(lck_det(toothMwen))
call omp_unset_lock(lck_det(toothMwen-1))
else
call omp_set_lock(lck_det(toothMwen))
delta_det(:,:,toothMwen , 1) += delta_ij_loc(:,:,1)
delta_det(:,:,toothMwen , 2) += delta_ij_loc(:,:,2)
call omp_unset_lock(lck_det(toothMwen))
end if
call push_dress_results(zmq_socket_push, i_generator, -1, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, task_id)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
lastCp(iproc) = done_cp_at_det(i_generator)
do while(cp_done > cp_sent .or. m /= dress_N_cp+1)
call omp_set_lock(getting_task)
if(ntask_tbd == 0) then
ntask_tbd = size(task_tbd)
call get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_tbd, task, ntask_tbd)
!task = task//" 0"
end if
task_id = task_tbd(1)
if(task_id /= 0) then
read (task(1),*) subset, i_generator
do i=1,size(task_tbd)-1
task_tbd(i) = task_tbd(i+1)
task(i) = task(i+1)
end do
m = dress_P(i_generator)
ntask_tbd -= 1
else
m = dress_N_cp + 1
i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1)
end if
call omp_unset_lock(getting_task)
will_send = 0
!$OMP CRITICAL
send = .false.
lastSendable = N_cp*2
do i=1,Nproc
lastSendable = min(lastCp(i), lastSendable)
end do
lastSendable -= 1
if(lastSendable > lastSent .or. (lastSendable == N_cp-1 .and. lastSent /= N_cp-1)) then
lastSent = lastSendable
cur_cp = lastSent
send = .true.
cp_max(iproc) = m
cp_done = minval(cp_max)-1
if(cp_done > cp_sent) then
will_send = cp_sent + 1
cp_sent = will_send
end if
if(purge_task_id == 0) then
purge_task_id = task_id
task_id = 0
else if(task_id /= 0) then
ntask_buf += 1
task_buf(ntask_buf) = task_id
end if
!$OMP END CRITICAL
if(send) then
N_buf = (/0,1,0/)
delta_ij_loc = 0d0
if(cur_cp < 1) stop "cur_cp < 1"
do i=1,cur_cp
delta_ij_loc(:,:,1) += cp(:,:,i,1)
delta_ij_loc(:,:,2) += cp(:,:,i,2)
if(will_send /= 0 .and. will_send <= int(ending(1))) then
call omp_set_lock(sending)
n_tasks = 0
sum_f = 0
do i=1,N_det_generators
if(dress_P(i) <= will_send) sum_f = sum_f + f(i)
if(dress_P(i) == will_send .and. f(i) /= 0) then
n_tasks += 1
edI_task(n_tasks) = edI(i)
edI_index(n_tasks) = i
end if
end do
delta_ij_loc(:,:,:) = delta_ij_loc(:,:,:) / cps_N(cur_cp)
do i=cp_first_tooth(cur_cp)-1,0,-1
delta_ij_loc(:,:,1) = delta_ij_loc(:,:,1) +delta_det(:,:,i,1)
delta_ij_loc(:,:,2) = delta_ij_loc(:,:,2) +delta_det(:,:,i,2)
end do
call push_dress_results(zmq_socket_push, done_for(cur_cp), cur_cp, delta_ij_loc, int_buf, double_buf, det_buf, N_buf, -1)
call push_dress_results(zmq_socket_push, will_send, sum_f, edI_task, edI_index, breve_delta_m, 0, n_tasks)
call omp_unset_lock(sending)
end if
if(task_id == 0) exit
end do
if(m /= dress_N_cp+1) then
!UPDATE i_generator
breve_delta_m(:,:,:) = 0d0
call generator_start(i_generator, iproc)
time0 = omp_get_wtime()
call alpha_callback(breve_delta_m, i_generator, subset, pt2_F(i_generator), iproc)
time = omp_get_wtime()
!print '(I0.11, I4, A12, F12.3)', i_generator, subset, "GREPMETIME", time-time0
t = dress_T(i_generator)
call omp_set_lock(lck_det(t))
delta_det(:,:,t, 1) += breve_delta_m(:,:,1)
delta_det(:,:,t, 2) += breve_delta_m(:,:,2)
call omp_unset_lock(lck_det(t))
do p=1,dress_N_cp
if(dress_e(i_generator, p) /= 0d0) then
fac = dress_e(i_generator, p)
call omp_set_lock(lck_sto(p))
cp(:,:,p,1) += breve_delta_m(:,:,1) * fac
cp(:,:,p,2) += breve_delta_m(:,:,2) * fac
call omp_unset_lock(lck_sto(p))
end if
end do
tmp = 0d0
do i=N_det,1,-1
tmp += psi_coef(i, dress_stoch_istate)*breve_delta_m(dress_stoch_istate, i, 1)
end do
!$OMP ATOMIC
edI(i_generator) += tmp
!$OMP ATOMIC
f(i_generator) += 1
!push bidon
if(ntask_buf == size(task_buf)) then
call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf)
ntask_buf = 0
end if
end if
end do
!$OMP BARRIER
if(ntask_buf /= 0) then
call push_dress_results(zmq_socket_push, 0, 0, edI_task, edI_index, breve_delta_m, task_buf, ntask_buf)
ntask_buf = 0
end if
!$OMP SINGLE
if(purge_task_id /= 0) then
do while(int(ending(1)) == dress_N_cp+1)
call sleep(1)
i= zmq_get_dvector(zmq_to_qp_run_socket, worker_id, "ending", ending, 1)
end do
will_send = int(ending(1))
breve_delta_m = 0d0
do l=will_send, 1,-1
breve_delta_m(:,:,1) += cp(:,:,l,1)
breve_delta_m(:,:,2) += cp(:,:,l,2)
end do
breve_delta_m(:,:,:) = breve_delta_m(:,:,:) / dress_M_m(will_send)
do t=dress_dot_t(will_send)-1,0,-1
breve_delta_m(:,:,1) = breve_delta_m(:,:,1) + delta_det(:,:,t,1)
breve_delta_m(:,:,2) = breve_delta_m(:,:,2) + delta_det(:,:,t,2)
end do
sum_f = 0
do i=1,N_det_generators
if(dress_P(i) <= will_send) sum_f = sum_f + f(i)
end do
call push_dress_results(zmq_socket_push, -will_send, sum_f, edI_task, edI_index, breve_delta_m, purge_task_id, 1)
end if
!$OMP END SINGLE
call disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
!$OMP END PARALLEL
do i=0,N_cp+1
do i=0,dress_N_cp+1
call omp_destroy_lock(lck_sto(i))
end do
do i=0,comb_teeth+1
do i=0,pt2_N_teeth+1
call omp_destroy_lock(lck_det(i))
end do
end subroutine
subroutine push_dress_results(zmq_socket_push, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_bufi, task_id)
subroutine push_dress_results(zmq_socket_push, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks)
use f77_zmq
implicit none
integer, parameter :: sendt = 4
integer(ZMQ_PTR), intent(in) :: zmq_socket_push
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
real(kind=4), allocatable :: delta_loc4(:,:,:)
double precision, intent(in) :: double_buf(*)
integer, intent(in) :: int_buf(*)
integer(bit_kind), intent(in) :: det_buf(N_int, 2, *)
integer, intent(in) :: N_bufi(3)
integer :: N_buf(3)
integer, intent(in) :: ind, cur_cp, task_id
integer :: rc, i, j, k, l
double precision :: contrib(N_states)
real(sendt), allocatable :: r4buf(:,:,:)
integer, intent(in) :: m_task, f, edI_index(n_tasks)
double precision, intent(in) :: breve_delta_m(N_states, N_det, 2), edI_task(n_tasks)
integer, intent(in) :: task_id(pt2_n_tasks_max), n_tasks
integer :: rc, i, j, k
rc = f77_zmq_send( zmq_socket_push, m_task, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push3"
rc = f77_zmq_send( zmq_socket_push, ind, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
rc = f77_zmq_send( zmq_socket_push, cur_cp, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push"
if(cur_cp /= -1) then
allocate(r4buf(N_states, N_det, 2))
do i=1,2
do j=1,N_det
do k=1,N_states
r4buf(k,j,i) = real(delta_loc(k,j,i), sendt)
end do
end do
end do
rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,1), sendt*N_states*N_det, ZMQ_SNDMORE)
if(rc /= sendt*N_states*N_det) stop "push"
rc = f77_zmq_send( zmq_socket_push, r4buf(1,1,2), sendt*N_states*N_det, ZMQ_SNDMORE)
if(rc /= sendt*N_states*N_det) stop "push"
else
contrib = 0d0
do i=1,N_det
contrib(:) += delta_loc(:,i, 1) * psi_coef(i, :)
end do
rc = f77_zmq_send( zmq_socket_push, contrib, 8*N_states, ZMQ_SNDMORE)
if(rc /= 8*N_states) stop "push"
N_buf = N_bufi
!N_buf = (/0,1,0/)
rc = f77_zmq_send( zmq_socket_push, N_buf, 4*3, ZMQ_SNDMORE)
if(rc /= 4*3) stop "push5"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_send( zmq_socket_push, int_buf, 4*N_buf(1), ZMQ_SNDMORE)
if(rc /= 4*N_buf(1)) stop "push6"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_send( zmq_socket_push, double_buf, 8*N_buf(2), ZMQ_SNDMORE)
if(rc /= 8*N_buf(2)) stop "push8"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_send( zmq_socket_push, det_buf, 2*N_int*bit_kind*N_buf(3), ZMQ_SNDMORE)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "push10"
end if
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push11"
end if
! Activate is zmq_socket_push is a REQ
if(m_task > 0) then
rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push1"
rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push4"
rc = f77_zmq_send( zmq_socket_push, edI_task, 8*n_tasks, ZMQ_SNDMORE)
if(rc /= 8*n_tasks) stop "push5"
rc = f77_zmq_send( zmq_socket_push, edI_index, 4*n_tasks, 0)
if(rc /= 4*n_tasks) stop "push6"
else if(m_task == 0) then
rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push1"
rc = f77_zmq_send( zmq_socket_push, task_id, 4*n_tasks, 0)
if(rc /= 4*n_tasks) stop "push2"
else
rc = f77_zmq_send( zmq_socket_push, f, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "push4"
rc = f77_zmq_send( zmq_socket_push, breve_delta_m, 8*N_det*N_states*2, ZMQ_SNDMORE)
if(rc /= 8*N_det*N_states*2) stop "push6"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "push6"
end if
! Activate is zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE
character*(2) :: ok
rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
IRP_ENDIF
end subroutine
BEGIN_PROVIDER [ real(4), real4buf, (N_states, N_det, 2) ]
END_PROVIDER
subroutine pull_dress_results(zmq_socket_pull, ind, cur_cp, delta_loc, int_buf, double_buf, det_buf, N_buf, task_id, contrib)
subroutine pull_dress_results(zmq_socket_pull, m_task, f, edI_task, edI_index, breve_delta_m, task_id, n_tasks)
use f77_zmq
implicit none
integer, parameter :: sendt = 4
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(out) :: cur_cp
double precision, intent(inout) :: delta_loc(N_states, N_det, 2)
double precision, intent(out) :: double_buf(*), contrib(N_states)
integer, intent(out) :: int_buf(*)
integer(bit_kind), intent(out) :: det_buf(N_int, 2, *)
integer, intent(out) :: ind
integer, intent(out) :: task_id
integer, intent(out) :: m_task, f, edI_index(N_det_generators)
double precision, intent(out) :: breve_delta_m(N_states, N_det, 2), edI_task(N_det_generators)
integer, intent(out) :: task_id(pt2_n_tasks_max), n_tasks
integer :: rc, i, j, k
integer, intent(out) :: N_buf(3)
rc = f77_zmq_recv( zmq_socket_pull, ind, 4, 0)
if(rc /= 4) stop "pulla"
rc = f77_zmq_recv( zmq_socket_pull, m_task, 4, 0)
if(rc /= 4) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, cur_cp, 4, 0)
if(rc /= 4) stop "pulla"
if(cur_cp /= -1) then
rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,1), N_states*sendt*N_det, 0)
if(rc /= sendt*N_states*N_det) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, real4buf(1,1,2), N_states*sendt*N_det, 0)
if(rc /= sendt*N_states*N_det) stop "pulld"
do i=1,2
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j,k)
do j=1,N_det
do k=1,N_states
delta_loc(k,j,i) = real(real4buf(k,j,i), 8)
end do
end do
end do
else
rc = f77_zmq_recv( zmq_socket_pull, contrib, 8*N_states, 0)
if(rc /= 8*N_states) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, N_buf, 4*3, 0)
if(rc /= 4*3) stop "pull"
if(N_buf(1) > N_dress_int_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(2) > N_dress_double_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(3) > N_dress_det_buffer) stop "run_dress_slave N_buf bad size?"
if(N_buf(1) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, int_buf, 4*N_buf(1), 0)
if(rc /= 4*N_buf(1)) stop "pull1"
if(m_task > 0) then
rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
if(rc /= 4) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0)
if(rc /= 4) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, edI_task, 8*n_tasks, 0)
if(rc /= 8*n_tasks) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, edI_index, 4*n_tasks, 0)
if(rc /= 4*n_tasks) stop "pullc"
else if(m_task==0) then
rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
if(rc /= 4) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4*n_tasks, 0)
if(rc /= 4*n_tasks) stop "pull4"
else
rc = f77_zmq_recv( zmq_socket_pull, f, 4, 0)
if(rc /= 4) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, breve_delta_m, 8*N_det*N_states*2, 0)
if(rc /= 8*N_det*N_states*2) stop "pullc"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull4"
end if
if(N_buf(2) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, double_buf, 8*N_buf(2), 0)
if(rc /= 8*N_buf(2)) stop "pull2"
end if
if(N_buf(3) > 0) then
rc = f77_zmq_recv( zmq_socket_pull, det_buf, 2*N_int*bit_kind*N_buf(3), 0)
if(rc /= 2*N_int*bit_kind*N_buf(3)) stop "pull3"
end if
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "pull4"
end if
! Activate is zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE

View File

@ -300,7 +300,6 @@ subroutine dress_with_alpha_buffer(Nstates,Ndet,Nint,delta_ij_loc, i_gen, minili
haa = diag_H_mat_elem_fock(psi_det_generators(1,1,i_gen),alpha,fock_diag_tmp_(1,1,iproc),N_int)
call dress_with_alpha_(Nstates, Ndet, Nint, delta_ij_loc, minilist, det_minilist, n_minilist, alpha, haa, contrib, c_alpha, iproc)
slave_sum_alpha2(:,iproc) += c_alpha(:)**2
if(contrib < sb(iproc)%mini) then
call add_to_selection_buffer(sb(iproc), alpha, contrib)

View File

@ -167,8 +167,7 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
end select
end
subroutine get_double_excitation(det1,det2,exc,phase,Nint)
subroutine get_double_excitation_ref(det1,det2,exc,phase,Nint)
use bitmasks
implicit none
BEGIN_DOC
@ -312,6 +311,137 @@ subroutine get_double_excitation(det1,det2,exc,phase,Nint)
end
subroutine get_phasemask_bit(det1, pm, Nint)
use bitmasks
implicit none
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: det1(Nint,2)
integer(bit_kind), intent(out) :: pm(Nint,2)
integer(bit_kind) :: tmp
integer :: ispin, i
do ispin=1,2
tmp = 0_8
do i=1,Nint
pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1))
pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2))
pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4))
pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8))
pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16))
pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32))
pm(i,ispin) = xor(pm(i,ispin), tmp)
if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp)
end do
end do
end subroutine
subroutine get_double_excitation(det1,det2,exc,phase,Nint)
use bitmasks
implicit none
BEGIN_DOC
! Returns the two excitation operators between two doubly excited determinants and the phase
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: det1(Nint,2)
integer(bit_kind), intent(in) :: det2(Nint,2)
integer, intent(out) :: exc(0:2,2,2)
double precision, intent(out) :: phase
integer :: tz
integer :: l, ispin, idx_hole, idx_particle, ishift
integer :: nperm
integer :: i,j,k,m,n
integer :: high, low
integer :: a,b,c,d
integer(bit_kind) :: hole, particle, tmp, pm(Nint,2)
double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
double precision :: refaz
logical :: ok
ASSERT (Nint > 0)
!do ispin=1,2
!tmp = 0_8
!do i=1,Nint
! pm(i,ispin) = xor(det1(i,ispin), ishft(det1(i,ispin), 1))
! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 2))
! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 4))
! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 8))
! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 16))
! pm(i,ispin) = xor(pm(i,ispin), ishft(pm(i,ispin), 32))
! pm(i,ispin) = xor(pm(i,ispin), tmp)
! if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp)
!end do
!end do
call get_phasemask_bit(det1, pm, Nint)
nperm = 0
exc(0,1,1) = 0
exc(0,2,1) = 0
exc(0,1,2) = 0
exc(0,2,2) = 0
do ispin = 1,2
idx_particle = 0
idx_hole = 0
ishift = 1-bit_kind_size
!par = 0
do l=1,Nint
ishift = ishift + bit_kind_size
if (det1(l,ispin) == det2(l,ispin)) then
cycle
endif
tmp = xor( det1(l,ispin), det2(l,ispin) )
particle = iand(tmp, det2(l,ispin))
hole = iand(tmp, det1(l,ispin))
do while (particle /= 0_bit_kind)
tz = trailz(particle)
nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1)
idx_particle = idx_particle + 1
exc(0,2,ispin) = exc(0,2,ispin) + 1
exc(idx_particle,2,ispin) = tz+ishift
particle = iand(particle,particle-1_bit_kind)
enddo
if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)==2
exit
endif
do while (hole /= 0_bit_kind)
tz = trailz(hole)
nperm = nperm + iand(ishft(pm(l,ispin), -tz), 1)
idx_hole = idx_hole + 1
exc(0,1,ispin) = exc(0,1,ispin) + 1
exc(idx_hole,1,ispin) = tz+ishift
hole = iand(hole,hole-1_bit_kind)
enddo
if (iand(exc(0,1,ispin),exc(0,2,ispin))==2) then ! exc(0,1,ispin)==2 or exc(0,2,ispin)
exit
endif
enddo
select case (exc(0,1,ispin))
case(0)
cycle
case(1)
if(exc(1,1,ispin) < exc(1,2,ispin)) nperm = nperm+1
case (2)
a = exc(1,1,ispin)
b = exc(1,2,ispin)
c = exc(2,1,ispin)
d = exc(2,2,ispin)
if(min(a,c) > max(b,d) .or. min(b,d) > max(a,c) .or. (a-b)*(c-d)<0) then
nperm = nperm + 1
end if
exit
end select
enddo
phase = phase_dble(iand(nperm,1))
!call get_double_excitation_ref(det1,det2,exc,refaz,Nint)
!if(phase == refaz) then
! print *, "phase", phase, refaz, n, exc(0,1,1)
!end if
end
subroutine get_mono_excitation(det1,det2,exc,phase,Nint)
use bitmasks
implicit none