Merge scemama

This commit is contained in:
Anthony Scemama 2017-04-20 18:04:51 +02:00
commit 20f2fff7b2
22 changed files with 3515 additions and 1666 deletions

View File

@ -635,20 +635,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
use selection_types
implicit none
integer, intent(in) :: i_generator, sp, h1, h2
double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num)
logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num)
double precision, intent(in) :: fock_diag_tmp(mo_tot_num)
double precision, intent(in) :: E0(N_states)
double precision, intent(inout) :: pt2(N_states)
integer, intent(in) :: i_generator, sp, h1, h2
double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num)
logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num)
double precision, intent(in) :: fock_diag_tmp(mo_tot_num)
double precision, intent(in) :: E0(N_states)
double precision, intent(inout) :: pt2(N_states)
type(selection_buffer), intent(inout) :: buf
logical :: ok
integer :: s1, s2, p1, p2, ib, j, istate
integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp
double precision, external :: diag_H_mat_elem_fock
logical :: ok
integer :: s1, s2, p1, p2, ib, j, istate
integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
double precision :: e_pert, delta_E, val, Hii, max_e_pert,tmp
double precision, external :: diag_H_mat_elem_fock
logical, external :: detEq
logical, external :: detEq
if(sp == 3) then
@ -670,18 +670,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
if(banned(p1,p2)) cycle
if(mat(1, p1, p2) == 0d0) cycle
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
logical, external :: is_in_wavefunction
if (is_in_wavefunction(det,N_int)) then
cycle
endif
if (do_ddci) then
logical, external :: is_a_two_holes_two_particles
if (is_a_two_holes_two_particles(det)) then
cycle
endif
endif
Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
max_e_pert = 0d0
do istate=1,N_states
delta_E = E0(istate) - Hii
val = mat(istate, p1, p2) + mat(istate, p1, p2)
val = mat(istate, p1, p2) + mat(istate, p1, p2)
tmp = dsqrt(delta_E * delta_E + val * val)
if (delta_E < 0.d0) then
tmp = -tmp
@ -1205,3 +1207,127 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
end do genl
end subroutine
subroutine ZMQ_selection(N_in, pt2)
use f77_zmq
use selection_types
implicit none
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer, intent(in) :: N_in
type(selection_buffer) :: b
integer :: i, N
integer, external :: omp_get_thread_num
double precision, intent(out) :: pt2(N_states)
integer, parameter :: maxtasks=10000
N = max(N_in,1)
if (.True.) then
PROVIDE pt2_e0_denominator
provide nproc
call new_parallel_job(zmq_to_qp_run_socket,"selection")
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
call create_selection_buffer(N, N*2, b)
endif
character*(20*maxtasks) :: task
task = ' '
integer :: k
k=0
do i= 1, N_det_generators
k = k+1
write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
if (k>=maxtasks) then
k=0
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
endif
enddo
if (k > 0) then
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
endif
call zmq_set_running(zmq_to_qp_run_socket)
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
i = omp_get_thread_num()
if (i==0) then
call selection_collector(b, pt2)
else
call selection_slave_inproc(i)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'selection')
if (N_in > 0) then
call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN
call copy_H_apply_buffer_to_wf()
if (s2_eig) then
call make_s2_eigenfunction
endif
call save_wavefunction
endif
end subroutine
subroutine selection_slave_inproc(i)
implicit none
integer, intent(in) :: i
call run_selection_slave(1,i,pt2_e0_denominator)
end
subroutine selection_collector(b, pt2)
use f77_zmq
use selection_types
use bitmasks
implicit none
type(selection_buffer), intent(inout) :: b
double precision, intent(out) :: pt2(N_states)
double precision :: pt2_mwen(N_states)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull
integer :: msg_size, rc, more
integer :: acc, i, j, robin, N, ntask
double precision, allocatable :: val(:)
integer(bit_kind), allocatable :: det(:,:,:)
integer, allocatable :: task_id(:)
integer :: done
real :: time, time0
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det))
done = 0
more = 1
pt2(:) = 0d0
call CPU_TIME(time0)
do while (more == 1)
call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask)
pt2 += pt2_mwen
do i=1, N
call add_to_selection_buffer(b, det(1,1,i), val(i))
end do
do i=1, ntask
if(task_id(i) == 0) then
print *, "Error in collector"
endif
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
end do
done += ntask
call CPU_TIME(time)
! print *, "DONE" , done, time - time0
end do
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_pull_socket(zmq_socket_pull)
call sort_selection_buffer(b)
end subroutine

View File

@ -0,0 +1,580 @@
BEGIN_PROVIDER [ integer, fragment_first ]
implicit none
fragment_first = first_det_of_teeth(1)
END_PROVIDER
subroutine ZMQ_pt2(pt2,relative_error)
use f77_zmq
use selection_types
implicit none
character(len=64000) :: task
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2
type(selection_buffer) :: b
integer, external :: omp_get_thread_num
double precision, intent(in) :: relative_error
double precision, intent(out) :: pt2(N_states)
double precision, allocatable :: pt2_detail(:,:), comb(:)
logical, allocatable :: computed(:)
integer, allocatable :: tbc(:)
integer :: i, j, k, Ncomb, generator_per_task, i_generator_end
integer, external :: pt2_find
double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
double precision, external :: omp_get_wtime
double precision :: time0, time
allocate(pt2_detail(N_states, N_det_generators), comb(N_det_generators/2), computed(N_det_generators), tbc(0:size_tbc))
sumabove = 0d0
sum2above = 0d0
Nabove = 0d0
provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight
!call random_seed()
computed = .false.
tbc(0) = first_det_of_comb - 1
do i=1, tbc(0)
tbc(i) = i
computed(i) = .true.
end do
pt2_detail = 0d0
time0 = omp_get_wtime()
print *, "time - avg - err - n_combs"
generator_per_task = 1
do while(.true.)
call write_time(6)
call new_parallel_job(zmq_to_qp_run_socket,"pt2")
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
call create_selection_buffer(1, 1*2, b)
Ncomb=size(comb)
call get_carlo_workbatch(computed, comb, Ncomb, tbc)
call write_time(6)
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer :: ipos
logical :: tasks
tasks = .False.
ipos=1
do i=1,tbc(0)
if(tbc(i) > fragment_first) then
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i)
ipos += 20
if (ipos > 63980) then
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
ipos=1
tasks = .True.
endif
else
do j=1,fragment_count
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i)
ipos += 20
if (ipos > 63980) then
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
ipos=1
tasks = .True.
endif
end do
end if
end do
if (ipos > 1) then
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
tasks = .True.
endif
if (tasks) then
call zmq_set_running(zmq_to_qp_run_socket)
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) &
!$OMP PRIVATE(i)
i = omp_get_thread_num()
if (i==0) then
call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2)
else
call pt2_slave_inproc(i)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'pt2')
else
pt2 = 0.d0
do i=1,N_det_generators
do k=1,N_states
pt2(k) = pt2(k) + pt2_detail(k,i)
enddo
enddo
endif
tbc(0) = 0
if (pt2(1) /= 0.d0) then
exit
endif
end do
deallocate(pt2_detail, comb, computed, tbc)
end subroutine
subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove)
integer, intent(in) :: tbc(0:size_tbc), Ncomb
logical, intent(in) :: computed(N_det_generators)
double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators)
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
integer :: i, dets(comb_teeth)
double precision :: myVal, myVal2
mainLoop : do i=1,Ncomb
call get_comb(comb(i), dets, comb_teeth)
do j=1,comb_teeth
if(.not.(computed(dets(j)))) then
exit mainLoop
end if
end do
myVal = 0d0
myVal2 = 0d0
do j=comb_teeth,1,-1
myVal += pt2_detail(1, dets(j)) * pt2_weight_inv(dets(j)) * comb_step
sumabove(j) += myVal
sum2above(j) += myVal*myVal
Nabove(j) += 1
end do
end do mainLoop
end subroutine
subroutine pt2_slave_inproc(i)
implicit none
integer, intent(in) :: i
call run_pt2_slave(1,i,pt2_e0_denominator)
end
subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2)
use f77_zmq
use selection_types
use bitmasks
implicit none
integer, intent(in) :: Ncomb
double precision, intent(inout) :: pt2_detail(N_states, N_det_generators)
double precision, intent(in) :: comb(Ncomb), relative_error
logical, intent(inout) :: computed(N_det_generators)
integer, intent(in) :: tbc(0:size_tbc)
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
double precision, intent(out) :: pt2(N_states)
type(selection_buffer), intent(inout) :: b
double precision, allocatable :: pt2_mwen(:,:)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull
integer :: msg_size, rc, more
integer :: acc, i, j, robin, N, ntask
double precision, allocatable :: val(:)
integer(bit_kind), allocatable :: det(:,:,:)
integer, allocatable :: task_id(:)
integer :: done, Nindex
integer, allocatable :: index(:)
double precision, save :: time0 = -1.d0
double precision :: time, timeLast
double precision, external :: omp_get_wtime
integer :: tooth, firstTBDcomb, orgTBDcomb
integer, allocatable :: parts_to_get(:)
logical, allocatable :: actually_computed(:)
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), &
pt2_mwen(N_states, N_det_generators) )
do i=1,N_det_generators
actually_computed(i) = computed(i)
enddo
parts_to_get(:) = 1
if(fragment_first > 0) then
do i=1,fragment_first
parts_to_get(i) = fragment_count
enddo
endif
do i=1,tbc(0)
actually_computed(tbc(i)) = .false.
end do
orgTBDcomb = Nabove(1)
firstTBDcomb = 1
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1))
more = 1
if (time0 < 0.d0) then
time0 = omp_get_wtime()
endif
timeLast = time0
print *, 'N_deterministic = ', first_det_of_teeth(1)-1
pullLoop : do while (more == 1)
call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask)
do i=1,Nindex
pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i)
parts_to_get(index(i)) -= 1
if(parts_to_get(index(i)) < 0) then
print *, i, index(i), parts_to_get(index(i)), Nindex
print *, "PARTS ??"
print *, parts_to_get
stop "PARTS ??"
end if
if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true.
end do
do i=1, ntask
if(task_id(i) == 0) then
print *, "Error in collector"
endif
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
end do
time = omp_get_wtime()
if(time - timeLast > 1d1 .or. more /= 1) then
timeLast = time
do i=1, first_det_of_teeth(1)-1
if(.not.(actually_computed(i))) then
print *, "PT2 : deterministic part not finished"
cycle pullLoop
end if
end do
double precision :: E0, avg, eqt, prop
call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove)
firstTBDcomb = Nabove(1) - orgTBDcomb + 1
if(Nabove(1) < 2d0) cycle
call get_first_tooth(actually_computed, tooth)
done = 0
do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1)-1
if(actually_computed(i)) done = done + 1
end do
E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1))
prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1))
prop = prop * pt2_weight_inv(first_det_of_teeth(tooth))
E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop
avg = E0 + (sumabove(tooth) / Nabove(tooth))
eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2))
time = omp_get_wtime()
if (dabs(eqt/avg) < relative_error) then
pt2(1) = avg
! exit pullLoop
else
print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth)
endif
end if
end do pullLoop
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_pull_socket(zmq_socket_pull)
call sort_selection_buffer(b)
end subroutine
integer function pt2_find(v, w, sze, imin, imax)
implicit none
integer, intent(in) :: sze, imin, imax
double precision, intent(in) :: v, w(sze)
integer :: i,l,h
integer, parameter :: block=64
l = imin
h = imax-1
do while(h-l >= block)
i = ishft(h+l,-1)
if(w(i+1) > v) then
h = i-1
else
l = i+1
end if
end do
!DIR$ LOOP COUNT (64)
do pt2_find=l,h
if(w(pt2_find) >= v) then
exit
end if
end do
end function
BEGIN_PROVIDER [ integer, comb_teeth ]
implicit none
comb_teeth = 100
END_PROVIDER
subroutine get_first_tooth(computed, first_teeth)
implicit none
logical, intent(in) :: computed(N_det_generators)
integer, intent(out) :: first_teeth
integer :: i, first_det
first_det = 1
first_teeth = 1
do i=first_det_of_comb, N_det_generators
if(.not.(computed(i))) then
first_det = i
exit
end if
end do
do i=comb_teeth, 1, -1
if(first_det_of_teeth(i) < first_det) then
first_teeth = i
exit
end if
end do
end subroutine
subroutine get_last_full_tooth(computed, last_tooth)
implicit none
logical, intent(in) :: computed(N_det_generators)
integer, intent(out) :: last_tooth
integer :: i, j, missing
last_tooth = 0
combLoop : do i=comb_teeth, 1, -1
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-12) ! /4096
do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1
if(.not.computed(j)) then
missing -= 1
if(missing < 0) cycle combLoop
end if
end do
last_tooth = i
exit
end do combLoop
end subroutine
BEGIN_PROVIDER [ integer, size_tbc ]
implicit none
BEGIN_DOC
! Size of the tbc array
END_DOC
size_tbc = N_det_generators + fragment_count*fragment_first
END_PROVIDER
subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
implicit none
integer, intent(inout) :: Ncomb
double precision, intent(out) :: comb(Ncomb)
integer, intent(inout) :: tbc(0:size_tbc)
logical, intent(inout) :: computed(N_det_generators)
integer :: i, j, last_full, dets(comb_teeth), tbc_save
integer :: icount, n
n = tbc(0)
icount = 0
call RANDOM_NUMBER(comb)
do i=1,size(comb)
comb(i) = comb(i) * comb_step
tbc_save = tbc(0)
!DIR$ FORCEINLINE
call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth)
if (tbc(0) < size(tbc)) then
Ncomb = i
else
tbc(0) = tbc_save
return
endif
icount = icount + tbc(0) - tbc_save
if ((i>1000).and.(icount > n)) then
call get_filling_teeth(computed, tbc)
icount = 0
n = ishft(tbc_save,-4)
endif
enddo
call get_filling_teeth(computed, tbc)
end subroutine
subroutine get_filling_teeth(computed, tbc)
implicit none
integer, intent(inout) :: tbc(0:size_tbc)
logical, intent(inout) :: computed(N_det_generators)
integer :: i, j, k, last_full, dets(comb_teeth)
call get_last_full_tooth(computed, last_full)
if(last_full /= 0) then
if (tbc(0) > size(tbc) - first_det_of_teeth(last_full+1) -2) then
return
endif
k = tbc(0)+1
do j=1,first_det_of_teeth(last_full+1)-1
if(.not.(computed(j))) then
tbc(k) = j
k=k+1
computed(j) = .true.
end if
end do
tbc(0) = k-1
end if
end subroutine
subroutine reorder_tbc(tbc)
implicit none
integer, intent(inout) :: tbc(0:size_tbc)
logical, allocatable :: ltbc(:)
integer :: i, ci
allocate(ltbc(size_tbc))
ltbc(:) = .false.
do i=1,tbc(0)
ltbc(tbc(i)) = .true.
end do
ci = 0
do i=1,size_tbc
if(ltbc(i)) then
ci = ci+1
tbc(ci) = i
end if
end do
end subroutine
subroutine get_comb(stato, dets, ct)
implicit none
integer, intent(in) :: ct
double precision, intent(in) :: stato
integer, intent(out) :: dets(ct)
double precision :: curs
integer :: j
integer, external :: pt2_find
curs = 1d0 - stato
do j = comb_teeth, 1, -1
!DIR$ FORCEINLINE
dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1))
curs -= comb_step
end do
end subroutine
subroutine add_comb(comb, computed, tbc, stbc, ct)
implicit none
integer, intent(in) :: stbc, ct
double precision, intent(in) :: comb
logical, intent(inout) :: computed(N_det_generators)
integer, intent(inout) :: tbc(0:stbc)
integer :: i, k, l, dets(ct)
!DIR$ FORCEINLINE
call get_comb(comb, dets, ct)
k=tbc(0)+1
do i = 1, ct
l = dets(i)
if(.not.(computed(l))) then
tbc(k) = l
k = k+1
computed(l) = .true.
end if
end do
tbc(0) = k-1
end subroutine
BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ]
&BEGIN_PROVIDER [ double precision, comb_step ]
&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ]
&BEGIN_PROVIDER [ integer, first_det_of_comb ]
implicit none
integer :: i
double precision :: norm_left, stato
integer, external :: pt2_find
pt2_weight(1) = psi_coef_generators(1,1)**2
pt2_cweight(1) = psi_coef_generators(1,1)**2
do i=2,N_det_generators
pt2_weight(i) = psi_coef_generators(i,1)**2
pt2_cweight(i) = pt2_cweight(i-1) + psi_coef_generators(i,1)**2
end do
do i=1,N_det_generators
pt2_weight(i) = pt2_weight(i) / pt2_cweight(N_det_generators)
pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(N_det_generators)
enddo
norm_left = 1d0
comb_step = 1d0/dfloat(comb_teeth)
first_det_of_comb = 1
do i=1,N_det_generators
if(pt2_weight(i)/norm_left < comb_step*.5d0) then
first_det_of_comb = i
exit
end if
norm_left -= pt2_weight(i)
end do
comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step
stato = 1d0 - comb_step
iloc = N_det_generators
do i=comb_teeth, 1, -1
integer :: iloc
iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc)
first_det_of_teeth(i) = iloc
stato -= comb_step
end do
first_det_of_teeth(comb_teeth+1) = N_det_generators + 1
first_det_of_teeth(1) = first_det_of_comb
if(first_det_of_teeth(1) /= first_det_of_comb) then
print *, 'Error in ', irp_here
stop "comb provider"
endif
END_PROVIDER
BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ]
implicit none
BEGIN_DOC
! Inverse of pt2_weight array
END_DOC
integer :: i
do i=1,N_det_generators
pt2_weight_inv(i) = 1.d0/pt2_weight(i)
enddo
END_PROVIDER

View File

@ -46,7 +46,7 @@ subroutine run_selection_slave(thread,iproc,energy)
if(buf%N == 0) then
! Only first time
call create_selection_buffer(N, N*2, buf)
call create_selection_buffer(N, N*3, buf2)
call create_selection_buffer(N, N*2, buf2)
else
if(N /= buf%N) stop "N changed... wtf man??"
end if
@ -66,8 +66,10 @@ subroutine run_selection_slave(thread,iproc,energy)
call push_selection_results(zmq_socket_push, pt2, buf, task_id(1), ctask)
do i=1,buf%cur
call add_to_selection_buffer(buf2, buf%det(1,1,i), buf%val(i))
if (buf2%cur == buf2%N) then
call sort_selection_buffer(buf2)
endif
enddo
call sort_selection_buffer(buf2)
buf%mini = buf2%mini
pt2 = 0d0
buf%cur = 0

View File

@ -27,7 +27,7 @@ subroutine add_to_selection_buffer(b, det, val)
if(dabs(val) >= b%mini) then
b%cur += 1
b%det(:,:,b%cur) = det(:,:)
b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
b%val(b%cur) = val
if(b%cur == size(b%val)) then
call sort_selection_buffer(b)
@ -41,29 +41,32 @@ subroutine sort_selection_buffer(b)
implicit none
type(selection_buffer), intent(inout) :: b
double precision, allocatable :: vals(:), absval(:)
double precision, allocatable:: absval(:)
integer, allocatable :: iorder(:)
integer(bit_kind), allocatable :: detmp(:,:,:)
double precision, pointer :: vals(:)
integer(bit_kind), pointer :: detmp(:,:,:)
integer :: i, nmwen
logical, external :: detEq
nmwen = min(b%N, b%cur)
allocate(iorder(b%cur), detmp(N_int, 2, nmwen), absval(b%cur), vals(nmwen))
allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)), absval(b%cur), vals(size(b%val)))
absval = -dabs(b%val(:b%cur))
do i=1,b%cur
iorder(i) = i
end do
call dsort(absval, iorder, b%cur)
do i=1, nmwen
detmp(:,:,i) = b%det(:,:,iorder(i))
detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))
vals(i) = b%val(iorder(i))
end do
b%det(:,:,:nmwen) = detmp(:,:,:)
b%det(:,:,nmwen+1:) = 0_bit_kind
b%val(:nmwen) = vals(:)
b%val(nmwen+1:) = 0d0
do i=nmwen+1, size(vals)
vals(i) = 0.d0
enddo
deallocate(b%det, b%val)
b%det => detmp
b%val => vals
b%mini = max(b%mini,dabs(b%val(b%N)))
b%cur = nmwen
end subroutine

View File

@ -12,8 +12,8 @@ program selection_slave
end
subroutine provide_everything
PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context mo_mono_elec_integral
! PROVIDE pt2_e0_denominator mo_tot_num N_int
PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count
end
subroutine run_wf
@ -23,7 +23,7 @@ subroutine run_wf
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
double precision :: energy(N_states)
character*(64) :: states(2)
character*(64) :: states(4)
integer :: rc, i
call provide_everything
@ -31,6 +31,7 @@ subroutine run_wf
zmq_context = f77_zmq_ctx_new ()
states(1) = 'selection'
states(2) = 'davidson'
states(3) = 'pt2'
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
@ -52,7 +53,7 @@ subroutine run_wf
!$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num()
call selection_slave_tcp(i, energy)
call run_selection_slave(0,i,energy)
!$OMP END PARALLEL
print *, 'Selection done'
@ -62,46 +63,29 @@ subroutine run_wf
! --------
print *, 'Davidson'
call davidson_miniserver_get()
call davidson_slave_tcp(i)
print *, 'Davidson done'
else if (trim(zmq_state) == 'pt2') then
! PT2
! ---
print *, 'PT2'
call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
logical :: lstop
lstop = .False.
!$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num()
call davidson_slave_tcp(i)
call run_pt2_slave(0,i,energy,lstop)
!$OMP END PARALLEL
print *, 'Davidson done'
print *, 'PT2 done'
endif
end do
end
subroutine update_energy(energy)
implicit none
double precision, intent(in) :: energy(N_states)
BEGIN_DOC
! Update energy when it is received from ZMQ
END_DOC
integer :: j,k
do j=1,N_states
do k=1,N_det
CI_eigenvectors(k,j) = psi_coef(k,j)
enddo
enddo
call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int)
if (.True.) then
do k=1,N_states
ci_electronic_energy(k) = energy(k)
enddo
TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors
endif
call write_double(6,ci_energy,'Energy')
end
subroutine selection_slave_tcp(i,energy)
implicit none
double precision, intent(in) :: energy(N_states)
integer, intent(in) :: i
call run_selection_slave(0,i,energy)
end

View File

@ -0,0 +1,126 @@
subroutine ZMQ_selection(N_in, pt2)
use f77_zmq
use selection_types
implicit none
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer, intent(in) :: N_in
type(selection_buffer) :: b
integer :: i, N
integer, external :: omp_get_thread_num
double precision, intent(out) :: pt2(N_states)
integer, parameter :: maxtasks=10000
PROVIDE fragment_count
N = max(N_in,1)
if (.True.) then
PROVIDE pt2_e0_denominator
provide nproc
call new_parallel_job(zmq_to_qp_run_socket,"selection")
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
call create_selection_buffer(N, N*2, b)
endif
character*(20*maxtasks) :: task
task = ' '
integer :: k
k=0
do i= 1, N_det_generators
k = k+1
write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
if (k>=maxtasks) then
k=0
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
endif
end do
if (k > 0) then
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
endif
call zmq_set_running(zmq_to_qp_run_socket)
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
i = omp_get_thread_num()
if (i==0) then
call selection_collector(b, pt2)
else
call selection_slave_inproc(i)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'selection')
if (N_in > 0) then
call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
call copy_H_apply_buffer_to_wf()
if (s2_eig) then
call make_s2_eigenfunction
endif
call save_wavefunction
endif
end subroutine
subroutine selection_slave_inproc(i)
implicit none
integer, intent(in) :: i
call run_selection_slave(1,i,pt2_e0_denominator)
end
subroutine selection_collector(b, pt2)
use f77_zmq
use selection_types
use bitmasks
implicit none
type(selection_buffer), intent(inout) :: b
double precision, intent(out) :: pt2(N_states)
double precision :: pt2_mwen(N_states)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull
integer :: msg_size, rc, more
integer :: acc, i, j, robin, N, ntask
double precision, allocatable :: val(:)
integer(bit_kind), allocatable :: det(:,:,:)
integer, allocatable :: task_id(:)
integer :: done
real :: time, time0
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
zmq_socket_pull = new_zmq_pull_socket()
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators))
done = 0
more = 1
pt2(:) = 0d0
call CPU_TIME(time0)
do while (more == 1)
call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask)
pt2 += pt2_mwen
do i=1, N
call add_to_selection_buffer(b, det(1,1,i), val(i))
end do
do i=1, ntask
if(task_id(i) == 0) then
print *, "Error in collector"
endif
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
end do
done += ntask
call CPU_TIME(time)
! print *, "DONE" , done, time - time0
end do
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_pull_socket(zmq_socket_pull)
call sort_selection_buffer(b)
end subroutine

View File

@ -90,13 +90,13 @@ subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
psi_det_size = psi_det_size_read
TOUCH psi_det_size N_det N_states
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)
if (rc /= N_int*2*N_det*bit_kind) then
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,0)
if (rc /= psi_det_size*N_states*8) then
print *, '77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)'
stop 'error'

View File

@ -7,13 +7,13 @@ default: 1.e-12
[n_states_diag]
type: States_number
doc: Number of states to consider during the Davdison diagonalization
default: 10
default: 4
interface: ezfio,provider,ocaml
[davidson_sze_max]
type: Strictly_positive_int
doc: Number of micro-iterations before re-contracting
default: 10
default: 8
interface: ezfio,provider,ocaml
[state_following]

View File

@ -1,191 +1,6 @@
!brought to you by garniroy inc.
use bitmasks
use f77_zmq
subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
implicit none
integer , intent(in) :: blockb, bs, blockb2, istep
integer , intent(inout) :: N
integer , intent(inout) :: idx(bs)
double precision , intent(inout) :: vt(N_states_diag, bs)
double precision , intent(inout) :: st(N_states_diag, bs)
integer :: i,ii, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi
integer(bit_kind) :: sorted_i(N_int)
double precision :: s2, hij
logical, allocatable :: wrotten(:)
allocate(wrotten(bs))
wrotten = .false.
PROVIDE dav_det
ii=0
sh = blockb
do sh2=1,shortcut_(0,1)
exa = 0
do ni=1,N_int
exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1)))
end do
if(exa > 2) cycle
do i=blockb2+shortcut_(sh,1),shortcut_(sh+1,1)-1, istep
ii = i - shortcut_(blockb,1) + 1
org_i = sort_idx_(i,1)
do ni=1,N_int
sorted_i(ni) = sorted_(ni,i,1)
enddo
do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1
if(i == j) cycle
org_j = sort_idx_(j,1)
ext = exa
do ni=1,N_int
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
end do
if(ext <= 4) then
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
enddo
endif
enddo
enddo
enddo
if (blockb <= shortcut_(0,2)) then
sh=blockb
do sh2=sh, shortcut_(0,2), shortcut_(0,1)
do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep
ii += 1
org_i = sort_idx_(i,2)
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
if(i == j) cycle
org_j = sort_idx_(j,2)
ext = 0
do ni=1,N_int
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
end do
if(ext == 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
enddo
end if
end do
end do
enddo
endif
N=0
do i=1,bs
if(wrotten(i)) then
N += 1
idx(N) = idx(i)
vt(:,N) = vt(:,i)
st(:,N) = st(:,i)
end if
end do
end subroutine
subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
implicit none
integer , intent(in) :: N
integer , intent(in) :: idx(N)
double precision , intent(in) :: vt(N_states_diag, N)
double precision , intent(in) :: st(N_states_diag, N)
double precision , intent(inout) :: v0t(N_states_diag,dav_size)
double precision , intent(inout) :: s0t(N_states_diag,dav_size)
integer :: i, j, k
!DIR$ IVDEP
do i=1,N
k = idx(i)
!DIR$ IVDEP
do j=1,N_states_diag
v0t(j,k) = v0t(j,k) + vt(j,i)
s0t(j,k) = s0t(j,k) + st(j,i)
enddo
end do
end subroutine
subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
use f77_zmq
implicit none
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
integer, intent(in) :: n, n_st_8
double precision, intent(in) :: ut(n_st_8,n)
integer :: i,k
dav_size = n
touch dav_size
do i=1,n
do k=1,N_int
dav_det(k,1,i) = psi_det(k,1,i)
dav_det(k,2,i) = psi_det(k,2,i)
enddo
enddo
do i=1,n
do k=1,N_states_diag
dav_ut(k,i) = ut(k,i)
enddo
enddo
touch dav_det dav_ut
call new_parallel_job(zmq_to_qp_run_socket,"davidson")
end subroutine
subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket
integer ,intent(in) :: blockb, blockb2, istep
character*(512) :: task
write(task,*) blockb, blockb2, istep
call add_task_to_taskserver(zmq_to_qp_run_socket, task)
end subroutine
subroutine davidson_slave_inproc(i)
implicit none
@ -211,8 +26,6 @@ subroutine davidson_run_slave(thread,iproc)
integer, intent(in) :: thread, iproc
integer :: worker_id, task_id, blockb
character*(512) :: task
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
@ -231,7 +44,7 @@ subroutine davidson_run_slave(thread,iproc)
return
end if
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread)
@ -239,338 +52,346 @@ end subroutine
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id)
use f77_zmq
implicit none
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),intent(in) :: zmq_socket_push
integer,intent(in) :: worker_id
integer :: task_id
character*(512) :: task
integer,intent(in) :: worker_id, N_st, sze
integer :: task_id
character*(512) :: msg
integer :: imin, imax, ishift, istep
double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0
integer :: blockb, blockb2, istep
integer :: N
integer , allocatable :: idx(:)
double precision , allocatable :: vt(:,:)
double precision , allocatable :: st(:,:)
! Get wave function (u_t)
! -----------------------
integer :: rc
write(msg, *) 'get_psi ', worker_id
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
if (rc /= len(trim(msg))) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
if (msg(1:13) /= 'get_psi_reply') then
print *, rc, trim(msg)
print *, 'Error in get_psi_reply'
stop 'error'
endif
integer :: N_states_read, N_det_read, psi_det_size_read
integer :: N_det_selectors_read, N_det_generators_read
double precision :: energy(N_st)
read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, &
N_det_generators_read, N_det_selectors_read
if (rc /= worker_id) then
print *, 'Wrong worker ID'
stop 'error'
endif
integer :: bs, i, j
allocate(idx(1), vt(1,1), st(1,1))
if (N_states_read /= N_st) then
print *, N_st
stop 'error : N_st'
endif
if (N_det_read /= N_det) then
N_det = N_det_read
TOUCH N_det
endif
allocate(v_0(sze,N_st), s_0(sze,N_st),u_t(N_st,N_det))
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)
if (rc /= N_int*2*N_det*bit_kind) then
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)*8,0)
if (rc /= size(u_t)*8) then
print *, rc, size(u_t)*8
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)×8,0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)
if (rc /= N_st*8) then
print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)'
stop 'error'
endif
! Run tasks
! ---------
do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
v_0 = 0.d0
s_0 = 0.d0
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
if(task_id == 0) exit
read (task,*) blockb, blockb2, istep
bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1)
do i=blockb, shortcut_(0,2), shortcut_(0,1)
do j=i, min(i, shortcut_(0,2))
bs += shortcut_(j+1,2) - shortcut_(j, 2)
end do
end do
if(bs > size(idx)) then
deallocate(idx, vt, st)
allocate(idx(bs))
allocate(vt(N_states_diag, bs))
allocate(st(N_states_diag, bs))
end if
call davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
read (msg,*) imin, imax, ishift, istep
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,N_det,imin,imax,ishift,istep)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
end do
deallocate(v_0, s_0, u_t)
end subroutine
subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
subroutine davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
integer ,intent(in) :: task_id
integer ,intent(in) :: blockb, blocke
integer ,intent(in) :: N
integer ,intent(in) :: idx(N)
double precision ,intent(in) :: vt(N_states_diag, N)
double precision ,intent(in) :: st(N_states_diag, N)
double precision ,intent(in) :: v_0(N_det,N_states_diag)
double precision ,intent(in) :: s_0(N_det,N_states_diag)
integer :: rc
rc = f77_zmq_send( zmq_socket_push, blockb, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push blockb"
rc = f77_zmq_send( zmq_socket_push, v_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push vt"
rc = f77_zmq_send( zmq_socket_push, blocke, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push blocke"
rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE)
if(rc /= 4) stop "davidson_push_results failed to push N"
rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE)
if(rc /= 4*N) stop "davidson_push_results failed to push idx"
rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states_diag* N, ZMQ_SNDMORE)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push vt"
rc = f77_zmq_send( zmq_socket_push, st, 8*N_states_diag* N, ZMQ_SNDMORE)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push st"
rc = f77_zmq_send( zmq_socket_push, s_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push st"
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to push task_id"
! Activate is zmq_socket_push is a REQ
integer :: idummy
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
if (rc /= 4) then
print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
stop 'error'
endif
end subroutine
subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
use f77_zmq
implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
integer ,intent(out) :: task_id
integer ,intent(out) :: blockb, blocke
integer ,intent(out) :: N
integer ,intent(out) :: idx(*)
double precision ,intent(out) :: vt(N_states_diag, *)
double precision ,intent(out) :: st(N_states_diag, *)
double precision ,intent(out) :: v_0(N_det,N_states_diag)
double precision ,intent(out) :: s_0(N_det,N_states_diag)
integer :: rc
rc = f77_zmq_recv( zmq_socket_pull, blockb, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to pull blockb"
rc = f77_zmq_recv( zmq_socket_pull, blocke, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to pull blocke"
rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
if(rc /= 4) stop "davidson_push_results failed to pull N"
rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0)
if(rc /= 4*N) stop "davidson_push_results failed to pull idx"
rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0)
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0"
rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states_diag* N, 0)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull vt"
rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states_diag* N, 0)
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull st"
rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0)
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0"
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
! Activate if zmq_socket_pull is a REP
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
if (rc /= 4) then
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
stop 'error'
endif
end subroutine
subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LDA)
subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze, N_st)
use f77_zmq
implicit none
integer :: LDA
integer, intent(in) :: sze, N_st
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
double precision ,intent(inout) :: v0(LDA, N_states_diag)
double precision ,intent(inout) :: s0(LDA, N_states_diag)
double precision ,intent(inout) :: v0(sze, N_st)
double precision ,intent(inout) :: s0(sze, N_st)
integer :: more, task_id, taskn
integer :: more, task_id
integer :: blockb, blocke
integer :: N
integer , allocatable :: idx(:)
double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:)
double precision , allocatable :: st(:,:)
integer :: msize
msize = (1 + max_blocksize)*2
allocate(idx(msize))
allocate(vt(N_states_diag, msize))
allocate(st(N_states_diag, msize))
allocate(v0t(N_states_diag, dav_size))
allocate(s0t(N_states_diag, dav_size))
v0t = 00.d0
s0t = 00.d0
more = 1
do while (more == 1)
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
!DIR$ FORCEINLINE
call davidson_collect(N, idx, vt, st , v0t, s0t)
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
end do
deallocate(idx,vt,st)
double precision, allocatable :: v_0(:,:), s_0(:,:)
integer :: i,j
!DIR$ IVDEP
do j=1,N_states_diag
!DIR$ IVDEP
do i=1,dav_size
v0(i,j) = v0t(j,i)
s0(i,j) = s0t(j,i)
enddo
enddo
deallocate(v0t,s0t)
end subroutine
subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
use f77_zmq
implicit none
integer :: LDA
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_collector
integer(ZMQ_PTR), external :: new_zmq_pull_socket
integer(ZMQ_PTR) :: zmq_socket_pull
integer :: i
integer, external :: omp_get_thread_num
double precision , intent(inout) :: v0(LDA, N_states_diag)
double precision , intent(inout) :: s0(LDA, N_states_diag)
call zmq_set_running(zmq_to_qp_run_socket)
zmq_collector = new_zmq_to_qp_run_socket()
allocate(v_0(N_det,N_st), s_0(N_det,N_st))
v0 = 0.d0
s0 = 0.d0
more = 1
zmq_socket_pull = new_zmq_pull_socket()
i = omp_get_thread_num()
PROVIDE nproc
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
i = omp_get_thread_num()
if (i == 0 ) then
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
call end_zmq_to_qp_run_socket(zmq_collector)
call end_zmq_pull_socket(zmq_socket_pull)
call davidson_miniserver_end()
else if (i == 1 ) then
call davidson_miniserver_run ()
else
call davidson_slave_inproc(i)
endif
!$OMP END PARALLEL
do while (more == 1)
call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
do j=1,N_st
do i=1,N_det
v0(i,j) = v0(i,j) + v_0(i,j)
s0(i,j) = s0(i,j) + s_0(i,j)
enddo
enddo
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
end do
deallocate(v_0,s_0)
call end_zmq_pull_socket(zmq_socket_pull)
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
end subroutine
subroutine davidson_miniserver_run()
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
use omp_lib
use bitmasks
use f77_zmq
implicit none
integer(ZMQ_PTR) responder
character*(64) address
character(len=:), allocatable :: buffer
integer rc
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
!
! S2_jj : array of <j|S^2|j>
END_DOC
integer, intent(in) :: N_st, sze
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(inout):: u_0(sze,N_st)
integer :: i,j,k
integer :: ithread
double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
allocate (character(len=20) :: buffer)
address = 'tcp://*:11223'
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
responder = f77_zmq_socket(zmq_context, ZMQ_REP)
rc = f77_zmq_bind(responder,address)
if(N_st /= N_states_diag .or. sze < N_det) stop "assert fail in H_S2_u_0_nstates"
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
call new_parallel_job(zmq_to_qp_run_socket,'davidson')
do
rc = f77_zmq_recv(responder, buffer, 5, 0)
if (buffer(1:rc) /= 'end') then
rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE)
rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE)
rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0)
else
rc = f77_zmq_send (responder, "end", 3, 0)
exit
character*(512) :: task
integer :: rc
double precision :: energy(N_st)
energy = 0.d0
task = ' '
write(task,*) 'put_psi ', 1, N_st, N_det, N_det
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE)
if (rc /= len(trim(task))) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE)'
stop 'error'
endif
rc = f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
if (rc /= N_int*2*N_det*bit_kind) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
stop 'error'
endif
rc = f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE)
if (rc /= size(u_t)*8) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE)'
stop 'error'
endif
rc = f77_zmq_send(zmq_to_qp_run_socket,energy,N_st*8,0)
if (rc /= N_st*8) then
print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket,task,len(task),0)
if (task(1:rc) /= 'put_psi_reply 1') then
print *, rc, trim(task)
print *, 'Error in put_psi_reply'
stop 'error'
endif
deallocate(u_t)
! Create tasks
! ============
integer :: istep, imin, imax, ishift
double precision :: w, max_workload, N_det_inv, di
max_workload = 1000000.d0
w = 0.d0
istep=8
ishift=0
imin=1
N_det_inv = 1.d0/dble(N_det)
di = dble(N_det)
do imax=1,N_det
di = di-1.d0
w = w + di*N_det_inv
if (w > max_workload) then
do ishift=0,istep-1
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
enddo
imin = imax+1
w = 0.d0
endif
enddo
if (w > 0.d0) then
imax = N_det
do ishift=0,istep-1
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
enddo
endif
rc = f77_zmq_close(responder)
end subroutine
subroutine davidson_miniserver_end()
implicit none
use f77_zmq
integer(ZMQ_PTR) requester
character*(64) address
integer rc
character*(64) buf
address = trim(qp_run_address)//':11223'
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
rc = f77_zmq_connect(requester,address)
rc = f77_zmq_send(requester, "end", 3, 0)
rc = f77_zmq_recv(requester, buf, 3, 0)
rc = f77_zmq_close(requester)
end subroutine
subroutine davidson_miniserver_get()
implicit none
use f77_zmq
integer(ZMQ_PTR) requester
character*(64) address
character*(20) buffer
integer rc
address = trim(qp_run_address)//':11223'
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
rc = f77_zmq_connect(requester,address)
rc = f77_zmq_send(requester, "Hello", 5, 0)
rc = f77_zmq_recv(requester, dav_size, 4, 0)
TOUCH dav_size
rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0)
rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0)
TOUCH dav_det dav_ut
end subroutine
BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ]
&BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ]
use bitmasks
implicit none
BEGIN_DOC
! Temporary arrays for parallel davidson
!
! Touched in davidson_miniserver_get
END_DOC
dav_det = 0_bit_kind
dav_ut = -huge(1.d0)
END_PROVIDER
BEGIN_PROVIDER [ integer, dav_size ]
implicit none
BEGIN_DOC
! Size of the arrays for Davidson
!
! Touched in davidson_miniserver_get
END_DOC
dav_size = 1
END_PROVIDER
BEGIN_PROVIDER [ integer, shortcut_, (0:dav_size+1, 2) ]
&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, dav_size, 2) ]
&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, dav_size, 2) ]
&BEGIN_PROVIDER [ integer, sort_idx_, (dav_size, 2) ]
&BEGIN_PROVIDER [ integer, max_blocksize ]
implicit none
call sort_dets_ab_v(dav_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), dav_size, N_int)
call sort_dets_ba_v(dav_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), dav_size, N_int)
max_blocksize = max(shortcut_(0,1), shortcut_(0,2))
END_PROVIDER
v_0 = 0.d0
s_0 = 0.d0
call omp_set_nested(.True.)
call zmq_set_running(zmq_to_qp_run_socket)
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num()
if (ithread == 0 ) then
call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, N_det, N_st)
else
call davidson_slave_inproc(1)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
do k=1,N_st
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
enddo
end

View File

@ -16,24 +16,17 @@ program davidson_slave
state = 'Waiting'
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
do
call wait_for_state(zmq_state,state)
if(trim(state) /= "davidson") exit
call davidson_miniserver_get()
integer :: rc, i
print *, 'Davidson slave running'
!$OMP PARALLEL PRIVATE(i)
i = omp_get_thread_num()
call davidson_slave_tcp(i)
!$OMP END PARALLEL
end do
end
subroutine provide_everything
PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states_diag zmq_context
PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states_diag zmq_context ref_bitmask_energy
end subroutine

View File

@ -302,7 +302,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag)
integer :: sze_8
integer :: iter
integer :: i,j,k,l,m
logical :: converged
@ -365,13 +364,12 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double
sze_8 = align_double(sze)
allocate( &
kl_pairs(2,N_st_diag*(N_st_diag+1)/2), &
W(sze_8,N_st_diag,davidson_sze_max), &
U(sze_8,N_st_diag,davidson_sze_max), &
R(sze_8,N_st_diag), &
W(sze,N_st_diag,davidson_sze_max), &
U(sze,N_st_diag,davidson_sze_max), &
R(sze,N_st_diag), &
h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
residual_norm(N_st_diag), &
@ -426,7 +424,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
! Compute |W_k> = \sum_i |i><i|H|u_k>
! -----------------------------------------
call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze_8)
call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze)
! do k=1,N_st
! if(store_full_H_mat.and.sze.le.n_det_max_stored)then
! call H_u_0_stored(W(1,k,iter),U(1,k,iter),H_matrix_all_dets,sze)

View File

@ -23,41 +23,33 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag), s2_out(N_st_diag)
double precision, allocatable :: H_jj(:), S2_jj(:)
double precision, allocatable :: H_jj(:)
double precision :: diag_h_mat_elem
double precision :: diag_H_mat_elem, diag_S_mat_elem
integer :: i
ASSERT (N_st > 0)
ASSERT (sze > 0)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
PROVIDE mo_bielec_integrals_in_map
allocate(H_jj(sze), S2_jj(sze))
allocate(H_jj(sze) )
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint) &
!$OMP SHARED(sze,H_jj, dets_in,Nint) &
!$OMP PRIVATE(i)
!$OMP DO SCHEDULE(guided)
!$OMP DO SCHEDULE(static)
do i=1,sze
H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint)
call get_s2(dets_in(1,1,i),dets_in(1,1,i),Nint,S2_jj(i))
H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint)
enddo
!$OMP END DO
!$OMP END PARALLEL
if (disk_based_davidson) then
call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
else
call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
endif
do i=1,N_st_diag
s2_out(i) = S2_jj(i)
enddo
deallocate (H_jj,S2_jj)
call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
deallocate (H_jj)
end
subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
use bitmasks
implicit none
BEGIN_DOC
@ -65,7 +57,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
!
! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
!
! S2_jj : specific diagonal S^2 matrix elements
! S2_out : Output : s^2
!
! dets_in : bitmasks corresponding to determinants
!
@ -87,12 +79,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
double precision, intent(in) :: H_jj(sze)
double precision, intent(inout) :: S2_jj(sze)
double precision, intent(inout) :: s2_out(N_st_diag)
integer, intent(in) :: iunit
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag)
integer :: sze_8
integer :: iter
integer :: i,j,k,l,m
logical :: converged
@ -122,7 +113,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
stop -1
endif
PROVIDE nuclear_repulsion expected_s2
integer, external :: align_double
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
call write_time(iunit)
call wall_time(wall)
@ -134,6 +128,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
call write_int(iunit,N_st,'Number of states')
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
call write_int(iunit,sze,'Number of determinants')
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+ 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
call write_double(iunit, r1, 'Memory(Gb)')
write(iunit,'(A)') ''
write_buffer = '===== '
do i=1,N_st
@ -151,14 +148,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
enddo
write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double
sze_8 = align_double(sze)
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
allocate( &
W(sze_8,N_st_diag*itermax), &
U(sze_8,N_st_diag*itermax), &
S(sze_8,N_st_diag*itermax), &
! Large
W(sze,N_st_diag*itermax), &
U(sze,N_st_diag*itermax), &
S(sze,N_st_diag*itermax), &
! Small
h(N_st_diag*itermax,N_st_diag*itermax), &
y(N_st_diag*itermax,N_st_diag*itermax), &
s_(N_st_diag*itermax,N_st_diag*itermax), &
@ -202,7 +199,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
do k=1,N_st_diag
call normalize(u_in(1,k),sze)
enddo
do while (.not.converged)
@ -223,8 +220,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
! -----------------------------------------
! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
if (distributed_davidson) then
call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
else
call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
endif
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
@ -400,7 +400,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
endif
enddo
write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(1:3,1:N_st)
write(iunit,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter, to_print(1:3,1:N_st)
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
do k=1,N_st
if (residual_norm(k) > 1.e8) then
@ -424,7 +424,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
do k=1,N_st_diag
energies(k) = lambda(k)
S2_jj(k) = s2(k)
s2_out(k) = s2(k)
enddo
write_buffer = '===== '
do i=1,N_st
@ -444,439 +444,3 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
)
end
subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
use bitmasks
use mmap_module
implicit none
BEGIN_DOC
! Davidson diagonalization with specific diagonal elements of the H matrix
!
! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
!
! S2_jj : specific diagonal S^2 matrix elements
!
! dets_in : bitmasks corresponding to determinants
!
! u_in : guess coefficients on the various states. Overwritten
! on exit
!
! dim_in : leftmost dimension of u_in
!
! sze : Number of determinants
!
! N_st : Number of eigenstates
!
! N_st_diag : Number of states in which H is diagonalized. Assumed > sze
!
! iunit : Unit for the I/O
!
! Initial guess vectors are not necessarily orthonormal
END_DOC
integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
double precision, intent(in) :: H_jj(sze)
double precision, intent(inout) :: S2_jj(sze)
integer, intent(in) :: iunit
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag)
integer :: sze_8
integer :: iter
integer :: i,j,k,l,m
logical :: converged
double precision :: u_dot_v, u_dot_u
integer :: k_pairs, kl
integer :: iter2
double precision, pointer :: W(:,:), U(:,:), S(:,:), overlap(:,:)
double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:)
double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:)
double precision :: diag_h_mat_elem
double precision, allocatable :: residual_norm(:)
character*(16384) :: write_buffer
double precision :: to_print(3,N_st)
double precision :: cpu, wall
logical :: state_ok(N_st_diag*davidson_sze_max)
integer :: shift, shift2, itermax
include 'constants.include.F'
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
if (N_st_diag*3 > sze) then
print *, 'error in Davidson :'
print *, 'Increase n_det_max_jacobi to ', N_st_diag*3
stop -1
endif
PROVIDE nuclear_repulsion expected_s2
call write_time(iunit)
call wall_time(wall)
call cpu_time(cpu)
write(iunit,'(A)') ''
write(iunit,'(A)') 'Davidson Diagonalization'
write(iunit,'(A)') '------------------------'
write(iunit,'(A)') ''
call write_int(iunit,N_st,'Number of states')
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
call write_int(iunit,sze,'Number of determinants')
write(iunit,'(A)') ''
write_buffer = '===== '
do i=1,N_st
write_buffer = trim(write_buffer)//' ================ =========== ==========='
enddo
write(iunit,'(A)') trim(write_buffer)
write_buffer = ' Iter'
do i=1,N_st
write_buffer = trim(write_buffer)//' Energy S^2 Residual '
enddo
write(iunit,'(A)') trim(write_buffer)
write_buffer = '===== '
do i=1,N_st
write_buffer = trim(write_buffer)//' ================ =========== ==========='
enddo
write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double
integer :: fd(3)
type(c_ptr) :: c_pointer(3)
sze_8 = align_double(sze)
itermax = min(davidson_sze_max, sze/N_st_diag)
call mmap( &
trim(ezfio_work_dir)//'U', &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(1), .False., c_pointer(1))
call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) )
call mmap( &
trim(ezfio_work_dir)//'W', &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(2), .False., c_pointer(2))
call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) )
call mmap( &
trim(ezfio_work_dir)//'S', &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(3), .False., c_pointer(3))
call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) )
allocate( &
h(N_st_diag*itermax,N_st_diag*itermax), &
y(N_st_diag*itermax,N_st_diag*itermax), &
s_(N_st_diag*itermax,N_st_diag*itermax), &
s_tmp(N_st_diag*itermax,N_st_diag*itermax), &
overlap(N_st_diag*itermax, N_st_diag*itermax), &
residual_norm(N_st_diag), &
c(N_st_diag*itermax), &
s2(N_st_diag*itermax), &
lambda(N_st_diag*itermax))
h = 0.d0
U = 0.d0
W = 0.d0
S = 0.d0
y = 0.d0
s_ = 0.d0
s_tmp = 0.d0
ASSERT (N_st > 0)
ASSERT (N_st_diag >= N_st)
ASSERT (sze > 0)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
! Davidson iterations
! ===================
converged = .False.
double precision :: r1, r2
do k=N_st+1,N_st_diag
u_in(k,k) = 10.d0
do i=1,sze
call random_number(r1)
r1 = dsqrt(-2.d0*dlog(r1))
r2 = dtwo_pi*r2
u_in(i,k) = r1*dcos(r2)
enddo
enddo
do k=1,N_st_diag
call normalize(u_in(1,k),sze)
enddo
do while (.not.converged)
do k=1,N_st_diag
do i=1,sze
U(i,k) = u_in(i,k)
enddo
enddo
do iter=1,itermax-1
shift = N_st_diag*(iter-1)
shift2 = N_st_diag*iter
call ortho_qr(U,size(U,1),sze,shift2)
! Compute |W_k> = \sum_i |i><i|H|u_k>
! -----------------------------------------
! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
! -------------------------------------------
do k=1,iter
shift = N_st_diag*(k-1)
call dgemm('T','N', N_st_diag, shift2, sze, &
1.d0, U(1,shift+1), size(U,1), W, size(W,1), &
0.d0, h(shift+1,1), size(h,1))
call dgemm('T','N', N_st_diag, shift2, sze, &
1.d0, U(1,shift+1), size(U,1), S, size(S,1), &
0.d0, s_(shift+1,1), size(s_,1))
enddo
! ! Diagonalize S^2
! ! ---------------
!
! call lapack_diag(s2,y,s_,size(s_,1),shift2)
!
!
! ! Rotate H in the basis of eigenfunctions of s2
! ! ---------------------------------------------
!
! call dgemm('N','N',shift2,shift2,shift2, &
! 1.d0, h, size(h,1), y, size(y,1), &
! 0.d0, s_tmp, size(s_tmp,1))
!
! call dgemm('T','N',shift2,shift2,shift2, &
! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
! 0.d0, h, size(h,1))
!
! ! Damp interaction between different spin states
! ! ------------------------------------------------
!
! do k=1,shift2
! do l=1,shift2
! if (dabs(s2(k) - s2(l)) > 1.d0) then
! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
! endif
! enddo
! enddo
!
! ! Rotate back H
! ! -------------
!
! call dgemm('N','T',shift2,shift2,shift2, &
! 1.d0, h, size(h,1), y, size(y,1), &
! 0.d0, s_tmp, size(s_tmp,1))
!
! call dgemm('N','N',shift2,shift2,shift2, &
! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
! 0.d0, h, size(h,1))
! Diagonalize h
! -------------
call lapack_diag(lambda,y,h,size(h,1),shift2)
! Compute S2 for each eigenvector
! -------------------------------
call dgemm('N','N',shift2,shift2,shift2, &
1.d0, s_, size(s_,1), y, size(y,1), &
0.d0, s_tmp, size(s_tmp,1))
call dgemm('T','N',shift2,shift2,shift2, &
1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
0.d0, s_, size(s_,1))
do k=1,shift2
s2(k) = s_(k,k) + S_z2_Sz
enddo
if (s2_eig) then
do k=1,shift2
state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
enddo
else
state_ok(k) = .True.
endif
do k=1,shift2
if (.not. state_ok(k)) then
do l=k+1,shift2
if (state_ok(l)) then
call dswap(shift2, y(1,k), 1, y(1,l), 1)
call dswap(1, s2(k), 1, s2(l), 1)
call dswap(1, lambda(k), 1, lambda(l), 1)
state_ok(k) = .True.
state_ok(l) = .False.
exit
endif
enddo
endif
enddo
if (state_following) then
! Compute overlap with U_in
! -------------------------
integer :: order(N_st_diag)
double precision :: cmax
overlap = -1.d0
do k=1,shift2
do i=1,shift2
overlap(k,i) = dabs(y(k,i))
enddo
enddo
do k=1,N_st
cmax = -1.d0
do i=1,shift2
if (overlap(i,k) > cmax) then
cmax = overlap(i,k)
order(k) = i
endif
enddo
do i=1,shift2
overlap(order(k),i) = -1.d0
enddo
enddo
overlap = y
do k=1,N_st
l = order(k)
if (k /= l) then
y(1:shift2,k) = overlap(1:shift2,l)
endif
enddo
do k=1,N_st
overlap(k,1) = lambda(k)
overlap(k,2) = s2(k)
enddo
do k=1,N_st
l = order(k)
if (k /= l) then
lambda(k) = overlap(l,1)
s2(k) = overlap(l,2)
endif
enddo
endif
! Express eigenvectors of h in the determinant basis
! --------------------------------------------------
call dgemm('N','N', sze, N_st_diag, shift2, &
1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
call dgemm('N','N', sze, N_st_diag, shift2, &
1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
call dgemm('N','N', sze, N_st_diag, shift2, &
1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1))
! Compute residual vector and davidson step
! -----------------------------------------
do k=1,N_st_diag
if (state_ok(k)) then
do i=1,sze
U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) &
* (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
)/max(H_jj(i) - lambda (k),1.d-2)
enddo
else
! Randomize components with bad <S2>
do i=1,sze-2,2
call random_number(r1)
call random_number(r2)
r1 = dsqrt(-2.d0*dlog(r1))
r2 = dtwo_pi*r2
U(i,shift2+k) = r1*dcos(r2)
U(i+1,shift2+k) = r1*dsin(r2)
enddo
do i=sze-2+1,sze
call random_number(r1)
call random_number(r2)
r1 = dsqrt(-2.d0*dlog(r1))
r2 = dtwo_pi*r2
U(i,shift2+k) = r1*dcos(r2)
enddo
endif
if (k <= N_st) then
residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
to_print(1,k) = lambda(k) + nuclear_repulsion
to_print(2,k) = s2(k)
to_print(3,k) = residual_norm(k)
endif
enddo
write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(1:3,1:N_st)
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
do k=1,N_st
if (residual_norm(k) > 1.e8) then
print *, ''
stop 'Davidson failed'
endif
enddo
if (converged) then
exit
endif
enddo
! Re-contract to u_in
! -----------
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
enddo
do k=1,N_st_diag
energies(k) = lambda(k)
S2_jj(k) = s2(k)
enddo
write_buffer = '===== '
do i=1,N_st
write_buffer = trim(write_buffer)//' ================ =========== ==========='
enddo
write(iunit,'(A)') trim(write_buffer)
write(iunit,'(A)') ''
call write_time(iunit)
call munmap( &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(1), c_pointer(1))
call munmap( &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(2), c_pointer(2))
call munmap( &
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
8, fd(3), c_pointer(3))
deallocate ( &
residual_norm, &
c, overlap, &
h, &
y, s_, s_tmp, &
lambda &
)
end

View File

@ -66,7 +66,6 @@ END_PROVIDER
call davidson_diag_HS2(psi_det,CI_eigenvectors, CI_eigenvectors_s2, &
size(CI_eigenvectors,1),CI_electronic_energy, &
N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,output_determinants)
else if (diag_algorithm == "Lapack") then

View File

@ -1,4 +1,4 @@
subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
use bitmasks
implicit none
BEGIN_DOC
@ -7,166 +7,20 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
! n : number of determinants
!
END_DOC
integer, intent(in) :: n,Nint, N_st, sze_8
integer, intent(in) :: n,Nint, N_st, sze
double precision, intent(out) :: e_0(N_st)
double precision, intent(in) :: u_0(sze_8,N_st)
double precision, intent(inout):: u_0(sze,N_st)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision, allocatable :: H_jj(:), v_0(:,:)
double precision, allocatable :: v_0(:,:), s_0(:,:)
double precision :: u_dot_u,u_dot_v,diag_H_mat_elem
integer :: i,j
allocate (H_jj(n), v_0(sze_8,N_st))
do i = 1, n
H_jj(i) = diag_H_mat_elem(keys_tmp(1,1,i),Nint)
enddo
call H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
allocate (v_0(sze,N_st),s_0(sze,N_st))
call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
do i=1,N_st
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
enddo
deallocate (H_jj, v_0)
end
subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8
double precision, intent(out) :: v_0(sze_8,N_st)
double precision, intent(in) :: u_0(sze_8,N_st)
double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij
double precision, allocatable :: vt(:,:)
double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
N_st_8 = align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate(ut(N_st_8,n))
v_0 = 0.d0
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(i,istate)
enddo
enddo
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,H_jj,keys_tmp,ut,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
allocate(vt(N_st_8,n))
Vt = 0.d0
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,1)
do sh2=1,shortcut(0,1)
exa = popcnt(xor(version(1,sh,1), version(1,sh2,1)))
if(exa > 2) then
cycle
end if
do ni=2,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
org_j = sort_idx(j,1)
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if(ext > 4) then
cycle jloop
endif
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if(ext > 4) then
cycle jloop
endif
end do
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
do istate=1,N_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
enddo
enddo jloop
enddo
enddo
enddo
!$OMP END DO NOWAIT
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,2)
do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i,2)
do j=shortcut(sh,2),shortcut(sh+1,2)-1
org_j = sort_idx(j,2)
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
do ni=2,Nint
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
end do
if(ext /= 4) then
cycle
endif
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
do istate=1,N_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
enddo
end do
end do
enddo
!$OMP END DO NOWAIT
!$OMP CRITICAL
do istate=1,N_st
do i=n,1,-1
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
enddo
enddo
!$OMP END CRITICAL
deallocate(vt)
!$OMP END PARALLEL
do istate=1,N_st
do i=1,n
v_0(i,istate) += H_jj(i) * u_0(i,istate)
enddo
enddo
deallocate (shortcut, sort_idx, sorted, version, ut)
deallocate (s_0, v_0)
end
BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
@ -178,338 +32,411 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
END_PROVIDER
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
use bitmasks
use f77_zmq
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! n : number of determinants
! Assumes that the determinants are in psi_det
!
! H_jj : array of <j|H|j>
!
! S2_jj : array of <j|S^2|j>
! istart, iend, ishift, istep are used in ZMQ parallelization.
END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st)
double precision, intent(in) :: u_0(sze_8,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2
double precision, allocatable :: ut(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, allocatable :: shortcut(:,:), sort_idx(:)
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
integer :: blockb, blockb2, istep
double precision :: ave_workload, workload, target_workload_inv
integer(ZMQ_PTR) :: handler
if(N_st /= N_states_diag .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates"
N_st_8 = N_st ! align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
allocate(ut(N_st_8,n))
integer, intent(in) :: N_st,sze
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
integer :: k
double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
v_0 = 0.d0
s_0 = 0.d0
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(i,istate)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,1,N_det,0,1)
deallocate(u_t)
do k=1,N_st
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
enddo
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
blockb = shortcut(0,1)
call davidson_init(handler,n,N_st_8,ut)
ave_workload = 0.d0
do sh=1,shortcut(0,1)
ave_workload += shortcut(0,1)
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
enddo
ave_workload = ave_workload/dble(shortcut(0,1))
target_workload_inv = 0.001d0/ave_workload
do sh=1,shortcut(0,1),1
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
istep = 1+ int(workload*target_workload_inv)
do blockb2=0, istep-1
call davidson_add_task(handler, sh, blockb2, istep)
enddo
enddo
call davidson_run(handler, v_0, s_0, size(v_0,1))
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo
enddo
deallocate(shortcut, sort_idx, sorted, version)
deallocate(ut)
end
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
!
! S2_jj : array of <j|S^2|j>
! Default should be 1,N_det,0,1
END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st)
double precision, intent(in) :: u_0(sze_8,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
double precision, intent(in) :: u_t(N_st,N_det)
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint)
PROVIDE ref_bitmask_energy N_int
select case (N_int)
case (1)
call H_S2_u_0_nstates_openmp_work_1(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
case (2)
call H_S2_u_0_nstates_openmp_work_2(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
case (3)
call H_S2_u_0_nstates_openmp_work_3(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
case (4)
call H_S2_u_0_nstates_openmp_work_4(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
case default
call H_S2_u_0_nstates_openmp_work_N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
end select
end
BEGIN_TEMPLATE
subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! Default should be 1,N_det,0,1
END_DOC
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
double precision, intent(in) :: u_t(N_st,N_det)
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision :: hij, sij
integer :: i,j,k,l
integer :: k_a, k_b, l_a, l_b, m_a, m_b
integer :: istate
integer :: krow, kcol, krow_b, kcol_b
integer :: lrow, lcol
integer :: mrow, mcol
integer(bit_kind) :: spindet($N_int)
integer(bit_kind) :: tmp_det($N_int,2)
integer(bit_kind) :: tmp_det2($N_int,2)
integer(bit_kind) :: tmp_det3($N_int,2)
integer(bit_kind), allocatable :: buffer(:,:)
integer :: n_doubles
integer, allocatable :: doubles(:)
integer, allocatable :: singles_a(:)
integer, allocatable :: singles_b(:)
integer, allocatable :: idx(:), idx0(:)
integer :: maxab, n_singles_a, n_singles_b, kcol_prev, nmax
integer*8 :: k8
double precision, allocatable :: v_t(:,:), s_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
allocate(idx0(maxab))
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
integer :: blockb, blockb2, istep
double precision :: ave_workload, workload, target_workload_inv
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
do i=1,maxab
idx0(i) = i
enddo
N_st_8 = align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate(ut(N_st_8,n))
v_0 = 0.d0
s_0 = 0.d0
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
! Prepare the array of all alpha single excitations
! -------------------------------------------------
PROVIDE N_int
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
allocate(vt(N_st_8,n),st(N_st_8,n))
Vt = 0.d0
St = 0.d0
!$OMP SHARED(psi_bilinear_matrix_rows, N_det, &
!$OMP psi_bilinear_matrix_columns, &
!$OMP psi_det_alpha_unique, psi_det_beta_unique, &
!$OMP n_det_alpha_unique, n_det_beta_unique, N_int, &
!$OMP psi_bilinear_matrix_transp_rows, &
!$OMP psi_bilinear_matrix_transp_columns, &
!$OMP psi_bilinear_matrix_transp_order, N_st, &
!$OMP psi_bilinear_matrix_order_transp_reverse, &
!$OMP psi_bilinear_matrix_columns_loc, &
!$OMP istart, iend, istep, &
!$OMP ishift, idx0, u_t, maxab, v_0, s_0) &
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
!$OMP lcol, lrow, l_a, l_b, nmax, &
!$OMP buffer, doubles, n_doubles, &
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, &
!$OMP singles_a, n_singles_a, singles_b, &
!$OMP n_singles_b, s_t, k8)
! Alpha/Beta double excitations
! =============================
allocate( buffer($N_int,maxab), &
singles_a(maxab), &
singles_b(maxab), &
doubles(maxab), &
idx(maxab), &
v_t(N_st,N_det), s_t(N_st,N_det))
kcol_prev=-1
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,2),istate)
enddo
enddo
!$OMP END DO
v_t = 0.d0
s_t = 0.d0
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,2)
do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i,2)
do j=shortcut(sh,2),shortcut(sh+1,2)-1
org_j = sort_idx(j,2)
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
if (ext > 4) exit
end do
if(ext == 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
end if
end do
end do
enddo
!$OMP END DO
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,1),istate)
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(dynamic,64)
do k_a=istart+ishift,iend,istep
!$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0,1)
do sh2=1,shortcut(0,1)
if (sh==sh2) cycle
krow = psi_bilinear_matrix_rows(k_a)
kcol = psi_bilinear_matrix_columns(k_a)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
if (kcol /= kcol_prev) then
call get_all_spin_singles_$N_int( &
psi_det_beta_unique(1,kcol+1), idx0(kcol+1), &
tmp_det(1,2), N_det_beta_unique-kcol, &
singles_b, n_singles_b)
endif
kcol_prev = kcol
exa = 0
do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
! Loop over singly excited beta columns > current column
! ------------------------------------------------------
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do i=1,n_singles_b
lcol = singles_b(i)
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
enddo
enddo
exa = 0
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh,1),i-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
l_a = psi_bilinear_matrix_columns_loc(lcol)
nmax = psi_bilinear_matrix_columns_loc(lcol+1) - l_a
do j=1,nmax
lrow = psi_bilinear_matrix_rows(l_a)
buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)
idx(j) = l_a
l_a = l_a+1
enddo
j = j-1
do j=i+1,shortcut(sh+1,1)-1
if (i==j) cycle
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
call get_all_spin_singles_$N_int( &
buffer, idx, tmp_det(1,1), j, &
singles_a, n_singles_a )
! Loop over alpha singles
! -----------------------
do k = 1,n_singles_a
l_a = singles_a(k)
lrow = psi_bilinear_matrix_rows(l_a)
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij)
call get_s2(tmp_det,tmp_det2,$N_int,sij)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP CRITICAL (u0Hu0)
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
s_0(i,istate) = s_0(i,istate) + st(istate,i)
!$OMP DO SCHEDULE(dynamic,64)
do k_a=istart+ishift,iend,istep
! Single and double alpha excitations
! ===================================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
kcol = psi_bilinear_matrix_columns(k_a)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
! Initial determinant is at k_b in beta-major representation
! ----------------------------------------------------------------------
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
spindet(1:$N_int) = tmp_det(1:$N_int,1)
! Loop inside the beta column to gather all the connected alphas
l_a = k_a+1
nmax = min(N_det_alpha_unique, N_det - l_a)
do i=1,nmax
lcol = psi_bilinear_matrix_columns(l_a)
if (lcol /= kcol) exit
lrow = psi_bilinear_matrix_rows(l_a)
buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow)
idx(i) = l_a
l_a = l_a+1
enddo
i = i-1
call get_all_spin_singles_and_doubles_$N_int( &
buffer, idx, spindet, i, &
singles_a, doubles, n_singles_a, n_doubles )
! Compute Hij for all alpha singles
! ----------------------------------
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
do i=1,n_singles_a
l_a = singles_a(i)
lrow = psi_bilinear_matrix_rows(l_a)
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
do l=1,N_st
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0
enddo
enddo
! Compute Hij for all alpha doubles
! ----------------------------------
do i=1,n_doubles
l_a = doubles(i)
lrow = psi_bilinear_matrix_rows(l_a)
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
do l=1,N_st
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0
enddo
enddo
! Single and double beta excitations
! ==================================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
kcol = psi_bilinear_matrix_columns(k_a)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
spindet(1:$N_int) = tmp_det(1:$N_int,2)
! Initial determinant is at k_b in beta-major representation
! -----------------------------------------------------------------------
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
! Loop inside the alpha row to gather all the connected betas
l_b = k_b+1
nmax = min(N_det_beta_unique, N_det - l_b)
do i=1,nmax
lrow = psi_bilinear_matrix_transp_rows(l_b)
if (lrow /= krow) exit
lcol = psi_bilinear_matrix_transp_columns(l_b)
buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
idx(i) = l_b
l_b = l_b+1
enddo
i = i-1
call get_all_spin_singles_and_doubles_$N_int( &
buffer, idx, spindet, i, &
singles_b, doubles, n_singles_b, n_doubles )
! Compute Hij for all beta singles
! ----------------------------------
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
do i=1,n_singles_b
l_b = singles_b(i)
lcol = psi_bilinear_matrix_transp_columns(l_b)
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 2, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
do l=1,N_st
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0
enddo
enddo
! Compute Hij for all beta doubles
! ----------------------------------
do i=1,n_doubles
l_b = doubles(i)
lcol = psi_bilinear_matrix_transp_columns(l_b)
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
do l=1,N_st
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0
enddo
enddo
! Diagonal contribution
! =====================
! Initial determinant is at k_a in alpha-major representation
! -----------------------------------------------------------------------
krow = psi_bilinear_matrix_rows(k_a)
kcol = psi_bilinear_matrix_columns(k_a)
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
double precision, external :: diag_H_mat_elem, diag_S_mat_elem
hij = diag_H_mat_elem(tmp_det,$N_int)
sij = diag_S_mat_elem(tmp_det,$N_int)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
enddo
end do
!$OMP END DO NOWAIT
deallocate(buffer, singles_a, singles_b, doubles, idx)
!$OMP CRITICAL
do l=1,N_st
do i=1, N_det
v_0(i,l) = v_0(i,l) + v_t(l,i)
s_0(i,l) = s_0(i,l) + s_t(l,i)
enddo
enddo
!$OMP END CRITICAL (u0Hu0)
!$OMP END CRITICAL
deallocate(v_t, s_t)
deallocate(vt,st)
!$OMP BARRIER
!$OMP END PARALLEL
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo
enddo
deallocate (shortcut, sort_idx, sorted, version, ut)
end
SUBST [ N_int ]
1;;
2;;
3;;
4;;
N_int;;
END_TEMPLATE

View File

@ -0,0 +1,517 @@
subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
!
END_DOC
integer, intent(in) :: N_st,n,Nint, sze
double precision, intent(out) :: v_0(sze,N_st)
double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
N_st_8 = align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate( ut(N_st_8,n))
v_0 = 0.d0
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
allocate(vt(N_st_8,n),st(N_st_8,n))
Vt = 0.d0
St = 0.d0
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,2),istate)
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(static,1)
do sh=1,shortcut(0,2)
do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i,2)
do j=shortcut(sh,2),shortcut(sh+1,2)-1
org_j = sort_idx(j,2)
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
if (ext > 4) exit
end do
if(ext == 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
end if
end do
end do
enddo
!$OMP END DO
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,1),istate)
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(static,1)
do sh=1,shortcut(0,1)
do sh2=1,shortcut(0,1)
if (sh==sh2) cycle
exa = 0
do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
enddo
enddo
exa = 0
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh,1),i-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
do j=i+1,shortcut(sh+1,1)-1
if (i==j) cycle
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
enddo
enddo
!$OMP END DO
do istate=1,N_st
do i=1,n
!$OMP ATOMIC
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
enddo
enddo
deallocate(vt,st)
!$OMP END PARALLEL
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
enddo
enddo
deallocate (shortcut, sort_idx, sorted, version, ut)
end
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks
implicit none
BEGIN_DOC
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
!
! n : number of determinants
!
! H_jj : array of <j|H|j>
!
! S2_jj : array of <j|S^2|j>
END_DOC
integer, intent(in) :: N_st,n,Nint, sze
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
integer :: i,j,k,l, jj,ii
integer :: i0, j0
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
integer :: N_st_8
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
N_st_8 = align_double(N_st)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
ASSERT (n>0)
PROVIDE ref_bitmask_energy
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
allocate( ut(N_st_8,n))
v_0 = 0.d0
s_0 = 0.d0
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
allocate(vt(N_st_8,n),st(N_st_8,n))
Vt = 0.d0
St = 0.d0
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,2),istate)
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(static,4)
do sh=1,shortcut(0,2)
do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i,2)
do j=shortcut(sh,2),shortcut(sh+1,2)-1
org_j = sort_idx(j,2)
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
if (ext > 4) exit
end do
if(ext == 4) then
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
end if
end do
end do
enddo
!$OMP END DO
!$OMP DO
do i=1,n
do istate=1,N_st
ut(istate,i) = u_0(sort_idx(i,1),istate)
enddo
enddo
!$OMP END DO
!$OMP DO SCHEDULE(static,4)
do sh=1,shortcut(0,1)
do sh2=1,shortcut(0,1)
if (sh==sh2) cycle
exa = 0
do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do
if(exa > 2) then
cycle
end if
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
enddo
enddo
exa = 0
do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i,1)
do ni=1,Nint
sorted_i(ni) = sorted(ni,i,1)
enddo
do j=shortcut(sh,1),i-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
do j=i+1,shortcut(sh+1,1)-1
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
if (ext > 4) cycle
do ni=2,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
if (ext > 4) exit
end do
if(ext <= 4) then
org_j = sort_idx(j,1)
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
if (hij /= 0.d0) then
do istate=1,n_st
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
enddo
endif
if (ext /= 2) then
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
if (s2 /= 0.d0) then
do istate=1,n_st
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
enddo
endif
endif
endif
enddo
enddo
enddo
!$OMP END DO
do istate=1,N_st
do i=1,n
!$OMP ATOMIC
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
!$OMP ATOMIC
s_0(i,istate) = s_0(i,istate) + st(istate,i)
enddo
enddo
deallocate(vt,st)
!$OMP END PARALLEL
do istate=1,N_st
do i=1,n
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
enddo
enddo
deallocate (shortcut, sort_idx, sorted, version, ut)
end
subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks
implicit none
integer, intent(in) :: N_st,n,Nint, sze
integer(bit_kind), intent(in) :: keys_tmp(Nint,2,n)
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n)
PROVIDE ref_bitmask_energy
double precision, allocatable :: vt(:,:)
integer, allocatable :: idx(:)
integer :: i,j, jj, l
double precision :: hij
do i=1,n
v_0(i,:) = H_jj(i) * u_0(i,:)
enddo
allocate(idx(0:n), vt(N_st,n))
Vt = 0.d0
!$OMP PARALLEL DO DEFAULT(shared) PRIVATE(i,idx,jj,j,degree,exc,phase,hij,l) SCHEDULE(static,1)
do i=2,n
idx(0) = i
call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx)
do jj=1,idx(0)
j = idx(jj)
double precision :: phase
integer :: degree
integer :: exc(0:2,2,2)
call get_excitation(keys_tmp(1,1,j),keys_tmp(1,1,i),exc,degree,phase,Nint)
! if ((degree == 2).and.(exc(0,1,1)==1)) then
! continue
! else
! cycle
! endif
! if ((degree == 2).and.(exc(0,1,1)==1)) cycle
! if ((degree > 1)) cycle
! if (exc(0,1,2) /= 0) cycle
! if (exc(0,1,1) == 2) cycle
! if (exc(0,1,2) == 2) cycle
! if ((degree==1).and.(exc(0,1,2) == 1)) cycle
call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
do l=1,N_st
!$OMP ATOMIC
vt (l,i) = vt (l,i) + hij*u_0(j,l)
!$OMP ATOMIC
vt (l,j) = vt (l,j) + hij*u_0(i,l)
enddo
enddo
enddo
!$OMP END PARALLEL DO
do i=1,n
v_0(i,:) = v_0(i,:) + vt(:,i)
enddo
end

View File

@ -1,3 +1,35 @@
double precision function diag_S_mat_elem(key_i,Nint)
implicit none
use bitmasks
include 'Utils/constants.include.F'
integer :: Nint
integer(bit_kind), intent(in) :: key_i(Nint,2)
BEGIN_DOC
! Returns <i|S^2|i>
END_DOC
integer :: nup, i
integer(bit_kind) :: xorvec(N_int_max)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
do i=1,Nint
xorvec(i) = xor(key_i(i,1),key_i(i,2))
enddo
do i=1,Nint
xorvec(i) = iand(xorvec(i),key_i(i,1))
enddo
nup = 0
do i=1,Nint
if (xorvec(i) /= 0_bit_kind) then
nup += popcnt(xorvec(i))
endif
enddo
diag_S_mat_elem = dble(nup)
end
subroutine get_s2(key_i,key_j,Nint,s2)
implicit none
use bitmasks
@ -25,11 +57,9 @@ subroutine get_s2(key_i,key_j,Nint,s2)
endif
endif
case(0)
nup = 0
do i=1,Nint
nup += popcnt(iand(xor(key_i(i,1),key_i(i,2)),key_i(i,1)))
enddo
s2 = dble(nup)
double precision, external :: diag_S_mat_elem
!DIR$ FORCEINLINE
s2 = diag_S_mat_elem(key_i,Nint)
end select
end

View File

@ -1,32 +1,60 @@
subroutine get_excitation_degree(key1,key2,degree,Nint)
use bitmasks
include 'Utils/constants.include.F'
implicit none
BEGIN_DOC
! Returns the excitation degree between two determinants
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: key1(Nint,2)
integer(bit_kind), intent(in) :: key2(Nint,2)
integer(bit_kind), intent(in) :: key1(Nint*2)
integer(bit_kind), intent(in) :: key2(Nint*2)
integer, intent(out) :: degree
integer(bit_kind) :: xorvec(2*N_int_max)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
integer :: l
ASSERT (Nint > 0)
degree = popcnt(xor( key1(1,1), key2(1,1))) + &
popcnt(xor( key1(1,2), key2(1,2)))
!DIR$ NOUNROLL
do l=2,Nint
degree = degree+ popcnt(xor( key1(l,1), key2(l,1))) + &
popcnt(xor( key1(l,2), key2(l,2)))
enddo
ASSERT (degree >= 0)
select case (Nint)
case (1)
xorvec(1) = xor( key1(1), key2(1))
xorvec(2) = xor( key1(2), key2(2))
degree = sum(popcnt(xorvec(1:2)))
case (2)
xorvec(1) = xor( key1(1), key2(1))
xorvec(2) = xor( key1(2), key2(2))
xorvec(3) = xor( key1(3), key2(3))
xorvec(4) = xor( key1(4), key2(4))
degree = sum(popcnt(xorvec(1:4)))
case (3)
do l=1,6
xorvec(l) = xor( key1(l), key2(l))
enddo
degree = sum(popcnt(xorvec(1:6)))
case (4)
do l=1,8
xorvec(l) = xor( key1(l), key2(l))
enddo
degree = sum(popcnt(xorvec(1:8)))
case default
do l=1,ishft(Nint,1)
xorvec(l) = xor( key1(l), key2(l))
enddo
degree = sum(popcnt(xorvec(1:l)))
end select
degree = ishft(degree,-1)
end
subroutine get_excitation(det1,det2,exc,degree,phase,Nint)
use bitmasks
implicit none
@ -139,72 +167,6 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
end
subroutine decode_exc_int2(exc,degree,h1,p1,h2,p2,s1,s2)
use bitmasks
implicit none
BEGIN_DOC
! Decodes the exc arrays returned by get_excitation.
! h1,h2 : Holes
! p1,p2 : Particles
! s1,s2 : Spins (1:alpha, 2:beta)
! degree : Degree of excitation
END_DOC
integer, intent(in) :: exc(0:2,2,2),degree
integer*2, intent(out) :: h1,h2,p1,p2,s1,s2
ASSERT (degree > 0)
ASSERT (degree < 3)
select case(degree)
case(2)
if (exc(0,1,1) == 2) then
h1 = exc(1,1,1)
h2 = exc(2,1,1)
p1 = exc(1,2,1)
p2 = exc(2,2,1)
s1 = 1
s2 = 1
else if (exc(0,1,2) == 2) then
h1 = exc(1,1,2)
h2 = exc(2,1,2)
p1 = exc(1,2,2)
p2 = exc(2,2,2)
s1 = 2
s2 = 2
else
h1 = exc(1,1,1)
h2 = exc(1,1,2)
p1 = exc(1,2,1)
p2 = exc(1,2,2)
s1 = 1
s2 = 2
endif
case(1)
if (exc(0,1,1) == 1) then
h1 = exc(1,1,1)
h2 = 0
p1 = exc(1,2,1)
p2 = 0
s1 = 1
s2 = 0
else
h1 = exc(1,1,2)
h2 = 0
p1 = exc(1,2,2)
p2 = 0
s1 = 2
s2 = 0
endif
case(0)
h1 = 0
p1 = 0
h2 = 0
p2 = 0
s1 = 0
s2 = 0
end select
end
subroutine get_double_excitation(det1,det2,exc,phase,Nint)
use bitmasks
implicit none
@ -925,22 +887,29 @@ subroutine create_minilist(key_mask, fullList, miniList, idx_miniList, N_fullLis
N_miniList = 0
integer :: e_ab
e_ab = n_a+n_b
do i=1,N_fullList
e_a = n_a - popcnt(iand(fullList(1, 1, i), key_mask(1, 1)))
e_b = n_b - popcnt(iand(fullList(1, 2, i), key_mask(1, 2)))
e_a = e_ab - popcnt(iand(fullList(1, 1, i), key_mask(1, 1))) &
- popcnt(iand(fullList(1, 2, i), key_mask(1, 2)))
do ni=2,nint
e_a -= popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1)))
e_b -= popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2)))
e_a = e_a - popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1))) &
- popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2)))
end do
if(e_a + e_b <= 2) then
N_miniList = N_miniList + 1
do ni=1,Nint
miniList(ni,1,N_miniList) = fullList(ni,1,i)
miniList(ni,2,N_miniList) = fullList(ni,2,i)
enddo
idx_miniList(N_miniList) = i
end if
if(e_a > 2) then
cycle
endif
N_miniList = N_miniList + 1
miniList(1,1,N_miniList) = fullList(1,1,i)
miniList(1,2,N_miniList) = fullList(1,2,i)
do ni=2,Nint
miniList(ni,1,N_miniList) = fullList(ni,1,i)
miniList(ni,2,N_miniList) = fullList(ni,2,i)
enddo
idx_miniList(N_miniList) = i
end do
end subroutine
@ -1041,13 +1010,15 @@ subroutine i_H_psi(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: hij
integer :: idx(0:Ndet)
integer, allocatable :: idx(:)
ASSERT (Nint > 0)
ASSERT (N_int == Nint)
ASSERT (Nstate > 0)
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
allocate(idx(0:Ndet))
i_H_psi_array = 0.d0
call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx)
@ -1089,7 +1060,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: hij
integer :: idx(0:Ndet)
integer, allocatable :: idx(:)
BEGIN_DOC
! Computes <i|H|Psi> = \sum_J c_J <i|H|J>.
!
@ -1102,6 +1073,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,
ASSERT (Nstate > 0)
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
allocate(idx(0:Ndet))
i_H_psi_array = 0.d0
call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx)
@ -1148,7 +1120,8 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: hij
integer :: idx(0:Ndet),n_interact
integer,allocatable :: idx(:)
integer :: n_interact
BEGIN_DOC
! <key|H|psi> for the various Nstates
END_DOC
@ -1158,6 +1131,7 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array
ASSERT (Nstate > 0)
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
allocate(idx(0:Ndet))
i_H_psi_array = 0.d0
call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx)
n_interact = 0
@ -1207,7 +1181,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: hij
integer :: idx(0:Ndet)
integer,allocatable :: idx(:)
ASSERT (Nint > 0)
ASSERT (N_int == Nint)
@ -1215,6 +1189,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
i_H_psi_array = 0.d0
allocate(idx(0:Ndet))
call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat)
do ii=1,idx(0)
i = idx(ii)
@ -1254,7 +1229,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a
double precision :: phase
integer :: exc(0:2,2,2)
double precision :: hij
integer :: idx(0:Ndet)
integer,allocatable :: idx(:)
ASSERT (Nint > 0)
ASSERT (N_int == Nint)
@ -1262,6 +1237,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a
ASSERT (Ndet > 0)
ASSERT (Ndet_max >= Ndet)
i_H_psi_array = 0.d0
allocate(idx(0:Ndet))
call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat)
print*,'--------'
do ii=1,idx(0)
@ -2140,8 +2116,8 @@ end
subroutine get_phase(key1,key2,phase,Nint)
use bitmasks
implicit none
integer(bit_kind), intent(in) :: key1(Nint,2), key2(Nint,2)
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: key1(Nint,2), key2(Nint,2)
double precision, intent(out) :: phase
BEGIN_DOC
! Returns the phase between key1 and key2
@ -2192,3 +2168,424 @@ subroutine u_0_H_u_0_stored(e_0,u_0,hmatrix,sze)
call matrix_vector_product(u_0,v_0,hmatrix,sze,sze)
e_0 = u_dot_v(v_0,u_0,sze)
end
! Spin-determinant routines
! -------------------------
subroutine get_excitation_degree_spin(key1,key2,degree,Nint)
use bitmasks
include 'Utils/constants.include.F'
implicit none
BEGIN_DOC
! Returns the excitation degree between two determinants
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: key1(Nint)
integer(bit_kind), intent(in) :: key2(Nint)
integer, intent(out) :: degree
integer(bit_kind) :: xorvec(N_int_max)
integer :: l
ASSERT (Nint > 0)
select case (Nint)
case (1)
xorvec(1) = xor( key1(1), key2(1))
degree = popcnt(xorvec(1))
case (2)
xorvec(1) = xor( key1(1), key2(1))
xorvec(2) = xor( key1(2), key2(2))
degree = popcnt(xorvec(1))+popcnt(xorvec(2))
case (3)
xorvec(1) = xor( key1(1), key2(1))
xorvec(2) = xor( key1(2), key2(2))
xorvec(3) = xor( key1(3), key2(3))
degree = sum(popcnt(xorvec(1:3)))
case (4)
xorvec(1) = xor( key1(1), key2(1))
xorvec(2) = xor( key1(2), key2(2))
xorvec(3) = xor( key1(3), key2(3))
xorvec(4) = xor( key1(4), key2(4))
degree = sum(popcnt(xorvec(1:4)))
case default
do l=1,N_int
xorvec(l) = xor( key1(l), key2(l))
enddo
degree = sum(popcnt(xorvec(1:Nint)))
end select
degree = ishft(degree,-1)
end
subroutine get_excitation_spin(det1,det2,exc,degree,phase,Nint)
use bitmasks
implicit none
BEGIN_DOC
! Returns the excitation operators between two determinants and the phase
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: det1(Nint)
integer(bit_kind), intent(in) :: det2(Nint)
integer, intent(out) :: exc(0:2,2)
integer, intent(out) :: degree
double precision, intent(out) :: phase
! exc(number,hole/particle)
! ex :
! exc(0,1) = number of holes
! exc(0,2) = number of particles
! exc(1,2) = first particle
! exc(1,1) = first hole
ASSERT (Nint > 0)
!DIR$ FORCEINLINE
call get_excitation_degree_spin(det1,det2,degree,Nint)
select case (degree)
case (3:)
degree = -1
return
case (2)
call get_double_excitation_spin(det1,det2,exc,phase,Nint)
return
case (1)
call get_mono_excitation_spin(det1,det2,exc,phase,Nint)
return
case(0)
return
end select
end
subroutine decode_exc_spin(exc,h1,p1,h2,p2)
use bitmasks
implicit none
BEGIN_DOC
! Decodes the exc arrays returned by get_excitation.
! h1,h2 : Holes
! p1,p2 : Particles
END_DOC
integer, intent(in) :: exc(0:2,2)
integer, intent(out) :: h1,h2,p1,p2
select case (exc(0,1))
case(2)
h1 = exc(1,1)
h2 = exc(2,1)
p1 = exc(1,2)
p2 = exc(2,2)
case(1)
h1 = exc(1,1)
h2 = 0
p1 = exc(1,2)
p2 = 0
case default
h1 = 0
p1 = 0
h2 = 0
p2 = 0
end select
end
subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint)
use bitmasks
implicit none
BEGIN_DOC
! Returns the two excitation operators between two doubly excited spin-determinants
! and the phase
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: det1(Nint)
integer(bit_kind), intent(in) :: det2(Nint)
integer, intent(out) :: exc(0:2,2)
double precision, intent(out) :: phase
integer :: tz
integer :: l, idx_hole, idx_particle, ishift
integer :: nperm
integer :: i,j,k,m,n
integer :: high, low
integer :: a,b,c,d
integer(bit_kind) :: hole, particle, tmp
double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
ASSERT (Nint > 0)
nperm = 0
exc(0,1) = 0
exc(0,2) = 0
idx_particle = 0
idx_hole = 0
ishift = 1-bit_kind_size
do l=1,Nint
ishift = ishift + bit_kind_size
if (det1(l) == det2(l)) then
cycle
endif
tmp = xor( det1(l), det2(l) )
particle = iand(tmp, det2(l))
hole = iand(tmp, det1(l))
do while (particle /= 0_bit_kind)
tz = trailz(particle)
idx_particle = idx_particle + 1
exc(0,2) = exc(0,2) + 1
exc(idx_particle,2) = tz+ishift
particle = iand(particle,particle-1_bit_kind)
enddo
if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2
exit
endif
do while (hole /= 0_bit_kind)
tz = trailz(hole)
idx_hole = idx_hole + 1
exc(0,1) = exc(0,1) + 1
exc(idx_hole,1) = tz+ishift
hole = iand(hole,hole-1_bit_kind)
enddo
if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2
exit
endif
enddo
select case (exc(0,1))
case(1)
low = min(exc(1,1), exc(1,2))
high = max(exc(1,1), exc(1,2))
ASSERT (low > 0)
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1
if (j==k) then
nperm = nperm + popcnt(iand(det1(j), &
iand( ibset(0_bit_kind,m-1)-1_bit_kind, &
ibclr(-1_bit_kind,n)+1_bit_kind ) ))
else
nperm = nperm + popcnt(iand(det1(k), &
ibset(0_bit_kind,m-1)-1_bit_kind))
if (n < bit_kind_size) then
nperm = nperm + popcnt(iand(det1(j), ibclr(-1_bit_kind,n) +1_bit_kind))
endif
do i=j+1,k-1
nperm = nperm + popcnt(det1(i))
end do
endif
case (2)
do i=1,2
low = min(exc(i,1), exc(i,2))
high = max(exc(i,1), exc(i,2))
ASSERT (low > 0)
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1
if (j==k) then
nperm = nperm + popcnt(iand(det1(j), &
iand( ibset(0_bit_kind,m-1)-1_bit_kind, &
ibclr(-1_bit_kind,n)+1_bit_kind ) ))
else
nperm = nperm + popcnt(iand(det1(k), &
ibset(0_bit_kind,m-1)-1_bit_kind))
if (n < bit_kind_size) then
nperm = nperm + popcnt(iand(det1(j), ibclr(-1_bit_kind,n) +1_bit_kind))
endif
do l=j+1,k-1
nperm = nperm + popcnt(det1(l))
end do
endif
enddo
a = min(exc(1,1), exc(1,2))
b = max(exc(1,1), exc(1,2))
c = min(exc(2,1), exc(2,2))
d = max(exc(2,1), exc(2,2))
if (c>a .and. c<b .and. d>b) then
nperm = nperm + 1
endif
end select
phase = phase_dble(iand(nperm,1))
end
subroutine get_mono_excitation_spin(det1,det2,exc,phase,Nint)
use bitmasks
implicit none
BEGIN_DOC
! Returns the excitation operator between two singly excited determinants and the phase
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: det1(Nint)
integer(bit_kind), intent(in) :: det2(Nint)
integer, intent(out) :: exc(0:2,2)
double precision, intent(out) :: phase
integer :: tz
integer :: l, idx_hole, idx_particle, ishift
integer :: nperm
integer :: i,j,k,m,n
integer :: high, low
integer :: a,b,c,d
integer(bit_kind) :: hole, particle, tmp
double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
ASSERT (Nint > 0)
nperm = 0
exc(0,1) = 0
exc(0,2) = 0
ishift = 1-bit_kind_size
do l=1,Nint
ishift = ishift + bit_kind_size
if (det1(l) == det2(l)) then
cycle
endif
tmp = xor( det1(l), det2(l) )
particle = iand(tmp, det2(l))
hole = iand(tmp, det1(l))
if (particle /= 0_bit_kind) then
tz = trailz(particle)
exc(0,2) = 1
exc(1,2) = tz+ishift
endif
if (hole /= 0_bit_kind) then
tz = trailz(hole)
exc(0,1) = 1
exc(1,1) = tz+ishift
endif
if ( iand(exc(0,1),exc(0,2)) /= 1) then ! exc(0,1)/=1 and exc(0,2) /= 1
cycle
endif
low = min(exc(1,1),exc(1,2))
high = max(exc(1,1),exc(1,2))
ASSERT (low > 0)
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
ASSERT (high > 0)
k = ishft(high-1,-bit_kind_shift)+1
m = iand(high-1,bit_kind_size-1)+1
if (j==k) then
nperm = popcnt(iand(det1(j), &
iand(ibset(0_bit_kind,m-1)-1_bit_kind,ibclr(-1_bit_kind,n)+1_bit_kind)))
else
nperm = nperm + popcnt(iand(det1(k),ibset(0_bit_kind,m-1)-1_bit_kind))
if (n < bit_kind_size) then
nperm = nperm + popcnt(iand(det1(j),ibclr(-1_bit_kind,n)+1_bit_kind))
endif
do i=j+1,k-1
nperm = nperm + popcnt(det1(i))
end do
endif
phase = phase_dble(iand(nperm,1))
return
enddo
end
subroutine i_H_j_mono_spin(key_i,key_j,Nint,spin,hij)
use bitmasks
implicit none
BEGIN_DOC
! Returns <i|H|j> where i and j are determinants differing by a single excitation
END_DOC
integer, intent(in) :: Nint, spin
integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
double precision, intent(out) :: hij
integer :: exc(0:2,2)
double precision :: phase
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
call get_mono_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
call get_mono_excitation_from_fock(key_i,key_j,exc(1,1),exc(1,2),spin,phase,hij)
end
subroutine i_H_j_double_spin(key_i,key_j,Nint,hij)
use bitmasks
implicit none
BEGIN_DOC
! Returns <i|H|j> where i and j are determinants differing by a same-spin double excitation
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: key_i(Nint), key_j(Nint)
double precision, intent(out) :: hij
integer :: exc(0:2,2)
double precision :: phase
double precision, external :: get_mo_bielec_integral
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
call get_double_excitation_spin(key_i,key_j,exc,phase,Nint)
hij = phase*(get_mo_bielec_integral( &
exc(1,1), &
exc(2,1), &
exc(1,2), &
exc(2,2), mo_integrals_map) - &
get_mo_bielec_integral( &
exc(1,1), &
exc(2,1), &
exc(2,2), &
exc(1,2), mo_integrals_map) )
end
subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij)
use bitmasks
implicit none
BEGIN_DOC
! Returns <i|H|j> where i and j are determinants differing by an opposite-spin double excitation
END_DOC
integer, intent(in) :: Nint
integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
double precision, intent(out) :: hij
integer :: exc(0:2,2,2)
double precision :: phase, phase2
double precision, external :: get_mo_bielec_integral
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
call get_mono_excitation_spin(key_i(1,1),key_j(1,1),exc(0,1,1),phase,Nint)
call get_mono_excitation_spin(key_i(1,2),key_j(1,2),exc(0,1,2),phase2,Nint)
phase = phase*phase2
if (exc(1,1,1) == exc(1,2,2)) then
hij = phase * big_array_exchange_integrals(exc(1,1,1),exc(1,1,2),exc(1,2,1))
else if (exc(1,2,1) == exc(1,1,2)) then
hij = phase * big_array_exchange_integrals(exc(1,2,1),exc(1,1,1),exc(1,2,2))
else
hij = phase*get_mo_bielec_integral( &
exc(1,1,1), &
exc(1,1,2), &
exc(1,2,1), &
exc(1,2,2) ,mo_integrals_map)
endif
end

View File

@ -64,9 +64,9 @@ BEGIN_TEMPLATE
integer :: i,j,k
integer, allocatable :: iorder(:)
integer(8), allocatable :: bit_tmp(:)
integer(8) :: last_key
integer(8), external :: spin_det_search_key
integer*8, allocatable :: bit_tmp(:)
integer*8 :: last_key
integer*8, external :: spin_det_search_key
logical,allocatable :: duplicate(:)
allocate ( iorder(N_det), bit_tmp(N_det), duplicate(N_det) )
@ -386,26 +386,31 @@ END_PROVIDER
!==============================================================================!
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows, (N_det) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows , (N_det) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns, (N_det) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order , (N_det) ]
use bitmasks
implicit none
BEGIN_DOC
! Sparse coefficient matrix if the wave function is expressed in a bilinear form :
! D_a^t C D_b
!
! Rows are alpha determinants and columns are beta.
!
! Order refers to psi_det
END_DOC
integer :: i,j,k, l
integer(bit_kind) :: tmp_det(N_int,2)
integer :: idx
integer, external :: get_index_in_psi_det_sorted_bit
PROVIDE psi_coef_sorted_bit
integer, allocatable :: iorder(:), to_sort(:)
integer*8, allocatable :: to_sort(:)
integer, external :: get_index_in_psi_det_alpha_unique
integer, external :: get_index_in_psi_det_beta_unique
allocate(iorder(N_det), to_sort(N_det))
allocate(to_sort(N_det))
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l)
do k=1,N_det
i = get_index_in_psi_det_alpha_unique(psi_det(1,1,k),N_int)
j = get_index_in_psi_det_beta_unique (psi_det(1,2,k),N_int)
@ -415,16 +420,146 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states)
enddo
psi_bilinear_matrix_rows(k) = i
psi_bilinear_matrix_columns(k) = j
to_sort(k) = N_det_alpha_unique * (j-1) + i
iorder(k) = k
to_sort(k) = int(N_det_alpha_unique,8) * int(j-1,8) + int(i,8)
psi_bilinear_matrix_order(k) = k
enddo
call isort(to_sort, iorder, N_det)
call iset_order(psi_bilinear_matrix_rows,iorder,N_det)
call iset_order(psi_bilinear_matrix_columns,iorder,N_det)
call dset_order(psi_bilinear_matrix_values,iorder,N_det)
deallocate(iorder,to_sort)
!$OMP END PARALLEL DO
call i8sort(to_sort, psi_bilinear_matrix_order, N_det)
call iset_order(psi_bilinear_matrix_rows,psi_bilinear_matrix_order,N_det)
call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det)
do l=1,N_states
call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det)
enddo
deallocate(to_sort)
END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_reverse , (N_det) ]
use bitmasks
implicit none
BEGIN_DOC
! Order which allors to go from psi_bilinear_matrix to psi_det
END_DOC
integer :: k
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k)
do k=1,N_det
psi_bilinear_matrix_order_reverse(psi_bilinear_matrix_order(k)) = k
enddo
!$OMP END PARALLEL DO
END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1) ]
use bitmasks
implicit none
BEGIN_DOC
! Sparse coefficient matrix if the wave function is expressed in a bilinear form :
! D_a^t C D_b
!
! Rows are alpha determinants and columns are beta.
!
! Order refers to psi_det
END_DOC
integer :: i,j,k, l
l = psi_bilinear_matrix_columns(1)
psi_bilinear_matrix_columns_loc(l) = 1
do k=2,N_det
if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then
cycle
else
l = psi_bilinear_matrix_columns(k)
psi_bilinear_matrix_columns_loc(l) = k
endif
enddo
psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1
END_PROVIDER
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows , (N_det) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_columns, (N_det) ]
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_order , (N_det) ]
use bitmasks
implicit none
BEGIN_DOC
! Transpose of psi_bilinear_matrix
! D_b^t C^t D_a
!
! Rows are Alpha determinants and columns are beta, but the matrix is stored in row major
! format
END_DOC
integer :: i,j,k,l
PROVIDE psi_coef_sorted_bit
integer*8, allocatable :: to_sort(:)
allocate(to_sort(N_det))
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
!$OMP DO COLLAPSE(2)
do l=1,N_states
do k=1,N_det
psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
enddo
enddo
!$OMP ENDDO
!$OMP DO
do k=1,N_det
psi_bilinear_matrix_transp_columns(k) = psi_bilinear_matrix_columns(k)
psi_bilinear_matrix_transp_rows (k) = psi_bilinear_matrix_rows (k)
i = psi_bilinear_matrix_transp_columns(k)
j = psi_bilinear_matrix_transp_rows (k)
to_sort(k) = int(N_det_beta_unique,8) * int(j-1,8) + int(i,8)
psi_bilinear_matrix_transp_order(k) = k
enddo
!$OMP ENDDO
!$OMP END PARALLEL
call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1)
call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det)
call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det)
do l=1,N_states
call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det)
enddo
deallocate(to_sort)
END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_unique+1) ]
use bitmasks
implicit none
BEGIN_DOC
! Location of the columns in the psi_bilinear_matrix
END_DOC
integer :: i,j,k, l
l = psi_bilinear_matrix_transp_rows(1)
psi_bilinear_matrix_transp_rows_loc(l) = 1
do k=2,N_det
if (psi_bilinear_matrix_transp_rows(k) == psi_bilinear_matrix_transp_rows(k-1)) then
cycle
else
l = psi_bilinear_matrix_transp_rows(k)
psi_bilinear_matrix_transp_rows_loc(l) = k
endif
enddo
psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1
END_PROVIDER
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ]
use bitmasks
implicit none
BEGIN_DOC
! Order which allows to go from psi_bilinear_matrix_order_transp to psi_bilinear_matrix
END_DOC
integer :: k
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k)
do k=1,N_det
psi_bilinear_matrix_order_transp_reverse(psi_bilinear_matrix_transp_order(k)) = k
enddo
!$OMP END PARALLEL DO
END_PROVIDER
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix, (N_det_alpha_unique,N_det_beta_unique,N_states) ]
implicit none
BEGIN_DOC
@ -506,7 +641,7 @@ subroutine generate_all_alpha_beta_det_products
! Create a wave function from all possible alpha x beta determinants
END_DOC
integer :: i,j,k,l
integer :: idx, iproc
integer :: iproc
integer, external :: get_index_in_psi_det_sorted_bit
integer(bit_kind), allocatable :: tmp_det(:,:,:)
logical, external :: is_in_wavefunction
@ -540,3 +675,489 @@ subroutine generate_all_alpha_beta_det_products
end
<<<<<<< HEAD
=======
subroutine get_all_spin_singles_and_doubles(buffer, idx, spindet, Nint, size_buffer, singles, doubles, n_singles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single and double excitations in the list of
! unique alpha determinants.
!
! /!\ : The buffer is transposed !
!
END_DOC
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
integer(bit_kind), intent(in) :: spindet(Nint)
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_singles
integer, intent(out) :: n_doubles
select case (Nint)
case (1)
call get_all_spin_singles_and_doubles_1(buffer, idx, spindet(1), size_buffer, singles, doubles, n_singles, n_doubles)
case (2)
call get_all_spin_singles_and_doubles_2(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
case (3)
call get_all_spin_singles_and_doubles_3(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
case (4)
call get_all_spin_singles_and_doubles_4(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
case default
call get_all_spin_singles_and_doubles_N_int(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
end select
end
subroutine get_all_spin_singles(buffer, idx, spindet, Nint, size_buffer, singles, n_singles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
integer(bit_kind), intent(in) :: spindet(Nint)
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: n_singles
select case (N_int)
case (1)
call get_all_spin_singles_1(buffer, idx, spindet(1), size_buffer, singles, n_singles)
return
case (2)
call get_all_spin_singles_2(buffer, idx, spindet, size_buffer, singles, n_singles)
case (3)
call get_all_spin_singles_3(buffer, idx, spindet, size_buffer, singles, n_singles)
case (4)
call get_all_spin_singles_4(buffer, idx, spindet, size_buffer, singles, n_singles)
case default
call get_all_spin_singles_N_int(buffer, idx, spindet, size_buffer, singles, n_singles)
end select
end
subroutine get_all_spin_doubles(buffer, idx, spindet, Nint, size_buffer, doubles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the double excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
integer(bit_kind), intent(in) :: spindet(Nint)
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_doubles
select case (N_int)
case (1)
call get_all_spin_doubles_1(buffer, idx, spindet(1), size_buffer, doubles, n_doubles)
case (2)
call get_all_spin_doubles_2(buffer, idx, spindet, size_buffer, doubles, n_doubles)
case (3)
call get_all_spin_doubles_3(buffer, idx, spindet, size_buffer, doubles, n_doubles)
case (4)
call get_all_spin_doubles_4(buffer, idx, spindet, size_buffer, doubles, n_doubles)
case default
call get_all_spin_doubles_N_int(buffer, idx, spindet, size_buffer, doubles, n_doubles)
end select
end
subroutine copy_psi_bilinear_to_psi(psi, isize)
implicit none
BEGIN_DOC
! Overwrites psi_det and psi_coef with the wf in bilinear order
END_DOC
integer, intent(in) :: isize
integer(bit_kind), intent(out) :: psi(N_int,2,isize)
integer :: i,j,k,l
do k=1,isize
i = psi_bilinear_matrix_rows(k)
j = psi_bilinear_matrix_columns(k)
psi(1:N_int,1,k) = psi_det_alpha_unique(1:N_int,i)
psi(1:N_int,2,k) = psi_det_beta_unique(1:N_int,j)
enddo
end
BEGIN_PROVIDER [ integer, singles_alpha_size ]
implicit none
BEGIN_DOC
! Dimension of the singles_alpha array
END_DOC
singles_alpha_size = elec_alpha_num * (mo_tot_num - elec_alpha_num)
END_PROVIDER
BEGIN_PROVIDER [ integer*8, singles_alpha_csc_idx, (N_det_alpha_unique+1) ]
&BEGIN_PROVIDER [ integer*8, singles_alpha_csc_size ]
implicit none
BEGIN_DOC
! Dimension of the singles_alpha array
END_DOC
integer :: i,j
integer, allocatable :: idx0(:), s(:)
allocate (idx0(N_det_alpha_unique))
do i=1, N_det_alpha_unique
idx0(i) = i
enddo
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, &
!$OMP idx0, N_int, singles_alpha_csc, &
!$OMP singles_alpha_size, singles_alpha_csc_idx) &
!$OMP PRIVATE(i,s,j)
allocate (s(singles_alpha_size))
!$OMP DO SCHEDULE(static,1)
do i=1, N_det_alpha_unique
call get_all_spin_singles( &
psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int, &
N_det_alpha_unique, s, j)
singles_alpha_csc_idx(i+1) = int(j,8)
enddo
!$OMP END DO
deallocate(s)
!$OMP END PARALLEL
deallocate(idx0)
singles_alpha_csc_idx(1) = 1_8
do i=2, N_det_alpha_unique+1
singles_alpha_csc_idx(i) = singles_alpha_csc_idx(i) + singles_alpha_csc_idx(i-1)
enddo
singles_alpha_csc_size = singles_alpha_csc_idx(N_det_alpha_unique+1)
END_PROVIDER
BEGIN_PROVIDER [ integer, singles_alpha_csc, (singles_alpha_csc_size) ]
implicit none
BEGIN_DOC
! Dimension of the singles_alpha array
END_DOC
integer :: i, k
integer, allocatable :: idx0(:)
allocate (idx0(N_det_alpha_unique))
do i=1, N_det_alpha_unique
idx0(i) = i
enddo
!$OMP PARALLEL DO DEFAULT(NONE) &
!$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, &
!$OMP idx0, N_int, singles_alpha_csc, singles_alpha_csc_idx) &
!$OMP PRIVATE(i,k) SCHEDULE(static,1)
do i=1, N_det_alpha_unique
call get_all_spin_singles( &
psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int, &
N_det_alpha_unique, singles_alpha_csc(singles_alpha_csc_idx(i)), &
k)
enddo
!$OMP END PARALLEL DO
deallocate(idx0)
END_PROVIDER
subroutine get_all_spin_singles_and_doubles_1(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single and double excitations in the list of
! unique alpha determinants.
!
! /!\ : The buffer is transposed !
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(size_buffer)
integer(bit_kind), intent(in) :: spindet
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_singles
integer, intent(out) :: n_doubles
integer :: i
include 'Utils/constants.include.F'
integer :: degree
n_singles = 1
n_doubles = 1
!DIR$ VECTOR ALIGNED
do i=1,size_buffer
degree = popcnt( xor( spindet, buffer(i) ) )
if ( degree == 4 ) then
doubles(n_doubles) = idx(i)
n_doubles = n_doubles+1
else if ( degree == 2 ) then
singles(n_singles) = idx(i)
n_singles = n_singles+1
endif
enddo
n_singles = n_singles-1
n_doubles = n_doubles-1
end
subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_singles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(size_buffer)
integer(bit_kind), intent(in) :: spindet
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: n_singles
integer :: i
integer :: degree
include 'Utils/constants.include.F'
n_singles = 1
do i=1,size_buffer
degree = popcnt(xor( spindet, buffer(i) ))
singles(n_singles) = idx(i)
if (degree == 2) then
n_singles = n_singles+1
endif
enddo
n_singles = n_singles-1
end
subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the double excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer(size_buffer)
integer(bit_kind), intent(in) :: spindet
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_doubles
integer :: i
include 'Utils/constants.include.F'
integer :: degree
n_doubles = 1
!DIR$ VECTOR ALIGNED
do i=1,size_buffer
degree = popcnt(xor( spindet, buffer(i) ))
if ( degree == 4 ) then
doubles(n_doubles) = idx(i)
n_doubles = n_doubles+1
endif
enddo
n_doubles = n_doubles-1
end
BEGIN_TEMPLATE
subroutine get_all_spin_singles_and_doubles_$N_int(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single and double excitations in the list of
! unique alpha determinants.
!
! /!\ : The buffer is transposed !
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
integer(bit_kind), intent(in) :: spindet($N_int)
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_singles
integer, intent(out) :: n_doubles
integer :: i,k
integer(bit_kind) :: xorvec($N_int)
integer :: degree
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec, degree
n_singles = 1
n_doubles = 1
!DIR$ VECTOR ALIGNED
do i=1,size_buffer
do k=1,$N_int
xorvec(k) = xor( spindet(k), buffer(k,i) )
enddo
if (xorvec(1) /= 0_8) then
degree = popcnt(xorvec(1))
else
degree = 0
endif
do k=2,$N_int
!DIR$ VECTOR ALIGNED
if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then
degree = degree + popcnt(xorvec(k))
endif
enddo
if ( degree == 4 ) then
doubles(n_doubles) = idx(i)
n_doubles = n_doubles+1
else if ( degree == 2 ) then
singles(n_singles) = idx(i)
n_singles = n_singles+1
endif
enddo
n_singles = n_singles-1
n_doubles = n_doubles-1
end
subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, singles, n_singles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the single excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
integer(bit_kind), intent(in) :: spindet($N_int)
integer, intent(out) :: singles(size_buffer)
integer, intent(out) :: n_singles
integer :: i,k
include 'Utils/constants.include.F'
integer(bit_kind) :: xorvec($N_int)
integer :: degree
integer, external :: align_double
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
n_singles = 1
!DIR$ VECTOR ALIGNED
do i=1,size_buffer
do k=1,$N_int
xorvec(k) = xor( spindet(k), buffer(k,i) )
enddo
if (xorvec(1) /= 0_8) then
degree = popcnt(xorvec(1))
else
degree = 0
endif
do k=2,$N_int
if ( (degree <= 2).and.(xorvec(k) /= 0_8) ) then
degree = degree + popcnt(xorvec(k))
endif
enddo
if ( degree == 2 ) then
singles(n_singles) = idx(i)
n_singles = n_singles+1
endif
enddo
n_singles = n_singles-1
end
subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, doubles, n_doubles)
use bitmasks
implicit none
BEGIN_DOC
!
! Returns the indices of all the double excitations in the list of
! unique alpha determinants.
!
END_DOC
integer, intent(in) :: size_buffer, idx(size_buffer)
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
integer(bit_kind), intent(in) :: spindet($N_int)
integer, intent(out) :: doubles(size_buffer)
integer, intent(out) :: n_doubles
integer :: i,k, degree
include 'Utils/constants.include.F'
integer(bit_kind) :: xorvec($N_int)
n_doubles = 1
!DIR$ VECTOR ALIGNED
do i=1,size_buffer
do k=1,$N_int
xorvec(k) = xor( spindet(k), buffer(k,i) )
enddo
if (xorvec(1) /= 0_8) then
degree = popcnt(xorvec(1))
else
degree = 0
endif
do k=2,$N_int
!DIR$ VECTOR ALIGNED
if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then
degree = degree + popcnt(xorvec(k))
endif
enddo
if ( degree == 4 ) then
doubles(n_doubles) = idx(i)
n_doubles = n_doubles+1
endif
enddo
n_doubles = n_doubles-1
end
SUBST [ N_int ]
2;;
3;;
4;;
N_int;;
END_TEMPLATE

View File

@ -349,7 +349,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ]
integral = ao_bielec_integral(1,1,1,1)
real :: map_mb
double precision :: map_mb
PROVIDE read_ao_integrals disk_access_ao_integrals
if (read_ao_integrals) then
print*,'Reading the AO integrals'

View File

@ -196,7 +196,7 @@ subroutine add_integrals_to_map(mask_ijkl)
integer :: size_buffer
integer(key_kind),allocatable :: buffer_i(:)
real(integral_kind),allocatable :: buffer_value(:)
real :: map_mb
double precision :: map_mb
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
integer :: i2,i3,i4
@ -503,7 +503,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
integer :: size_buffer
integer(key_kind),allocatable :: buffer_i(:)
real(integral_kind),allocatable :: buffer_value(:)
real :: map_mb
double precision :: map_mb
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
integer :: i2,i3,i4
@ -817,7 +817,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
integer :: size_buffer
integer(key_kind),allocatable :: buffer_i(:)
real(integral_kind),allocatable :: buffer_value(:)
real :: map_mb
double precision :: map_mb
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
integer :: i2,i3,i4

View File

@ -12,17 +12,15 @@ BEGIN_TEMPLATE
$type :: xtmp
integer :: i, i0, j, jmax
do i=1,isize
do i=2,isize
xtmp = x(i)
i0 = iorder(i)
j = i-1
do j=i-1,1,-1
if ( x(j) > xtmp ) then
x(j+1) = x(j)
iorder(j+1) = iorder(j)
else
exit
endif
j=i-1
do while (j>0)
if ((x(j) <= xtmp)) exit
x(j+1) = x(j)
iorder(j+1) = iorder(j)
j=j-1
enddo
x(j+1) = xtmp
iorder(j+1) = i0
@ -158,6 +156,38 @@ BEGIN_TEMPLATE
end subroutine heap_$Xsort_big
subroutine sorted_$Xnumber(x,isize,n)
implicit none
BEGIN_DOC
! Returns the number of sorted elements
END_DOC
integer, intent(in) :: isize
$type, intent(in) :: x(isize)
integer, intent(out) :: n
integer :: i
n=1
if (isize < 2) then
return
endif
do i=2,isize
if (x(i-1) <= x(i)) then
n=n+1
endif
enddo
end
SUBST [ X, type ]
; real ;;
d ; double precision ;;
i ; integer ;;
i8 ; integer*8 ;;
i2 ; integer*2 ;;
END_TEMPLATE
BEGIN_TEMPLATE
subroutine $Xsort(x,iorder,isize)
implicit none
BEGIN_DOC
@ -168,16 +198,42 @@ BEGIN_TEMPLATE
integer,intent(in) :: isize
$type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
if (isize < 32) then
integer :: n
if (isize < 2) then
return
endif
call sorted_$Xnumber(x,isize,n)
if (isize == n) then
return
endif
if ( isize < 32+n) then
call insertion_$Xsort(x,iorder,isize)
else
call heap_$Xsort(x,iorder,isize)
endif
end subroutine $Xsort
SUBST [ X, type, Y ]
; real ; i ;;
d ; double precision ; i8 ;;
END_TEMPLATE
BEGIN_TEMPLATE
subroutine $Xsort(x,iorder,isize)
implicit none
BEGIN_DOC
! Sort array x(isize).
! iorder in input should be (1,2,3,...,isize), and in output
! contains the new order of the elements.
END_DOC
integer,intent(in) :: isize
$type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize)
integer :: n
call $Xradix_sort(x,iorder,isize,-1)
end subroutine $Xsort
SUBST [ X, type ]
; real ;;
d ; double precision ;;
i ; integer ;;
i8 ; integer*8 ;;
i2 ; integer*2 ;;
@ -232,17 +288,15 @@ BEGIN_TEMPLATE
$type :: xtmp
integer*8 :: i, i0, j, jmax
do i=1_8,isize
do i=2_8,isize
xtmp = x(i)
i0 = iorder(i)
j = i-1_8
do j=i-1_8,1_8,-1_8
if ( x(j) > xtmp ) then
x(j+1_8) = x(j)
iorder(j+1_8) = iorder(j)
else
exit
endif
do while (j>0_8)
if (x(j)<=xtmp) exit
x(j+1_8) = x(j)
iorder(j+1_8) = iorder(j)
j = j-1_8
enddo
x(j+1_8) = xtmp
iorder(j+1_8) = i0
@ -292,63 +346,107 @@ BEGIN_TEMPLATE
! contains the new order of the elements.
! iradix should be -1 in input.
END_DOC
$int_type, intent(in) :: isize
$int_type, intent(inout) :: iorder(isize)
$type, intent(inout) :: x(isize)
integer*$int_type, intent(in) :: isize
integer*$int_type, intent(inout) :: iorder(isize)
integer*$type, intent(inout) :: x(isize)
integer, intent(in) :: iradix
integer :: iradix_new
$type, allocatable :: x2(:), x1(:)
$type :: i4
$int_type, allocatable :: iorder1(:),iorder2(:)
$int_type :: i0, i1, i2, i3, i
integer, parameter :: integer_size=$octets
$type, parameter :: zero=$zero
$type :: mask
integer :: nthreads, omp_get_num_threads
integer*$type, allocatable :: x2(:), x1(:)
integer*$type :: i4 ! data type
integer*$int_type, allocatable :: iorder1(:),iorder2(:)
integer*$int_type :: i0, i1, i2, i3, i ! index type
integer*$type :: mask
integer :: err
!DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1
if (iradix == -1) then
! Find most significant bit
i0 = 0_8
i4 = -1_8
do i=1,isize
i4 = max(i4,x(i))
enddo
i3 = i4 ! Type conversion
iradix_new = integer_size-1-leadz(i3)
mask = ibset(zero,iradix_new)
nthreads = 1
! nthreads = 1+ishft(omp_get_num_threads(),-1)
integer :: err
allocate(x1(isize/nthreads+1),iorder1(isize/nthreads+1),x2(isize/nthreads+1),iorder2(isize/nthreads+1),stat=err)
if (iradix == -1) then ! Sort Positive and negative
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to allocate arrays'
stop
endif
i1=1_8
i2=1_8
do i=1,isize
if (iand(mask,x(i)) == zero) then
i1=1_$int_type
i2=1_$int_type
do i=1_$int_type,isize
if (x(i) < 0_$type) then
iorder1(i1) = iorder(i)
x1(i1) = x(i)
i1 = i1+1_8
x1(i1) = -x(i)
i1 = i1+1_$int_type
else
iorder2(i2) = iorder(i)
x2(i2) = x(i)
i2 = i2+1_8
i2 = i2+1_$int_type
endif
enddo
i1=i1-1_8
i2=i2-1_8
i1=i1-1_$int_type
i2=i2-1_$int_type
do i=1_$int_type,i2
iorder(i1+i) = iorder2(i)
x(i1+i) = x2(i)
enddo
deallocate(x2,iorder2,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x2, iorder2'
stop
endif
do i=1,i1
if (i1 > 1_$int_type) then
call $Xradix_sort$big(x1,iorder1,i1,-2)
do i=1_$int_type,i1
x(i) = -x1(1_$int_type+i1-i)
iorder(i) = iorder1(1_$int_type+i1-i)
enddo
endif
deallocate(x1,iorder1,stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
stop
endif
if (i2>1_$int_type) then
call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
endif
return
else if (iradix == -2) then ! Positive
! Find most significant bit
i0 = 0_$int_type
i4 = maxval(x)
iradix_new = max($integer_size-1-leadz(i4),1)
mask = ibset(0_$type,iradix_new)
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
if (err /= 0) then
print *, irp_here, ': Unable to allocate arrays'
stop
endif
i1=1_$int_type
i2=1_$int_type
do i=1_$int_type,isize
if (iand(mask,x(i)) == 0_$type) then
iorder1(i1) = iorder(i)
x1(i1) = x(i)
i1 = i1+1_$int_type
else
iorder2(i2) = iorder(i)
x2(i2) = x(i)
i2 = i2+1_$int_type
endif
enddo
i1=i1-1_$int_type
i2=i2-1_$int_type
do i=1_$int_type,i1
iorder(i0+i) = iorder1(i)
x(i0+i) = x1(i)
enddo
@ -361,7 +459,7 @@ BEGIN_TEMPLATE
endif
do i=1,i2
do i=1_$int_type,i2
iorder(i0+i) = iorder2(i)
x(i0+i) = x2(i)
enddo
@ -373,12 +471,12 @@ BEGIN_TEMPLATE
endif
if (i3>1) then
if (i3>1_$int_type) then
call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
endif
if (isize-i3>1) then
call $Xradix_sort$big(x(i3+1),iorder(i3+1),isize-i3,iradix_new-1)
if (isize-i3>1_$int_type) then
call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
endif
return
@ -399,25 +497,25 @@ BEGIN_TEMPLATE
endif
mask = ibset(zero,iradix)
i0=1
i1=1
mask = ibset(0_$type,iradix)
i0=1_$int_type
i1=1_$int_type
do i=1,isize
if (iand(mask,x(i)) == zero) then
do i=1_$int_type,isize
if (iand(mask,x(i)) == 0_$type) then
iorder(i0) = iorder(i)
x(i0) = x(i)
i0 = i0+1
i0 = i0+1_$int_type
else
iorder2(i1) = iorder(i)
x2(i1) = x(i)
i1 = i1+1
i1 = i1+1_$int_type
endif
enddo
i0=i0-1
i1=i1-1
i0=i0-1_$int_type
i1=i1-1_$int_type
do i=1,i1
do i=1_$int_type,i1
iorder(i0+i) = iorder2(i)
x(i0+i) = x2(i)
enddo
@ -434,8 +532,8 @@ BEGIN_TEMPLATE
endif
if (i1>1) then
call $Xradix_sort$big(x(i0+1),iorder(i0+1),i1,iradix-1)
if (i1>1_$int_type) then
call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
endif
if (i0>1) then
call $Xradix_sort$big(x,iorder,i0,iradix-1)
@ -443,12 +541,12 @@ BEGIN_TEMPLATE
end
SUBST [ X, type, octets, is_big, big, int_type, zero ]
i ; integer ; 32 ; .False. ; ; integer ; 0;;
i8 ; integer*8 ; 32 ; .False. ; ; integer ; 0_8;;
i2 ; integer*2 ; 32 ; .False. ; ; integer ; 0;;
i ; integer ; 64 ; .True. ; _big ; integer*8 ; 0 ;;
i8 ; integer*8 ; 64 ; .True. ; _big ; integer*8 ; 0_8 ;;
SUBST [ X, type, integer_size, is_big, big, int_type ]
i ; 4 ; 32 ; .False. ; ; 4 ;;
i8 ; 8 ; 64 ; .False. ; ; 4 ;;
i2 ; 2 ; 16 ; .False. ; ; 4 ;;
i ; 4 ; 32 ; .True. ; _big ; 8 ;;
i8 ; 8 ; 64 ; .True. ; _big ; 8 ;;
END_TEMPLATE

View File

@ -1,11 +1,8 @@
use f77_zmq
use omp_lib
integer, pointer :: thread_id
integer(omp_lock_kind) :: zmq_lock
BEGIN_PROVIDER [ integer(ZMQ_PTR), zmq_context ]
BEGIN_PROVIDER [ integer(ZMQ_PTR), zmq_context ]
&BEGIN_PROVIDER [ integer(omp_lock_kind), zmq_lock ]
use f77_zmq
implicit none
BEGIN_DOC
@ -94,7 +91,7 @@ subroutine switch_qp_run_to_master
print *, 'This run should be started with the qp_run command'
stop -1
endif
qp_run_address = trim(buffer)
qp_run_address = adjustl(buffer)
print *, 'Switched to qp_run master : ', trim(qp_run_address)
integer :: i
@ -235,8 +232,8 @@ function new_zmq_pull_socket()
if (zmq_context == 0_ZMQ_PTR) then
stop 'zmq_context is uninitialized'
endif
new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL)
! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP)
! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL)
new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP)
call omp_unset_lock(zmq_lock)
if (new_zmq_pull_socket == 0_ZMQ_PTR) then
stop 'Unable to create zmq pull socket'
@ -312,8 +309,8 @@ function new_zmq_push_socket(thread)
if (zmq_context == 0_ZMQ_PTR) then
stop 'zmq_context is uninitialized'
endif
new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH)
! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ)
! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH)
new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ)
call omp_unset_lock(zmq_lock)
if (new_zmq_push_socket == 0_ZMQ_PTR) then
stop 'Unable to create zmq push socket'
@ -407,7 +404,9 @@ subroutine end_zmq_sub_socket(zmq_socket_sub)
integer(ZMQ_PTR), intent(in) :: zmq_socket_sub
integer :: rc
call omp_set_lock(zmq_lock)
rc = f77_zmq_close(zmq_socket_sub)
call omp_unset_lock(zmq_lock)
if (rc /= 0) then
print *, 'f77_zmq_close(zmq_socket_sub)'
stop 'error'
@ -426,7 +425,9 @@ subroutine end_zmq_pair_socket(zmq_socket_pair)
integer :: rc
character*(8), external :: zmq_port
call omp_set_lock(zmq_lock)
rc = f77_zmq_close(zmq_socket_pair)
call omp_unset_lock(zmq_lock)
if (rc /= 0) then
print *, 'f77_zmq_close(zmq_socket_pair)'
stop 'error'
@ -444,7 +445,14 @@ subroutine end_zmq_pull_socket(zmq_socket_pull)
integer :: rc
character*(8), external :: zmq_port
rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,0,4)
if (rc /= 0) then
stop 'Unable to set ZMQ_LINGER on pull socket'
endif
call omp_set_lock(zmq_lock)
rc = f77_zmq_close(zmq_socket_pull)
call omp_unset_lock(zmq_lock)
if (rc /= 0) then
print *, 'f77_zmq_close(zmq_socket_pull)'
stop 'error'
@ -469,7 +477,9 @@ subroutine end_zmq_push_socket(zmq_socket_push,thread)
stop 'Unable to set ZMQ_LINGER on push socket'
endif
call omp_set_lock(zmq_lock)
rc = f77_zmq_close(zmq_socket_push)
call omp_unset_lock(zmq_lock)
if (rc /= 0) then
print *, 'f77_zmq_close(zmq_socket_push)'
stop 'error'
@ -500,10 +510,17 @@ subroutine new_parallel_job(zmq_to_qp_run_socket,name_in)
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
call omp_set_lock(zmq_lock)
zmq_context = f77_zmq_ctx_new ()
call omp_unset_lock(zmq_lock)
if (zmq_context == 0_ZMQ_PTR) then
stop 'ZMQ_PTR is null'
endif
! rc = f77_zmq_ctx_set(zmq_context, ZMQ_IO_THREADS, nproc)
! if (rc /= 0) then
! print *, 'Unable to set the number of ZMQ IO threads to', nproc
! endif
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
name = name_in
sze = len(trim(name))
@ -584,7 +601,10 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,name_in)
zmq_state = 'No_state'
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call omp_set_lock(zmq_lock)
rc = f77_zmq_ctx_term(zmq_context)
zmq_context = 0_ZMQ_PTR
call omp_unset_lock(zmq_lock)
if (rc /= 0) then
print *, 'Unable to terminate ZMQ context'
stop 'error'
@ -684,10 +704,43 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task)
character*(*), intent(in) :: task
integer :: rc, sze
character*(512) :: message
character(len=:), allocatable :: message
message='add_task '//trim(zmq_state)//' '//trim(task)
sze = len(message)
rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0)
if (rc /= sze) then
print *, rc, sze
print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)'
stop 'error'
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, sze-1, 0)
if (message(1:rc) /= 'ok') then
print *, trim(task)
print *, 'Unable to add the next task'
stop -1
endif
end
subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task)
use f77_zmq
implicit none
BEGIN_DOC
! Get a task from the task server
END_DOC
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
character*(*), intent(in) :: task
integer :: rc, sze
character(len=:), allocatable :: message
sze = len(trim(task))+12+len(trim(zmq_state))
message = repeat(' ',sze)
write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task)
sze = len(trim(message))
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)
if (rc /= sze) then
print *, rc, sze
@ -695,10 +748,20 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task)
stop 'error'
endif
end
subroutine add_task_to_taskserver_recv(zmq_to_qp_run_socket)
use f77_zmq
implicit none
BEGIN_DOC
! Get a task from the task server
END_DOC
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
integer :: rc, sze
character*(512) :: message
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
message = trim(message(1:rc))
if (trim(message) /= 'ok') then
print *, trim(task)
if (message(1:rc) /= 'ok') then
print *, 'Unable to add the next task'
stop -1
endif
@ -726,8 +789,7 @@ subroutine task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id)
endif
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
message = trim(message(1:rc))
if (trim(message) /= 'ok') then
if (trim(message(1:rc)) /= 'ok') then
print *, 'Unable to send task_done message'
stop -1
endif
@ -752,17 +814,17 @@ subroutine get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id,task)
write(message,*) 'get_task '//trim(zmq_state), worker_id
sze = len(trim(message))
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)
rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0)
if (rc /= sze) then
print *, irp_here, ':f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)'
stop 'error'
endif
message = repeat(' ',512)
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
message = trim(message(1:rc))
read(message,*) reply
read(message(1:rc),*) reply
if (trim(reply) == 'get_task_reply') then
read(message,*) reply, task_id
read(message(1:rc),*) reply, task_id
rc = 15
do while (message(rc:rc) == ' ')
rc += 1
@ -938,3 +1000,4 @@ subroutine wait_for_states(state_wait,state,n)
end