mirror of
https://github.com/LCPQ/quantum_package
synced 2024-12-22 12:23:48 +01:00
Merge scemama
This commit is contained in:
commit
20f2fff7b2
@ -670,11 +670,13 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
||||
if(banned(p1,p2)) cycle
|
||||
if(mat(1, p1, p2) == 0d0) cycle
|
||||
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
|
||||
logical, external :: is_in_wavefunction
|
||||
if (is_in_wavefunction(det,N_int)) then
|
||||
|
||||
if (do_ddci) then
|
||||
logical, external :: is_a_two_holes_two_particles
|
||||
if (is_a_two_holes_two_particles(det)) then
|
||||
cycle
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
|
||||
max_e_pert = 0d0
|
||||
@ -1205,3 +1207,127 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
|
||||
end do genl
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine ZMQ_selection(N_in, pt2)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
integer, intent(in) :: N_in
|
||||
type(selection_buffer) :: b
|
||||
integer :: i, N
|
||||
integer, external :: omp_get_thread_num
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
integer, parameter :: maxtasks=10000
|
||||
|
||||
|
||||
N = max(N_in,1)
|
||||
if (.True.) then
|
||||
PROVIDE pt2_e0_denominator
|
||||
provide nproc
|
||||
call new_parallel_job(zmq_to_qp_run_socket,"selection")
|
||||
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
|
||||
call create_selection_buffer(N, N*2, b)
|
||||
endif
|
||||
|
||||
character*(20*maxtasks) :: task
|
||||
task = ' '
|
||||
|
||||
integer :: k
|
||||
k=0
|
||||
do i= 1, N_det_generators
|
||||
k = k+1
|
||||
write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
|
||||
if (k>=maxtasks) then
|
||||
k=0
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
endif
|
||||
enddo
|
||||
if (k > 0) then
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
endif
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
|
||||
i = omp_get_thread_num()
|
||||
if (i==0) then
|
||||
call selection_collector(b, pt2)
|
||||
else
|
||||
call selection_slave_inproc(i)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'selection')
|
||||
if (N_in > 0) then
|
||||
call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN
|
||||
call copy_H_apply_buffer_to_wf()
|
||||
if (s2_eig) then
|
||||
call make_s2_eigenfunction
|
||||
endif
|
||||
call save_wavefunction
|
||||
endif
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine selection_slave_inproc(i)
|
||||
implicit none
|
||||
integer, intent(in) :: i
|
||||
|
||||
call run_selection_slave(1,i,pt2_e0_denominator)
|
||||
end
|
||||
|
||||
subroutine selection_collector(b, pt2)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
use bitmasks
|
||||
implicit none
|
||||
|
||||
|
||||
type(selection_buffer), intent(inout) :: b
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
double precision :: pt2_mwen(N_states)
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
|
||||
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
||||
integer(ZMQ_PTR) :: zmq_socket_pull
|
||||
|
||||
integer :: msg_size, rc, more
|
||||
integer :: acc, i, j, robin, N, ntask
|
||||
double precision, allocatable :: val(:)
|
||||
integer(bit_kind), allocatable :: det(:,:,:)
|
||||
integer, allocatable :: task_id(:)
|
||||
integer :: done
|
||||
real :: time, time0
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det))
|
||||
done = 0
|
||||
more = 1
|
||||
pt2(:) = 0d0
|
||||
call CPU_TIME(time0)
|
||||
do while (more == 1)
|
||||
call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask)
|
||||
pt2 += pt2_mwen
|
||||
do i=1, N
|
||||
call add_to_selection_buffer(b, det(1,1,i), val(i))
|
||||
end do
|
||||
|
||||
do i=1, ntask
|
||||
if(task_id(i) == 0) then
|
||||
print *, "Error in collector"
|
||||
endif
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
|
||||
end do
|
||||
done += ntask
|
||||
call CPU_TIME(time)
|
||||
! print *, "DONE" , done, time - time0
|
||||
end do
|
||||
|
||||
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
call end_zmq_pull_socket(zmq_socket_pull)
|
||||
call sort_selection_buffer(b)
|
||||
end subroutine
|
||||
|
||||
|
580
plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f
Normal file
580
plugins/Full_CI_ZMQ/pt2_stoch_routines.irp.f
Normal file
@ -0,0 +1,580 @@
|
||||
BEGIN_PROVIDER [ integer, fragment_first ]
|
||||
implicit none
|
||||
fragment_first = first_det_of_teeth(1)
|
||||
END_PROVIDER
|
||||
|
||||
subroutine ZMQ_pt2(pt2,relative_error)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
|
||||
implicit none
|
||||
|
||||
character(len=64000) :: task
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_to_qp_run_socket2
|
||||
type(selection_buffer) :: b
|
||||
integer, external :: omp_get_thread_num
|
||||
double precision, intent(in) :: relative_error
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
|
||||
|
||||
double precision, allocatable :: pt2_detail(:,:), comb(:)
|
||||
logical, allocatable :: computed(:)
|
||||
integer, allocatable :: tbc(:)
|
||||
integer :: i, j, k, Ncomb, generator_per_task, i_generator_end
|
||||
integer, external :: pt2_find
|
||||
|
||||
double precision :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
|
||||
double precision, external :: omp_get_wtime
|
||||
double precision :: time0, time
|
||||
|
||||
allocate(pt2_detail(N_states, N_det_generators), comb(N_det_generators/2), computed(N_det_generators), tbc(0:size_tbc))
|
||||
sumabove = 0d0
|
||||
sum2above = 0d0
|
||||
Nabove = 0d0
|
||||
|
||||
provide nproc fragment_first fragment_count mo_bielec_integrals_in_map mo_mono_elec_integral pt2_weight
|
||||
|
||||
!call random_seed()
|
||||
|
||||
computed = .false.
|
||||
|
||||
tbc(0) = first_det_of_comb - 1
|
||||
do i=1, tbc(0)
|
||||
tbc(i) = i
|
||||
computed(i) = .true.
|
||||
end do
|
||||
|
||||
pt2_detail = 0d0
|
||||
time0 = omp_get_wtime()
|
||||
print *, "time - avg - err - n_combs"
|
||||
generator_per_task = 1
|
||||
do while(.true.)
|
||||
|
||||
call write_time(6)
|
||||
call new_parallel_job(zmq_to_qp_run_socket,"pt2")
|
||||
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
|
||||
call create_selection_buffer(1, 1*2, b)
|
||||
|
||||
Ncomb=size(comb)
|
||||
call get_carlo_workbatch(computed, comb, Ncomb, tbc)
|
||||
|
||||
call write_time(6)
|
||||
|
||||
|
||||
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
|
||||
integer :: ipos
|
||||
logical :: tasks
|
||||
tasks = .False.
|
||||
ipos=1
|
||||
|
||||
do i=1,tbc(0)
|
||||
if(tbc(i) > fragment_first) then
|
||||
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') 0, tbc(i)
|
||||
ipos += 20
|
||||
if (ipos > 63980) then
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
|
||||
ipos=1
|
||||
tasks = .True.
|
||||
endif
|
||||
else
|
||||
do j=1,fragment_count
|
||||
write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, tbc(i)
|
||||
ipos += 20
|
||||
if (ipos > 63980) then
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
|
||||
ipos=1
|
||||
tasks = .True.
|
||||
endif
|
||||
end do
|
||||
end if
|
||||
end do
|
||||
if (ipos > 1) then
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos-20)))
|
||||
tasks = .True.
|
||||
endif
|
||||
|
||||
if (tasks) then
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc+1) &
|
||||
!$OMP PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
if (i==0) then
|
||||
call pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2)
|
||||
else
|
||||
call pt2_slave_inproc(i)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'pt2')
|
||||
|
||||
else
|
||||
pt2 = 0.d0
|
||||
do i=1,N_det_generators
|
||||
do k=1,N_states
|
||||
pt2(k) = pt2(k) + pt2_detail(k,i)
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
|
||||
tbc(0) = 0
|
||||
if (pt2(1) /= 0.d0) then
|
||||
exit
|
||||
endif
|
||||
end do
|
||||
|
||||
deallocate(pt2_detail, comb, computed, tbc)
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine do_carlo(tbc, Ncomb, comb, pt2_detail, computed, sumabove, sum2above, Nabove)
|
||||
integer, intent(in) :: tbc(0:size_tbc), Ncomb
|
||||
logical, intent(in) :: computed(N_det_generators)
|
||||
double precision, intent(in) :: comb(Ncomb), pt2_detail(N_states, N_det_generators)
|
||||
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
|
||||
integer :: i, dets(comb_teeth)
|
||||
double precision :: myVal, myVal2
|
||||
|
||||
mainLoop : do i=1,Ncomb
|
||||
call get_comb(comb(i), dets, comb_teeth)
|
||||
do j=1,comb_teeth
|
||||
if(.not.(computed(dets(j)))) then
|
||||
exit mainLoop
|
||||
end if
|
||||
end do
|
||||
|
||||
myVal = 0d0
|
||||
myVal2 = 0d0
|
||||
do j=comb_teeth,1,-1
|
||||
myVal += pt2_detail(1, dets(j)) * pt2_weight_inv(dets(j)) * comb_step
|
||||
sumabove(j) += myVal
|
||||
sum2above(j) += myVal*myVal
|
||||
Nabove(j) += 1
|
||||
end do
|
||||
end do mainLoop
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine pt2_slave_inproc(i)
|
||||
implicit none
|
||||
integer, intent(in) :: i
|
||||
|
||||
call run_pt2_slave(1,i,pt2_e0_denominator)
|
||||
end
|
||||
|
||||
subroutine pt2_collector(b, tbc, comb, Ncomb, computed, pt2_detail, sumabove, sum2above, Nabove, relative_error, pt2)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
use bitmasks
|
||||
implicit none
|
||||
|
||||
|
||||
integer, intent(in) :: Ncomb
|
||||
double precision, intent(inout) :: pt2_detail(N_states, N_det_generators)
|
||||
double precision, intent(in) :: comb(Ncomb), relative_error
|
||||
logical, intent(inout) :: computed(N_det_generators)
|
||||
integer, intent(in) :: tbc(0:size_tbc)
|
||||
double precision, intent(inout) :: sumabove(comb_teeth), sum2above(comb_teeth), Nabove(comb_teeth)
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
|
||||
|
||||
type(selection_buffer), intent(inout) :: b
|
||||
double precision, allocatable :: pt2_mwen(:,:)
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
|
||||
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
||||
integer(ZMQ_PTR) :: zmq_socket_pull
|
||||
|
||||
integer :: msg_size, rc, more
|
||||
integer :: acc, i, j, robin, N, ntask
|
||||
double precision, allocatable :: val(:)
|
||||
integer(bit_kind), allocatable :: det(:,:,:)
|
||||
integer, allocatable :: task_id(:)
|
||||
integer :: done, Nindex
|
||||
integer, allocatable :: index(:)
|
||||
double precision, save :: time0 = -1.d0
|
||||
double precision :: time, timeLast
|
||||
double precision, external :: omp_get_wtime
|
||||
integer :: tooth, firstTBDcomb, orgTBDcomb
|
||||
integer, allocatable :: parts_to_get(:)
|
||||
logical, allocatable :: actually_computed(:)
|
||||
|
||||
allocate(actually_computed(N_det_generators), parts_to_get(N_det_generators), &
|
||||
pt2_mwen(N_states, N_det_generators) )
|
||||
do i=1,N_det_generators
|
||||
actually_computed(i) = computed(i)
|
||||
enddo
|
||||
|
||||
parts_to_get(:) = 1
|
||||
if(fragment_first > 0) then
|
||||
do i=1,fragment_first
|
||||
parts_to_get(i) = fragment_count
|
||||
enddo
|
||||
endif
|
||||
|
||||
do i=1,tbc(0)
|
||||
actually_computed(tbc(i)) = .false.
|
||||
end do
|
||||
|
||||
orgTBDcomb = Nabove(1)
|
||||
firstTBDcomb = 1
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators), index(1))
|
||||
more = 1
|
||||
if (time0 < 0.d0) then
|
||||
time0 = omp_get_wtime()
|
||||
endif
|
||||
timeLast = time0
|
||||
|
||||
print *, 'N_deterministic = ', first_det_of_teeth(1)-1
|
||||
pullLoop : do while (more == 1)
|
||||
call pull_pt2_results(zmq_socket_pull, Nindex, index, pt2_mwen, task_id, ntask)
|
||||
do i=1,Nindex
|
||||
pt2_detail(1:N_states, index(i)) += pt2_mwen(1:N_states,i)
|
||||
parts_to_get(index(i)) -= 1
|
||||
if(parts_to_get(index(i)) < 0) then
|
||||
print *, i, index(i), parts_to_get(index(i)), Nindex
|
||||
print *, "PARTS ??"
|
||||
print *, parts_to_get
|
||||
stop "PARTS ??"
|
||||
end if
|
||||
if(parts_to_get(index(i)) == 0) actually_computed(index(i)) = .true.
|
||||
end do
|
||||
|
||||
do i=1, ntask
|
||||
if(task_id(i) == 0) then
|
||||
print *, "Error in collector"
|
||||
endif
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
|
||||
end do
|
||||
|
||||
time = omp_get_wtime()
|
||||
|
||||
if(time - timeLast > 1d1 .or. more /= 1) then
|
||||
timeLast = time
|
||||
do i=1, first_det_of_teeth(1)-1
|
||||
if(.not.(actually_computed(i))) then
|
||||
print *, "PT2 : deterministic part not finished"
|
||||
cycle pullLoop
|
||||
end if
|
||||
end do
|
||||
|
||||
double precision :: E0, avg, eqt, prop
|
||||
call do_carlo(tbc, Ncomb+1-firstTBDcomb, comb(firstTBDcomb), pt2_detail, actually_computed, sumabove, sum2above, Nabove)
|
||||
firstTBDcomb = Nabove(1) - orgTBDcomb + 1
|
||||
if(Nabove(1) < 2d0) cycle
|
||||
call get_first_tooth(actually_computed, tooth)
|
||||
|
||||
done = 0
|
||||
do i=first_det_of_teeth(tooth), first_det_of_teeth(tooth+1)-1
|
||||
if(actually_computed(i)) done = done + 1
|
||||
end do
|
||||
|
||||
E0 = sum(pt2_detail(1,:first_det_of_teeth(tooth)-1))
|
||||
prop = ((1d0 - dfloat(comb_teeth - tooth + 1) * comb_step) - pt2_cweight(first_det_of_teeth(tooth)-1))
|
||||
prop = prop * pt2_weight_inv(first_det_of_teeth(tooth))
|
||||
E0 += pt2_detail(1,first_det_of_teeth(tooth)) * prop
|
||||
avg = E0 + (sumabove(tooth) / Nabove(tooth))
|
||||
eqt = sqrt(1d0 / (Nabove(tooth)-1) * abs(sum2above(tooth) / Nabove(tooth) - (sumabove(tooth)/Nabove(tooth))**2))
|
||||
time = omp_get_wtime()
|
||||
if (dabs(eqt/avg) < relative_error) then
|
||||
pt2(1) = avg
|
||||
! exit pullLoop
|
||||
else
|
||||
print "(4(G22.13), 4(I9))", time - time0, avg, eqt, Nabove(tooth), tooth, first_det_of_teeth(tooth)-1, done, first_det_of_teeth(tooth+1)-first_det_of_teeth(tooth)
|
||||
endif
|
||||
end if
|
||||
end do pullLoop
|
||||
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
call end_zmq_pull_socket(zmq_socket_pull)
|
||||
call sort_selection_buffer(b)
|
||||
end subroutine
|
||||
|
||||
integer function pt2_find(v, w, sze, imin, imax)
|
||||
implicit none
|
||||
integer, intent(in) :: sze, imin, imax
|
||||
double precision, intent(in) :: v, w(sze)
|
||||
integer :: i,l,h
|
||||
integer, parameter :: block=64
|
||||
|
||||
l = imin
|
||||
h = imax-1
|
||||
|
||||
do while(h-l >= block)
|
||||
i = ishft(h+l,-1)
|
||||
if(w(i+1) > v) then
|
||||
h = i-1
|
||||
else
|
||||
l = i+1
|
||||
end if
|
||||
end do
|
||||
!DIR$ LOOP COUNT (64)
|
||||
do pt2_find=l,h
|
||||
if(w(pt2_find) >= v) then
|
||||
exit
|
||||
end if
|
||||
end do
|
||||
end function
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, comb_teeth ]
|
||||
implicit none
|
||||
comb_teeth = 100
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
|
||||
subroutine get_first_tooth(computed, first_teeth)
|
||||
implicit none
|
||||
logical, intent(in) :: computed(N_det_generators)
|
||||
integer, intent(out) :: first_teeth
|
||||
integer :: i, first_det
|
||||
|
||||
first_det = 1
|
||||
first_teeth = 1
|
||||
do i=first_det_of_comb, N_det_generators
|
||||
if(.not.(computed(i))) then
|
||||
first_det = i
|
||||
exit
|
||||
end if
|
||||
end do
|
||||
|
||||
do i=comb_teeth, 1, -1
|
||||
if(first_det_of_teeth(i) < first_det) then
|
||||
first_teeth = i
|
||||
exit
|
||||
end if
|
||||
end do
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine get_last_full_tooth(computed, last_tooth)
|
||||
implicit none
|
||||
logical, intent(in) :: computed(N_det_generators)
|
||||
integer, intent(out) :: last_tooth
|
||||
integer :: i, j, missing
|
||||
|
||||
last_tooth = 0
|
||||
combLoop : do i=comb_teeth, 1, -1
|
||||
missing = 1+ ishft(first_det_of_teeth(i+1)-first_det_of_teeth(i),-12) ! /4096
|
||||
do j=first_det_of_teeth(i), first_det_of_teeth(i+1)-1
|
||||
if(.not.computed(j)) then
|
||||
missing -= 1
|
||||
if(missing < 0) cycle combLoop
|
||||
end if
|
||||
end do
|
||||
last_tooth = i
|
||||
exit
|
||||
end do combLoop
|
||||
end subroutine
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, size_tbc ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Size of the tbc array
|
||||
END_DOC
|
||||
size_tbc = N_det_generators + fragment_count*fragment_first
|
||||
END_PROVIDER
|
||||
|
||||
subroutine get_carlo_workbatch(computed, comb, Ncomb, tbc)
|
||||
implicit none
|
||||
integer, intent(inout) :: Ncomb
|
||||
double precision, intent(out) :: comb(Ncomb)
|
||||
integer, intent(inout) :: tbc(0:size_tbc)
|
||||
logical, intent(inout) :: computed(N_det_generators)
|
||||
integer :: i, j, last_full, dets(comb_teeth), tbc_save
|
||||
integer :: icount, n
|
||||
n = tbc(0)
|
||||
icount = 0
|
||||
call RANDOM_NUMBER(comb)
|
||||
do i=1,size(comb)
|
||||
comb(i) = comb(i) * comb_step
|
||||
tbc_save = tbc(0)
|
||||
!DIR$ FORCEINLINE
|
||||
call add_comb(comb(i), computed, tbc, size_tbc, comb_teeth)
|
||||
if (tbc(0) < size(tbc)) then
|
||||
Ncomb = i
|
||||
else
|
||||
tbc(0) = tbc_save
|
||||
return
|
||||
endif
|
||||
icount = icount + tbc(0) - tbc_save
|
||||
if ((i>1000).and.(icount > n)) then
|
||||
call get_filling_teeth(computed, tbc)
|
||||
icount = 0
|
||||
n = ishft(tbc_save,-4)
|
||||
endif
|
||||
enddo
|
||||
call get_filling_teeth(computed, tbc)
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine get_filling_teeth(computed, tbc)
|
||||
implicit none
|
||||
integer, intent(inout) :: tbc(0:size_tbc)
|
||||
logical, intent(inout) :: computed(N_det_generators)
|
||||
integer :: i, j, k, last_full, dets(comb_teeth)
|
||||
|
||||
call get_last_full_tooth(computed, last_full)
|
||||
if(last_full /= 0) then
|
||||
if (tbc(0) > size(tbc) - first_det_of_teeth(last_full+1) -2) then
|
||||
return
|
||||
endif
|
||||
k = tbc(0)+1
|
||||
do j=1,first_det_of_teeth(last_full+1)-1
|
||||
if(.not.(computed(j))) then
|
||||
tbc(k) = j
|
||||
k=k+1
|
||||
computed(j) = .true.
|
||||
end if
|
||||
end do
|
||||
tbc(0) = k-1
|
||||
end if
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine reorder_tbc(tbc)
|
||||
implicit none
|
||||
integer, intent(inout) :: tbc(0:size_tbc)
|
||||
logical, allocatable :: ltbc(:)
|
||||
integer :: i, ci
|
||||
|
||||
allocate(ltbc(size_tbc))
|
||||
ltbc(:) = .false.
|
||||
do i=1,tbc(0)
|
||||
ltbc(tbc(i)) = .true.
|
||||
end do
|
||||
|
||||
ci = 0
|
||||
do i=1,size_tbc
|
||||
if(ltbc(i)) then
|
||||
ci = ci+1
|
||||
tbc(ci) = i
|
||||
end if
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine get_comb(stato, dets, ct)
|
||||
implicit none
|
||||
integer, intent(in) :: ct
|
||||
double precision, intent(in) :: stato
|
||||
integer, intent(out) :: dets(ct)
|
||||
double precision :: curs
|
||||
integer :: j
|
||||
integer, external :: pt2_find
|
||||
|
||||
curs = 1d0 - stato
|
||||
do j = comb_teeth, 1, -1
|
||||
!DIR$ FORCEINLINE
|
||||
dets(j) = pt2_find(curs, pt2_cweight,size(pt2_cweight), first_det_of_teeth(j), first_det_of_teeth(j+1))
|
||||
curs -= comb_step
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine add_comb(comb, computed, tbc, stbc, ct)
|
||||
implicit none
|
||||
integer, intent(in) :: stbc, ct
|
||||
double precision, intent(in) :: comb
|
||||
logical, intent(inout) :: computed(N_det_generators)
|
||||
integer, intent(inout) :: tbc(0:stbc)
|
||||
integer :: i, k, l, dets(ct)
|
||||
|
||||
!DIR$ FORCEINLINE
|
||||
call get_comb(comb, dets, ct)
|
||||
|
||||
k=tbc(0)+1
|
||||
do i = 1, ct
|
||||
l = dets(i)
|
||||
if(.not.(computed(l))) then
|
||||
tbc(k) = l
|
||||
k = k+1
|
||||
computed(l) = .true.
|
||||
end if
|
||||
end do
|
||||
tbc(0) = k-1
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ double precision, pt2_weight, (N_det_generators) ]
|
||||
&BEGIN_PROVIDER [ double precision, pt2_cweight, (N_det_generators) ]
|
||||
&BEGIN_PROVIDER [ double precision, pt2_cweight_cache, (N_det_generators) ]
|
||||
&BEGIN_PROVIDER [ double precision, comb_step ]
|
||||
&BEGIN_PROVIDER [ integer, first_det_of_teeth, (comb_teeth+1) ]
|
||||
&BEGIN_PROVIDER [ integer, first_det_of_comb ]
|
||||
implicit none
|
||||
integer :: i
|
||||
double precision :: norm_left, stato
|
||||
integer, external :: pt2_find
|
||||
|
||||
pt2_weight(1) = psi_coef_generators(1,1)**2
|
||||
pt2_cweight(1) = psi_coef_generators(1,1)**2
|
||||
|
||||
do i=2,N_det_generators
|
||||
pt2_weight(i) = psi_coef_generators(i,1)**2
|
||||
pt2_cweight(i) = pt2_cweight(i-1) + psi_coef_generators(i,1)**2
|
||||
end do
|
||||
|
||||
do i=1,N_det_generators
|
||||
pt2_weight(i) = pt2_weight(i) / pt2_cweight(N_det_generators)
|
||||
pt2_cweight(i) = pt2_cweight(i) / pt2_cweight(N_det_generators)
|
||||
enddo
|
||||
|
||||
norm_left = 1d0
|
||||
|
||||
comb_step = 1d0/dfloat(comb_teeth)
|
||||
first_det_of_comb = 1
|
||||
do i=1,N_det_generators
|
||||
if(pt2_weight(i)/norm_left < comb_step*.5d0) then
|
||||
first_det_of_comb = i
|
||||
exit
|
||||
end if
|
||||
norm_left -= pt2_weight(i)
|
||||
end do
|
||||
|
||||
comb_step = (1d0 - pt2_cweight(first_det_of_comb-1)) * comb_step
|
||||
|
||||
stato = 1d0 - comb_step
|
||||
iloc = N_det_generators
|
||||
do i=comb_teeth, 1, -1
|
||||
integer :: iloc
|
||||
iloc = pt2_find(stato, pt2_cweight, N_det_generators, 1, iloc)
|
||||
first_det_of_teeth(i) = iloc
|
||||
stato -= comb_step
|
||||
end do
|
||||
first_det_of_teeth(comb_teeth+1) = N_det_generators + 1
|
||||
first_det_of_teeth(1) = first_det_of_comb
|
||||
if(first_det_of_teeth(1) /= first_det_of_comb) then
|
||||
print *, 'Error in ', irp_here
|
||||
stop "comb provider"
|
||||
endif
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ double precision, pt2_weight_inv, (N_det_generators) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Inverse of pt2_weight array
|
||||
END_DOC
|
||||
integer :: i
|
||||
do i=1,N_det_generators
|
||||
pt2_weight_inv(i) = 1.d0/pt2_weight(i)
|
||||
enddo
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -46,7 +46,7 @@ subroutine run_selection_slave(thread,iproc,energy)
|
||||
if(buf%N == 0) then
|
||||
! Only first time
|
||||
call create_selection_buffer(N, N*2, buf)
|
||||
call create_selection_buffer(N, N*3, buf2)
|
||||
call create_selection_buffer(N, N*2, buf2)
|
||||
else
|
||||
if(N /= buf%N) stop "N changed... wtf man??"
|
||||
end if
|
||||
@ -66,8 +66,10 @@ subroutine run_selection_slave(thread,iproc,energy)
|
||||
call push_selection_results(zmq_socket_push, pt2, buf, task_id(1), ctask)
|
||||
do i=1,buf%cur
|
||||
call add_to_selection_buffer(buf2, buf%det(1,1,i), buf%val(i))
|
||||
enddo
|
||||
if (buf2%cur == buf2%N) then
|
||||
call sort_selection_buffer(buf2)
|
||||
endif
|
||||
enddo
|
||||
buf%mini = buf2%mini
|
||||
pt2 = 0d0
|
||||
buf%cur = 0
|
||||
|
@ -27,7 +27,7 @@ subroutine add_to_selection_buffer(b, det, val)
|
||||
|
||||
if(dabs(val) >= b%mini) then
|
||||
b%cur += 1
|
||||
b%det(:,:,b%cur) = det(:,:)
|
||||
b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
|
||||
b%val(b%cur) = val
|
||||
if(b%cur == size(b%val)) then
|
||||
call sort_selection_buffer(b)
|
||||
@ -41,29 +41,32 @@ subroutine sort_selection_buffer(b)
|
||||
implicit none
|
||||
|
||||
type(selection_buffer), intent(inout) :: b
|
||||
double precision, allocatable :: vals(:), absval(:)
|
||||
double precision, allocatable:: absval(:)
|
||||
integer, allocatable :: iorder(:)
|
||||
integer(bit_kind), allocatable :: detmp(:,:,:)
|
||||
double precision, pointer :: vals(:)
|
||||
integer(bit_kind), pointer :: detmp(:,:,:)
|
||||
integer :: i, nmwen
|
||||
logical, external :: detEq
|
||||
nmwen = min(b%N, b%cur)
|
||||
|
||||
|
||||
allocate(iorder(b%cur), detmp(N_int, 2, nmwen), absval(b%cur), vals(nmwen))
|
||||
allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)), absval(b%cur), vals(size(b%val)))
|
||||
absval = -dabs(b%val(:b%cur))
|
||||
do i=1,b%cur
|
||||
iorder(i) = i
|
||||
end do
|
||||
call dsort(absval, iorder, b%cur)
|
||||
|
||||
do i=1, nmwen
|
||||
detmp(:,:,i) = b%det(:,:,iorder(i))
|
||||
detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
|
||||
detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))
|
||||
vals(i) = b%val(iorder(i))
|
||||
end do
|
||||
b%det(:,:,:nmwen) = detmp(:,:,:)
|
||||
b%det(:,:,nmwen+1:) = 0_bit_kind
|
||||
b%val(:nmwen) = vals(:)
|
||||
b%val(nmwen+1:) = 0d0
|
||||
do i=nmwen+1, size(vals)
|
||||
vals(i) = 0.d0
|
||||
enddo
|
||||
deallocate(b%det, b%val)
|
||||
b%det => detmp
|
||||
b%val => vals
|
||||
b%mini = max(b%mini,dabs(b%val(b%N)))
|
||||
b%cur = nmwen
|
||||
end subroutine
|
||||
|
@ -12,8 +12,8 @@ program selection_slave
|
||||
end
|
||||
|
||||
subroutine provide_everything
|
||||
PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context mo_mono_elec_integral
|
||||
! PROVIDE pt2_e0_denominator mo_tot_num N_int
|
||||
PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
|
||||
PROVIDE pt2_e0_denominator mo_tot_num N_int fragment_count
|
||||
end
|
||||
|
||||
subroutine run_wf
|
||||
@ -23,7 +23,7 @@ subroutine run_wf
|
||||
integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
double precision :: energy(N_states)
|
||||
character*(64) :: states(2)
|
||||
character*(64) :: states(4)
|
||||
integer :: rc, i
|
||||
|
||||
call provide_everything
|
||||
@ -31,6 +31,7 @@ subroutine run_wf
|
||||
zmq_context = f77_zmq_ctx_new ()
|
||||
states(1) = 'selection'
|
||||
states(2) = 'davidson'
|
||||
states(3) = 'pt2'
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
|
||||
@ -52,7 +53,7 @@ subroutine run_wf
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
call selection_slave_tcp(i, energy)
|
||||
call run_selection_slave(0,i,energy)
|
||||
!$OMP END PARALLEL
|
||||
print *, 'Selection done'
|
||||
|
||||
@ -62,46 +63,29 @@ subroutine run_wf
|
||||
! --------
|
||||
|
||||
print *, 'Davidson'
|
||||
call davidson_miniserver_get()
|
||||
!$OMP PARALLEL PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
call davidson_slave_tcp(i)
|
||||
!$OMP END PARALLEL
|
||||
print *, 'Davidson done'
|
||||
|
||||
else if (trim(zmq_state) == 'pt2') then
|
||||
|
||||
! PT2
|
||||
! ---
|
||||
|
||||
print *, 'PT2'
|
||||
call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
|
||||
|
||||
logical :: lstop
|
||||
lstop = .False.
|
||||
!$OMP PARALLEL PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
call run_pt2_slave(0,i,energy,lstop)
|
||||
!$OMP END PARALLEL
|
||||
print *, 'PT2 done'
|
||||
|
||||
endif
|
||||
|
||||
end do
|
||||
end
|
||||
|
||||
subroutine update_energy(energy)
|
||||
implicit none
|
||||
double precision, intent(in) :: energy(N_states)
|
||||
BEGIN_DOC
|
||||
! Update energy when it is received from ZMQ
|
||||
END_DOC
|
||||
integer :: j,k
|
||||
do j=1,N_states
|
||||
do k=1,N_det
|
||||
CI_eigenvectors(k,j) = psi_coef(k,j)
|
||||
enddo
|
||||
enddo
|
||||
call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int)
|
||||
if (.True.) then
|
||||
do k=1,N_states
|
||||
ci_electronic_energy(k) = energy(k)
|
||||
enddo
|
||||
TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors
|
||||
endif
|
||||
|
||||
call write_double(6,ci_energy,'Energy')
|
||||
end
|
||||
|
||||
subroutine selection_slave_tcp(i,energy)
|
||||
implicit none
|
||||
double precision, intent(in) :: energy(N_states)
|
||||
integer, intent(in) :: i
|
||||
|
||||
call run_selection_slave(0,i,energy)
|
||||
end
|
||||
|
||||
|
126
plugins/Full_CI_ZMQ/zmq_selection.irp.f
Normal file
126
plugins/Full_CI_ZMQ/zmq_selection.irp.f
Normal file
@ -0,0 +1,126 @@
|
||||
subroutine ZMQ_selection(N_in, pt2)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
integer, intent(in) :: N_in
|
||||
type(selection_buffer) :: b
|
||||
integer :: i, N
|
||||
integer, external :: omp_get_thread_num
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
integer, parameter :: maxtasks=10000
|
||||
|
||||
|
||||
PROVIDE fragment_count
|
||||
|
||||
N = max(N_in,1)
|
||||
if (.True.) then
|
||||
PROVIDE pt2_e0_denominator
|
||||
provide nproc
|
||||
call new_parallel_job(zmq_to_qp_run_socket,"selection")
|
||||
call zmq_put_psi(zmq_to_qp_run_socket,1,pt2_e0_denominator,size(pt2_e0_denominator))
|
||||
call create_selection_buffer(N, N*2, b)
|
||||
endif
|
||||
|
||||
character*(20*maxtasks) :: task
|
||||
task = ' '
|
||||
|
||||
integer :: k
|
||||
k=0
|
||||
do i= 1, N_det_generators
|
||||
k = k+1
|
||||
write(task(20*(k-1)+1:20*k),'(I9,1X,I9,''|'')') i, N
|
||||
if (k>=maxtasks) then
|
||||
k=0
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
endif
|
||||
end do
|
||||
if (k > 0) then
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
endif
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(shared) SHARED(b, pt2) PRIVATE(i) NUM_THREADS(nproc+1)
|
||||
i = omp_get_thread_num()
|
||||
if (i==0) then
|
||||
call selection_collector(b, pt2)
|
||||
else
|
||||
call selection_slave_inproc(i)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'selection')
|
||||
if (N_in > 0) then
|
||||
call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
|
||||
call copy_H_apply_buffer_to_wf()
|
||||
if (s2_eig) then
|
||||
call make_s2_eigenfunction
|
||||
endif
|
||||
call save_wavefunction
|
||||
endif
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine selection_slave_inproc(i)
|
||||
implicit none
|
||||
integer, intent(in) :: i
|
||||
|
||||
call run_selection_slave(1,i,pt2_e0_denominator)
|
||||
end
|
||||
|
||||
subroutine selection_collector(b, pt2)
|
||||
use f77_zmq
|
||||
use selection_types
|
||||
use bitmasks
|
||||
implicit none
|
||||
|
||||
|
||||
type(selection_buffer), intent(inout) :: b
|
||||
double precision, intent(out) :: pt2(N_states)
|
||||
double precision :: pt2_mwen(N_states)
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
|
||||
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
||||
integer(ZMQ_PTR) :: zmq_socket_pull
|
||||
|
||||
integer :: msg_size, rc, more
|
||||
integer :: acc, i, j, robin, N, ntask
|
||||
double precision, allocatable :: val(:)
|
||||
integer(bit_kind), allocatable :: det(:,:,:)
|
||||
integer, allocatable :: task_id(:)
|
||||
integer :: done
|
||||
real :: time, time0
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
allocate(val(b%N), det(N_int, 2, b%N), task_id(N_det_generators))
|
||||
done = 0
|
||||
more = 1
|
||||
pt2(:) = 0d0
|
||||
call CPU_TIME(time0)
|
||||
do while (more == 1)
|
||||
call pull_selection_results(zmq_socket_pull, pt2_mwen, val(1), det(1,1,1), N, task_id, ntask)
|
||||
pt2 += pt2_mwen
|
||||
do i=1, N
|
||||
call add_to_selection_buffer(b, det(1,1,i), val(i))
|
||||
end do
|
||||
|
||||
do i=1, ntask
|
||||
if(task_id(i) == 0) then
|
||||
print *, "Error in collector"
|
||||
endif
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more)
|
||||
end do
|
||||
done += ntask
|
||||
call CPU_TIME(time)
|
||||
! print *, "DONE" , done, time - time0
|
||||
end do
|
||||
|
||||
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
call end_zmq_pull_socket(zmq_socket_pull)
|
||||
call sort_selection_buffer(b)
|
||||
end subroutine
|
||||
|
@ -90,13 +90,13 @@ subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
|
||||
psi_det_size = psi_det_size_read
|
||||
TOUCH psi_det_size N_det N_states
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)
|
||||
if (rc /= N_int*2*N_det*bit_kind) then
|
||||
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,0)
|
||||
if (rc /= psi_det_size*N_states*8) then
|
||||
print *, '77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)'
|
||||
stop 'error'
|
||||
|
@ -7,13 +7,13 @@ default: 1.e-12
|
||||
[n_states_diag]
|
||||
type: States_number
|
||||
doc: Number of states to consider during the Davdison diagonalization
|
||||
default: 10
|
||||
default: 4
|
||||
interface: ezfio,provider,ocaml
|
||||
|
||||
[davidson_sze_max]
|
||||
type: Strictly_positive_int
|
||||
doc: Number of micro-iterations before re-contracting
|
||||
default: 10
|
||||
default: 8
|
||||
interface: ezfio,provider,ocaml
|
||||
|
||||
[state_following]
|
||||
|
@ -1,191 +1,6 @@
|
||||
|
||||
!brought to you by garniroy inc.
|
||||
|
||||
use bitmasks
|
||||
use f77_zmq
|
||||
|
||||
subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
|
||||
implicit none
|
||||
|
||||
|
||||
integer , intent(in) :: blockb, bs, blockb2, istep
|
||||
integer , intent(inout) :: N
|
||||
integer , intent(inout) :: idx(bs)
|
||||
double precision , intent(inout) :: vt(N_states_diag, bs)
|
||||
double precision , intent(inout) :: st(N_states_diag, bs)
|
||||
|
||||
integer :: i,ii, j, sh, sh2, exa, ext, org_i, org_j, istate, ni, endi
|
||||
integer(bit_kind) :: sorted_i(N_int)
|
||||
double precision :: s2, hij
|
||||
logical, allocatable :: wrotten(:)
|
||||
|
||||
allocate(wrotten(bs))
|
||||
wrotten = .false.
|
||||
PROVIDE dav_det
|
||||
|
||||
ii=0
|
||||
sh = blockb
|
||||
do sh2=1,shortcut_(0,1)
|
||||
exa = 0
|
||||
do ni=1,N_int
|
||||
exa = exa + popcnt(xor(version_(ni,sh,1), version_(ni,sh2,1)))
|
||||
end do
|
||||
if(exa > 2) cycle
|
||||
|
||||
do i=blockb2+shortcut_(sh,1),shortcut_(sh+1,1)-1, istep
|
||||
ii = i - shortcut_(blockb,1) + 1
|
||||
|
||||
org_i = sort_idx_(i,1)
|
||||
do ni=1,N_int
|
||||
sorted_i(ni) = sorted_(ni,i,1)
|
||||
enddo
|
||||
|
||||
do j=shortcut_(sh2,1), shortcut_(sh2+1,1)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,1)
|
||||
ext = exa
|
||||
do ni=1,N_int
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted_(ni,j,1)))
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
||||
if (blockb <= shortcut_(0,2)) then
|
||||
sh=blockb
|
||||
do sh2=sh, shortcut_(0,2), shortcut_(0,1)
|
||||
do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep
|
||||
ii += 1
|
||||
org_i = sort_idx_(i,2)
|
||||
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
|
||||
if(i == j) cycle
|
||||
org_j = sort_idx_(j,2)
|
||||
ext = 0
|
||||
do ni=1,N_int
|
||||
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
|
||||
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
|
||||
if(.not. wrotten(ii)) then
|
||||
wrotten(ii) = .true.
|
||||
idx(ii) = org_i
|
||||
vt (:,ii) = 0d0
|
||||
st (:,ii) = 0d0
|
||||
end if
|
||||
do istate=1,N_states_diag
|
||||
vt (istate,ii) += hij*dav_ut(istate,org_j)
|
||||
st (istate,ii) += s2*dav_ut(istate,org_j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
endif
|
||||
|
||||
N=0
|
||||
do i=1,bs
|
||||
if(wrotten(i)) then
|
||||
N += 1
|
||||
idx(N) = idx(i)
|
||||
vt(:,N) = vt(:,i)
|
||||
st(:,N) = st(:,i)
|
||||
end if
|
||||
end do
|
||||
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
|
||||
subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
|
||||
implicit none
|
||||
|
||||
|
||||
integer , intent(in) :: N
|
||||
integer , intent(in) :: idx(N)
|
||||
double precision , intent(in) :: vt(N_states_diag, N)
|
||||
double precision , intent(in) :: st(N_states_diag, N)
|
||||
double precision , intent(inout) :: v0t(N_states_diag,dav_size)
|
||||
double precision , intent(inout) :: s0t(N_states_diag,dav_size)
|
||||
|
||||
integer :: i, j, k
|
||||
|
||||
!DIR$ IVDEP
|
||||
do i=1,N
|
||||
k = idx(i)
|
||||
!DIR$ IVDEP
|
||||
do j=1,N_states_diag
|
||||
v0t(j,k) = v0t(j,k) + vt(j,i)
|
||||
s0t(j,k) = s0t(j,k) + st(j,i)
|
||||
enddo
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
|
||||
integer, intent(in) :: n, n_st_8
|
||||
double precision, intent(in) :: ut(n_st_8,n)
|
||||
integer :: i,k
|
||||
|
||||
|
||||
dav_size = n
|
||||
touch dav_size
|
||||
|
||||
do i=1,n
|
||||
do k=1,N_int
|
||||
dav_det(k,1,i) = psi_det(k,1,i)
|
||||
dav_det(k,2,i) = psi_det(k,2,i)
|
||||
enddo
|
||||
enddo
|
||||
do i=1,n
|
||||
do k=1,N_states_diag
|
||||
dav_ut(k,i) = ut(k,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
touch dav_det dav_ut
|
||||
|
||||
call new_parallel_job(zmq_to_qp_run_socket,"davidson")
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket
|
||||
integer ,intent(in) :: blockb, blockb2, istep
|
||||
character*(512) :: task
|
||||
|
||||
|
||||
write(task,*) blockb, blockb2, istep
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket, task)
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_slave_inproc(i)
|
||||
implicit none
|
||||
@ -211,8 +26,6 @@ subroutine davidson_run_slave(thread,iproc)
|
||||
integer, intent(in) :: thread, iproc
|
||||
|
||||
integer :: worker_id, task_id, blockb
|
||||
character*(512) :: task
|
||||
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
|
||||
@ -231,7 +44,7 @@ subroutine davidson_run_slave(thread,iproc)
|
||||
return
|
||||
end if
|
||||
|
||||
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
||||
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
|
||||
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
call end_zmq_push_socket(zmq_socket_push,thread)
|
||||
@ -239,338 +52,346 @@ end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
|
||||
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR),intent(in) :: zmq_socket_push
|
||||
integer,intent(in) :: worker_id
|
||||
integer,intent(in) :: worker_id, N_st, sze
|
||||
integer :: task_id
|
||||
character*(512) :: task
|
||||
character*(512) :: msg
|
||||
integer :: imin, imax, ishift, istep
|
||||
|
||||
double precision, allocatable :: v_0(:,:), s_0(:,:), u_t(:,:)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_0, s_0
|
||||
|
||||
integer :: blockb, blockb2, istep
|
||||
integer :: N
|
||||
integer , allocatable :: idx(:)
|
||||
double precision , allocatable :: vt(:,:)
|
||||
double precision , allocatable :: st(:,:)
|
||||
! Get wave function (u_t)
|
||||
! -----------------------
|
||||
|
||||
integer :: bs, i, j
|
||||
|
||||
allocate(idx(1), vt(1,1), st(1,1))
|
||||
|
||||
do
|
||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
|
||||
if(task_id == 0) exit
|
||||
read (task,*) blockb, blockb2, istep
|
||||
bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1)
|
||||
do i=blockb, shortcut_(0,2), shortcut_(0,1)
|
||||
do j=i, min(i, shortcut_(0,2))
|
||||
bs += shortcut_(j+1,2) - shortcut_(j, 2)
|
||||
end do
|
||||
end do
|
||||
if(bs > size(idx)) then
|
||||
deallocate(idx, vt, st)
|
||||
allocate(idx(bs))
|
||||
allocate(vt(N_states_diag, bs))
|
||||
allocate(st(N_states_diag, bs))
|
||||
integer :: rc
|
||||
write(msg, *) 'get_psi ', worker_id
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
|
||||
if (rc /= len(trim(msg))) then
|
||||
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
call davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
|
||||
if (msg(1:13) /= 'get_psi_reply') then
|
||||
print *, rc, trim(msg)
|
||||
print *, 'Error in get_psi_reply'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
integer :: N_states_read, N_det_read, psi_det_size_read
|
||||
integer :: N_det_selectors_read, N_det_generators_read
|
||||
double precision :: energy(N_st)
|
||||
|
||||
read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, &
|
||||
N_det_generators_read, N_det_selectors_read
|
||||
|
||||
if (rc /= worker_id) then
|
||||
print *, 'Wrong worker ID'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
if (N_states_read /= N_st) then
|
||||
print *, N_st
|
||||
stop 'error : N_st'
|
||||
endif
|
||||
|
||||
if (N_det_read /= N_det) then
|
||||
N_det = N_det_read
|
||||
TOUCH N_det
|
||||
endif
|
||||
|
||||
|
||||
allocate(v_0(sze,N_st), s_0(sze,N_st),u_t(N_st,N_det))
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)
|
||||
if (rc /= N_int*2*N_det*bit_kind) then
|
||||
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)*8,0)
|
||||
if (rc /= size(u_t)*8) then
|
||||
print *, rc, size(u_t)*8
|
||||
print *, 'f77_zmq_recv(zmq_to_qp_run_socket,u_t,size(u_t)×8,0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)
|
||||
if (rc /= N_st*8) then
|
||||
print *, '77_zmq_recv(zmq_to_qp_run_socket,energy,N_st*8,0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
! Run tasks
|
||||
! ---------
|
||||
|
||||
do
|
||||
v_0 = 0.d0
|
||||
s_0 = 0.d0
|
||||
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
|
||||
if(task_id == 0) exit
|
||||
read (msg,*) imin, imax, ishift, istep
|
||||
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,N_det,imin,imax,ishift,istep)
|
||||
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
|
||||
call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
|
||||
call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
|
||||
end do
|
||||
deallocate(v_0, s_0, u_t)
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
|
||||
subroutine davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
|
||||
integer ,intent(in) :: task_id
|
||||
|
||||
integer ,intent(in) :: blockb, blocke
|
||||
integer ,intent(in) :: N
|
||||
integer ,intent(in) :: idx(N)
|
||||
double precision ,intent(in) :: vt(N_states_diag, N)
|
||||
double precision ,intent(in) :: st(N_states_diag, N)
|
||||
double precision ,intent(in) :: v_0(N_det,N_states_diag)
|
||||
double precision ,intent(in) :: s_0(N_det,N_states_diag)
|
||||
integer :: rc
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, blockb, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "davidson_push_results failed to push blockb"
|
||||
rc = f77_zmq_send( zmq_socket_push, v_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push vt"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, blocke, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "davidson_push_results failed to push blocke"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, N, 4, ZMQ_SNDMORE)
|
||||
if(rc /= 4) stop "davidson_push_results failed to push N"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, idx, 4*N, ZMQ_SNDMORE)
|
||||
if(rc /= 4*N) stop "davidson_push_results failed to push idx"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, vt, 8*N_states_diag* N, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push vt"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, st, 8*N_states_diag* N, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to push st"
|
||||
rc = f77_zmq_send( zmq_socket_push, s_0, 8*N_states_diag*N_det, ZMQ_SNDMORE)
|
||||
if(rc /= 8*N_states_diag* N_det) stop "davidson_push_results failed to push st"
|
||||
|
||||
rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0)
|
||||
if(rc /= 4) stop "davidson_push_results failed to push task_id"
|
||||
|
||||
! Activate is zmq_socket_push is a REQ
|
||||
integer :: idummy
|
||||
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
|
||||
if (rc /= 4) then
|
||||
print *, irp_here, ': f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
|
||||
subroutine davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
|
||||
integer ,intent(out) :: task_id
|
||||
integer ,intent(out) :: blockb, blocke
|
||||
integer ,intent(out) :: N
|
||||
integer ,intent(out) :: idx(*)
|
||||
double precision ,intent(out) :: vt(N_states_diag, *)
|
||||
double precision ,intent(out) :: st(N_states_diag, *)
|
||||
double precision ,intent(out) :: v_0(N_det,N_states_diag)
|
||||
double precision ,intent(out) :: s_0(N_det,N_states_diag)
|
||||
|
||||
integer :: rc
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, blockb, 4, 0)
|
||||
if(rc /= 4) stop "davidson_push_results failed to pull blockb"
|
||||
rc = f77_zmq_recv( zmq_socket_pull, v_0, 8*N_det*N_states_diag, 0)
|
||||
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull v_0"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, blocke, 4, 0)
|
||||
if(rc /= 4) stop "davidson_push_results failed to pull blocke"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
|
||||
if(rc /= 4) stop "davidson_push_results failed to pull N"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, idx, 4*N, 0)
|
||||
if(rc /= 4*N) stop "davidson_push_results failed to pull idx"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, vt, 8*N_states_diag* N, 0)
|
||||
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull vt"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, st, 8*N_states_diag* N, 0)
|
||||
if(rc /= 8*N_states_diag* N) stop "davidson_push_results failed to pull st"
|
||||
rc = f77_zmq_recv( zmq_socket_pull, s_0, 8*N_det*N_states_diag, 0)
|
||||
if(rc /= 8*N_det*N_states_diag) stop "davidson_push_results failed to pull s_0"
|
||||
|
||||
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
|
||||
if(rc /= 4) stop "davidson_pull_results failed to pull task_id"
|
||||
|
||||
! Activate if zmq_socket_pull is a REP
|
||||
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
|
||||
if (rc /= 4) then
|
||||
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LDA)
|
||||
subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze, N_st)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer :: LDA
|
||||
integer, intent(in) :: sze, N_st
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
|
||||
|
||||
double precision ,intent(inout) :: v0(LDA, N_states_diag)
|
||||
double precision ,intent(inout) :: s0(LDA, N_states_diag)
|
||||
double precision ,intent(inout) :: v0(sze, N_st)
|
||||
double precision ,intent(inout) :: s0(sze, N_st)
|
||||
|
||||
integer :: more, task_id, taskn
|
||||
|
||||
integer :: blockb, blocke
|
||||
integer :: N
|
||||
integer , allocatable :: idx(:)
|
||||
double precision , allocatable :: vt(:,:), v0t(:,:), s0t(:,:)
|
||||
double precision , allocatable :: st(:,:)
|
||||
|
||||
integer :: msize
|
||||
|
||||
msize = (1 + max_blocksize)*2
|
||||
allocate(idx(msize))
|
||||
allocate(vt(N_states_diag, msize))
|
||||
allocate(st(N_states_diag, msize))
|
||||
allocate(v0t(N_states_diag, dav_size))
|
||||
allocate(s0t(N_states_diag, dav_size))
|
||||
|
||||
v0t = 00.d0
|
||||
s0t = 00.d0
|
||||
|
||||
more = 1
|
||||
|
||||
do while (more == 1)
|
||||
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
|
||||
!DIR$ FORCEINLINE
|
||||
call davidson_collect(N, idx, vt, st , v0t, s0t)
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
||||
end do
|
||||
deallocate(idx,vt,st)
|
||||
integer :: more, task_id
|
||||
|
||||
double precision, allocatable :: v_0(:,:), s_0(:,:)
|
||||
integer :: i,j
|
||||
!DIR$ IVDEP
|
||||
do j=1,N_states_diag
|
||||
!DIR$ IVDEP
|
||||
do i=1,dav_size
|
||||
v0(i,j) = v0t(j,i)
|
||||
s0(i,j) = s0t(j,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(v0t,s0t)
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
|
||||
integer :: LDA
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR) :: zmq_collector
|
||||
integer(ZMQ_PTR), external :: new_zmq_pull_socket
|
||||
integer(ZMQ_PTR) :: zmq_socket_pull
|
||||
|
||||
integer :: i
|
||||
integer, external :: omp_get_thread_num
|
||||
|
||||
double precision , intent(inout) :: v0(LDA, N_states_diag)
|
||||
double precision , intent(inout) :: s0(LDA, N_states_diag)
|
||||
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
|
||||
zmq_collector = new_zmq_to_qp_run_socket()
|
||||
allocate(v_0(N_det,N_st), s_0(N_det,N_st))
|
||||
v0 = 0.d0
|
||||
s0 = 0.d0
|
||||
more = 1
|
||||
zmq_socket_pull = new_zmq_pull_socket()
|
||||
i = omp_get_thread_num()
|
||||
|
||||
|
||||
PROVIDE nproc
|
||||
|
||||
!$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
if (i == 0 ) then
|
||||
call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
|
||||
call end_zmq_to_qp_run_socket(zmq_collector)
|
||||
do while (more == 1)
|
||||
call davidson_pull_results(zmq_socket_pull, v_0, s_0, task_id)
|
||||
do j=1,N_st
|
||||
do i=1,N_det
|
||||
v0(i,j) = v0(i,j) + v_0(i,j)
|
||||
s0(i,j) = s0(i,j) + s_0(i,j)
|
||||
enddo
|
||||
enddo
|
||||
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
|
||||
end do
|
||||
deallocate(v_0,s_0)
|
||||
call end_zmq_pull_socket(zmq_socket_pull)
|
||||
call davidson_miniserver_end()
|
||||
else if (i == 1 ) then
|
||||
call davidson_miniserver_run ()
|
||||
else
|
||||
call davidson_slave_inproc(i)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
subroutine davidson_miniserver_run()
|
||||
|
||||
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
|
||||
use omp_lib
|
||||
use bitmasks
|
||||
use f77_zmq
|
||||
implicit none
|
||||
integer(ZMQ_PTR) responder
|
||||
character*(64) address
|
||||
character(len=:), allocatable :: buffer
|
||||
integer rc
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
!
|
||||
! S2_jj : array of <j|S^2|j>
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st, sze
|
||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
||||
double precision, intent(inout):: u_0(sze,N_st)
|
||||
integer :: i,j,k
|
||||
integer :: ithread
|
||||
double precision, allocatable :: u_t(:,:)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
|
||||
|
||||
allocate (character(len=20) :: buffer)
|
||||
address = 'tcp://*:11223'
|
||||
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
|
||||
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
|
||||
PROVIDE ref_bitmask_energy nproc
|
||||
|
||||
responder = f77_zmq_socket(zmq_context, ZMQ_REP)
|
||||
rc = f77_zmq_bind(responder,address)
|
||||
|
||||
do
|
||||
rc = f77_zmq_recv(responder, buffer, 5, 0)
|
||||
if (buffer(1:rc) /= 'end') then
|
||||
rc = f77_zmq_send (responder, dav_size, 4, ZMQ_SNDMORE)
|
||||
rc = f77_zmq_send (responder, dav_det, 16*N_int*dav_size, ZMQ_SNDMORE)
|
||||
rc = f77_zmq_send (responder, dav_ut, 8*dav_size*N_states_diag, 0)
|
||||
else
|
||||
rc = f77_zmq_send (responder, "end", 3, 0)
|
||||
exit
|
||||
allocate(u_t(N_st,N_det))
|
||||
do k=1,N_st
|
||||
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
|
||||
enddo
|
||||
call dtranspose( &
|
||||
u_0, &
|
||||
size(u_0, 1), &
|
||||
u_t, &
|
||||
size(u_t, 1), &
|
||||
N_det, N_st)
|
||||
|
||||
|
||||
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
|
||||
|
||||
if(N_st /= N_states_diag .or. sze < N_det) stop "assert fail in H_S2_u_0_nstates"
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
|
||||
call new_parallel_job(zmq_to_qp_run_socket,'davidson')
|
||||
|
||||
character*(512) :: task
|
||||
integer :: rc
|
||||
double precision :: energy(N_st)
|
||||
energy = 0.d0
|
||||
|
||||
task = ' '
|
||||
write(task,*) 'put_psi ', 1, N_st, N_det, N_det
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE)
|
||||
if (rc /= len(trim(task))) then
|
||||
print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(task),len(trim(task)),ZMQ_SNDMORE)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
|
||||
if (rc /= N_int*2*N_det*bit_kind) then
|
||||
print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE)
|
||||
if (rc /= size(u_t)*8) then
|
||||
print *, 'f77_zmq_send(zmq_to_qp_run_socket,u_t,size(u_t)*8,ZMQ_SNDMORE)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket,energy,N_st*8,0)
|
||||
if (rc /= N_st*8) then
|
||||
print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket,task,len(task),0)
|
||||
if (task(1:rc) /= 'put_psi_reply 1') then
|
||||
print *, rc, trim(task)
|
||||
print *, 'Error in put_psi_reply'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
deallocate(u_t)
|
||||
|
||||
|
||||
! Create tasks
|
||||
! ============
|
||||
|
||||
integer :: istep, imin, imax, ishift
|
||||
double precision :: w, max_workload, N_det_inv, di
|
||||
max_workload = 1000000.d0
|
||||
w = 0.d0
|
||||
istep=8
|
||||
ishift=0
|
||||
imin=1
|
||||
N_det_inv = 1.d0/dble(N_det)
|
||||
di = dble(N_det)
|
||||
do imax=1,N_det
|
||||
di = di-1.d0
|
||||
w = w + di*N_det_inv
|
||||
if (w > max_workload) then
|
||||
do ishift=0,istep-1
|
||||
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
|
||||
enddo
|
||||
imin = imax+1
|
||||
w = 0.d0
|
||||
endif
|
||||
enddo
|
||||
|
||||
rc = f77_zmq_close(responder)
|
||||
end subroutine
|
||||
if (w > 0.d0) then
|
||||
imax = N_det
|
||||
do ishift=0,istep-1
|
||||
write(task,'(4(I9,1X),1A)') imin, imax, ishift, istep, '|'
|
||||
call add_task_to_taskserver(zmq_to_qp_run_socket,trim(task))
|
||||
enddo
|
||||
endif
|
||||
|
||||
|
||||
subroutine davidson_miniserver_end()
|
||||
implicit none
|
||||
use f77_zmq
|
||||
v_0 = 0.d0
|
||||
s_0 = 0.d0
|
||||
|
||||
integer(ZMQ_PTR) requester
|
||||
character*(64) address
|
||||
integer rc
|
||||
character*(64) buf
|
||||
call omp_set_nested(.True.)
|
||||
call zmq_set_running(zmq_to_qp_run_socket)
|
||||
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
|
||||
ithread = omp_get_thread_num()
|
||||
if (ithread == 0 ) then
|
||||
call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, N_det, N_st)
|
||||
else
|
||||
call davidson_slave_inproc(1)
|
||||
endif
|
||||
!$OMP END PARALLEL
|
||||
call end_parallel_job(zmq_to_qp_run_socket, 'davidson')
|
||||
|
||||
address = trim(qp_run_address)//':11223'
|
||||
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||
rc = f77_zmq_connect(requester,address)
|
||||
|
||||
rc = f77_zmq_send(requester, "end", 3, 0)
|
||||
rc = f77_zmq_recv(requester, buf, 3, 0)
|
||||
rc = f77_zmq_close(requester)
|
||||
end subroutine
|
||||
|
||||
|
||||
subroutine davidson_miniserver_get()
|
||||
implicit none
|
||||
use f77_zmq
|
||||
|
||||
integer(ZMQ_PTR) requester
|
||||
character*(64) address
|
||||
character*(20) buffer
|
||||
integer rc
|
||||
|
||||
address = trim(qp_run_address)//':11223'
|
||||
|
||||
requester = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||
rc = f77_zmq_connect(requester,address)
|
||||
|
||||
rc = f77_zmq_send(requester, "Hello", 5, 0)
|
||||
rc = f77_zmq_recv(requester, dav_size, 4, 0)
|
||||
TOUCH dav_size
|
||||
rc = f77_zmq_recv(requester, dav_det, 16*N_int*dav_size, 0)
|
||||
rc = f77_zmq_recv(requester, dav_ut, 8*dav_size*N_states_diag, 0)
|
||||
TOUCH dav_det dav_ut
|
||||
|
||||
|
||||
end subroutine
|
||||
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer(bit_kind), dav_det, (N_int, 2, dav_size) ]
|
||||
&BEGIN_PROVIDER [ double precision, dav_ut, (N_states_diag, dav_size) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Temporary arrays for parallel davidson
|
||||
!
|
||||
! Touched in davidson_miniserver_get
|
||||
END_DOC
|
||||
dav_det = 0_bit_kind
|
||||
dav_ut = -huge(1.d0)
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, dav_size ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Size of the arrays for Davidson
|
||||
!
|
||||
! Touched in davidson_miniserver_get
|
||||
END_DOC
|
||||
dav_size = 1
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, shortcut_, (0:dav_size+1, 2) ]
|
||||
&BEGIN_PROVIDER [ integer(bit_kind), version_, (N_int, dav_size, 2) ]
|
||||
&BEGIN_PROVIDER [ integer(bit_kind), sorted_, (N_int, dav_size, 2) ]
|
||||
&BEGIN_PROVIDER [ integer, sort_idx_, (dav_size, 2) ]
|
||||
&BEGIN_PROVIDER [ integer, max_blocksize ]
|
||||
implicit none
|
||||
call sort_dets_ab_v(dav_det, sorted_(1,1,1), sort_idx_(1,1), shortcut_(0,1), version_(1,1,1), dav_size, N_int)
|
||||
call sort_dets_ba_v(dav_det, sorted_(1,1,2), sort_idx_(1,2), shortcut_(0,2), version_(1,1,2), dav_size, N_int)
|
||||
max_blocksize = max(shortcut_(0,1), shortcut_(0,2))
|
||||
END_PROVIDER
|
||||
do k=1,N_st
|
||||
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
enddo
|
||||
end
|
||||
|
||||
|
||||
|
@ -16,24 +16,17 @@ program davidson_slave
|
||||
state = 'Waiting'
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
|
||||
do
|
||||
call wait_for_state(zmq_state,state)
|
||||
if(trim(state) /= "davidson") exit
|
||||
call davidson_miniserver_get()
|
||||
|
||||
integer :: rc, i
|
||||
|
||||
print *, 'Davidson slave running'
|
||||
|
||||
!$OMP PARALLEL PRIVATE(i)
|
||||
i = omp_get_thread_num()
|
||||
call davidson_slave_tcp(i)
|
||||
!$OMP END PARALLEL
|
||||
end do
|
||||
end
|
||||
|
||||
subroutine provide_everything
|
||||
PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states_diag zmq_context
|
||||
PROVIDE mo_bielec_integrals_in_map psi_det_sorted_bit N_states_diag zmq_context ref_bitmask_energy
|
||||
end subroutine
|
||||
|
||||
|
||||
|
@ -302,7 +302,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
|
||||
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
|
||||
double precision, intent(out) :: energies(N_st_diag)
|
||||
|
||||
integer :: sze_8
|
||||
integer :: iter
|
||||
integer :: i,j,k,l,m
|
||||
logical :: converged
|
||||
@ -365,13 +364,12 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
|
||||
integer, external :: align_double
|
||||
sze_8 = align_double(sze)
|
||||
|
||||
allocate( &
|
||||
kl_pairs(2,N_st_diag*(N_st_diag+1)/2), &
|
||||
W(sze_8,N_st_diag,davidson_sze_max), &
|
||||
U(sze_8,N_st_diag,davidson_sze_max), &
|
||||
R(sze_8,N_st_diag), &
|
||||
W(sze,N_st_diag,davidson_sze_max), &
|
||||
U(sze,N_st_diag,davidson_sze_max), &
|
||||
R(sze,N_st_diag), &
|
||||
h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
|
||||
y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
|
||||
residual_norm(N_st_diag), &
|
||||
@ -426,7 +424,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
|
||||
! Compute |W_k> = \sum_i |i><i|H|u_k>
|
||||
! -----------------------------------------
|
||||
|
||||
call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze_8)
|
||||
call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze)
|
||||
! do k=1,N_st
|
||||
! if(store_full_H_mat.and.sze.le.n_det_max_stored)then
|
||||
! call H_u_0_stored(W(1,k,iter),U(1,k,iter),H_matrix_all_dets,sze)
|
||||
|
@ -23,41 +23,33 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
|
||||
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
|
||||
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
|
||||
double precision, intent(out) :: energies(N_st_diag), s2_out(N_st_diag)
|
||||
double precision, allocatable :: H_jj(:), S2_jj(:)
|
||||
double precision, allocatable :: H_jj(:)
|
||||
|
||||
double precision :: diag_h_mat_elem
|
||||
double precision :: diag_H_mat_elem, diag_S_mat_elem
|
||||
integer :: i
|
||||
ASSERT (N_st > 0)
|
||||
ASSERT (sze > 0)
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
PROVIDE mo_bielec_integrals_in_map
|
||||
allocate(H_jj(sze), S2_jj(sze))
|
||||
allocate(H_jj(sze) )
|
||||
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint) &
|
||||
!$OMP SHARED(sze,H_jj, dets_in,Nint) &
|
||||
!$OMP PRIVATE(i)
|
||||
!$OMP DO SCHEDULE(guided)
|
||||
!$OMP DO SCHEDULE(static)
|
||||
do i=1,sze
|
||||
H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint)
|
||||
call get_s2(dets_in(1,1,i),dets_in(1,1,i),Nint,S2_jj(i))
|
||||
H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
if (disk_based_davidson) then
|
||||
call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
else
|
||||
call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
endif
|
||||
do i=1,N_st_diag
|
||||
s2_out(i) = S2_jj(i)
|
||||
enddo
|
||||
deallocate (H_jj,S2_jj)
|
||||
call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
deallocate (H_jj)
|
||||
end
|
||||
|
||||
|
||||
subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -65,7 +57,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
!
|
||||
! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
|
||||
!
|
||||
! S2_jj : specific diagonal S^2 matrix elements
|
||||
! S2_out : Output : s^2
|
||||
!
|
||||
! dets_in : bitmasks corresponding to determinants
|
||||
!
|
||||
@ -87,12 +79,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint
|
||||
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
|
||||
double precision, intent(in) :: H_jj(sze)
|
||||
double precision, intent(inout) :: S2_jj(sze)
|
||||
double precision, intent(inout) :: s2_out(N_st_diag)
|
||||
integer, intent(in) :: iunit
|
||||
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
|
||||
double precision, intent(out) :: energies(N_st_diag)
|
||||
|
||||
integer :: sze_8
|
||||
integer :: iter
|
||||
integer :: i,j,k,l,m
|
||||
logical :: converged
|
||||
@ -122,7 +113,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
stop -1
|
||||
endif
|
||||
|
||||
PROVIDE nuclear_repulsion expected_s2
|
||||
integer, external :: align_double
|
||||
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
||||
|
||||
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
|
||||
|
||||
call write_time(iunit)
|
||||
call wall_time(wall)
|
||||
@ -134,6 +128,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
call write_int(iunit,N_st,'Number of states')
|
||||
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
|
||||
call write_int(iunit,sze,'Number of determinants')
|
||||
r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
|
||||
+ 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
|
||||
call write_double(iunit, r1, 'Memory(Gb)')
|
||||
write(iunit,'(A)') ''
|
||||
write_buffer = '===== '
|
||||
do i=1,N_st
|
||||
@ -151,14 +148,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
enddo
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
|
||||
integer, external :: align_double
|
||||
sze_8 = align_double(sze)
|
||||
|
||||
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
|
||||
allocate( &
|
||||
W(sze_8,N_st_diag*itermax), &
|
||||
U(sze_8,N_st_diag*itermax), &
|
||||
S(sze_8,N_st_diag*itermax), &
|
||||
! Large
|
||||
W(sze,N_st_diag*itermax), &
|
||||
U(sze,N_st_diag*itermax), &
|
||||
S(sze,N_st_diag*itermax), &
|
||||
|
||||
! Small
|
||||
h(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
y(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
@ -223,8 +220,11 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
! -----------------------------------------
|
||||
|
||||
|
||||
! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
|
||||
call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
|
||||
if (distributed_davidson) then
|
||||
call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
|
||||
else
|
||||
call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
|
||||
endif
|
||||
|
||||
|
||||
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
|
||||
@ -400,7 +400,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
endif
|
||||
enddo
|
||||
|
||||
write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(1:3,1:N_st)
|
||||
write(iunit,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter, to_print(1:3,1:N_st)
|
||||
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
|
||||
do k=1,N_st
|
||||
if (residual_norm(k) > 1.e8) then
|
||||
@ -424,7 +424,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
|
||||
do k=1,N_st_diag
|
||||
energies(k) = lambda(k)
|
||||
S2_jj(k) = s2(k)
|
||||
s2_out(k) = s2(k)
|
||||
enddo
|
||||
write_buffer = '===== '
|
||||
do i=1,N_st
|
||||
@ -444,439 +444,3 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
|
||||
)
|
||||
end
|
||||
|
||||
subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
|
||||
use bitmasks
|
||||
use mmap_module
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Davidson diagonalization with specific diagonal elements of the H matrix
|
||||
!
|
||||
! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
|
||||
!
|
||||
! S2_jj : specific diagonal S^2 matrix elements
|
||||
!
|
||||
! dets_in : bitmasks corresponding to determinants
|
||||
!
|
||||
! u_in : guess coefficients on the various states. Overwritten
|
||||
! on exit
|
||||
!
|
||||
! dim_in : leftmost dimension of u_in
|
||||
!
|
||||
! sze : Number of determinants
|
||||
!
|
||||
! N_st : Number of eigenstates
|
||||
!
|
||||
! N_st_diag : Number of states in which H is diagonalized. Assumed > sze
|
||||
!
|
||||
! iunit : Unit for the I/O
|
||||
!
|
||||
! Initial guess vectors are not necessarily orthonormal
|
||||
END_DOC
|
||||
integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint
|
||||
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
|
||||
double precision, intent(in) :: H_jj(sze)
|
||||
double precision, intent(inout) :: S2_jj(sze)
|
||||
integer, intent(in) :: iunit
|
||||
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
|
||||
double precision, intent(out) :: energies(N_st_diag)
|
||||
|
||||
integer :: sze_8
|
||||
integer :: iter
|
||||
integer :: i,j,k,l,m
|
||||
logical :: converged
|
||||
|
||||
double precision :: u_dot_v, u_dot_u
|
||||
|
||||
integer :: k_pairs, kl
|
||||
|
||||
integer :: iter2
|
||||
double precision, pointer :: W(:,:), U(:,:), S(:,:), overlap(:,:)
|
||||
double precision, allocatable :: y(:,:), h(:,:), lambda(:), s2(:)
|
||||
double precision, allocatable :: c(:), s_(:,:), s_tmp(:,:)
|
||||
double precision :: diag_h_mat_elem
|
||||
double precision, allocatable :: residual_norm(:)
|
||||
character*(16384) :: write_buffer
|
||||
double precision :: to_print(3,N_st)
|
||||
double precision :: cpu, wall
|
||||
logical :: state_ok(N_st_diag*davidson_sze_max)
|
||||
integer :: shift, shift2, itermax
|
||||
include 'constants.include.F'
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
|
||||
if (N_st_diag*3 > sze) then
|
||||
print *, 'error in Davidson :'
|
||||
print *, 'Increase n_det_max_jacobi to ', N_st_diag*3
|
||||
stop -1
|
||||
endif
|
||||
|
||||
PROVIDE nuclear_repulsion expected_s2
|
||||
|
||||
call write_time(iunit)
|
||||
call wall_time(wall)
|
||||
call cpu_time(cpu)
|
||||
write(iunit,'(A)') ''
|
||||
write(iunit,'(A)') 'Davidson Diagonalization'
|
||||
write(iunit,'(A)') '------------------------'
|
||||
write(iunit,'(A)') ''
|
||||
call write_int(iunit,N_st,'Number of states')
|
||||
call write_int(iunit,N_st_diag,'Number of states in diagonalization')
|
||||
call write_int(iunit,sze,'Number of determinants')
|
||||
write(iunit,'(A)') ''
|
||||
write_buffer = '===== '
|
||||
do i=1,N_st
|
||||
write_buffer = trim(write_buffer)//' ================ =========== ==========='
|
||||
enddo
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
write_buffer = ' Iter'
|
||||
do i=1,N_st
|
||||
write_buffer = trim(write_buffer)//' Energy S^2 Residual '
|
||||
enddo
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
write_buffer = '===== '
|
||||
do i=1,N_st
|
||||
write_buffer = trim(write_buffer)//' ================ =========== ==========='
|
||||
enddo
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
|
||||
integer, external :: align_double
|
||||
integer :: fd(3)
|
||||
type(c_ptr) :: c_pointer(3)
|
||||
sze_8 = align_double(sze)
|
||||
|
||||
itermax = min(davidson_sze_max, sze/N_st_diag)
|
||||
|
||||
call mmap( &
|
||||
trim(ezfio_work_dir)//'U', &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(1), .False., c_pointer(1))
|
||||
call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) )
|
||||
|
||||
call mmap( &
|
||||
trim(ezfio_work_dir)//'W', &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(2), .False., c_pointer(2))
|
||||
call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) )
|
||||
|
||||
call mmap( &
|
||||
trim(ezfio_work_dir)//'S', &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(3), .False., c_pointer(3))
|
||||
call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) )
|
||||
|
||||
allocate( &
|
||||
h(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
y(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
s_(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
s_tmp(N_st_diag*itermax,N_st_diag*itermax), &
|
||||
overlap(N_st_diag*itermax, N_st_diag*itermax), &
|
||||
residual_norm(N_st_diag), &
|
||||
c(N_st_diag*itermax), &
|
||||
s2(N_st_diag*itermax), &
|
||||
lambda(N_st_diag*itermax))
|
||||
|
||||
h = 0.d0
|
||||
U = 0.d0
|
||||
W = 0.d0
|
||||
S = 0.d0
|
||||
y = 0.d0
|
||||
s_ = 0.d0
|
||||
s_tmp = 0.d0
|
||||
|
||||
|
||||
ASSERT (N_st > 0)
|
||||
ASSERT (N_st_diag >= N_st)
|
||||
ASSERT (sze > 0)
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
|
||||
! Davidson iterations
|
||||
! ===================
|
||||
|
||||
converged = .False.
|
||||
|
||||
double precision :: r1, r2
|
||||
do k=N_st+1,N_st_diag
|
||||
u_in(k,k) = 10.d0
|
||||
do i=1,sze
|
||||
call random_number(r1)
|
||||
r1 = dsqrt(-2.d0*dlog(r1))
|
||||
r2 = dtwo_pi*r2
|
||||
u_in(i,k) = r1*dcos(r2)
|
||||
enddo
|
||||
enddo
|
||||
do k=1,N_st_diag
|
||||
call normalize(u_in(1,k),sze)
|
||||
enddo
|
||||
|
||||
|
||||
do while (.not.converged)
|
||||
|
||||
do k=1,N_st_diag
|
||||
do i=1,sze
|
||||
U(i,k) = u_in(i,k)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
do iter=1,itermax-1
|
||||
|
||||
shift = N_st_diag*(iter-1)
|
||||
shift2 = N_st_diag*iter
|
||||
|
||||
call ortho_qr(U,size(U,1),sze,shift2)
|
||||
|
||||
! Compute |W_k> = \sum_i |i><i|H|u_k>
|
||||
! -----------------------------------------
|
||||
|
||||
|
||||
! call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
|
||||
call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
|
||||
|
||||
|
||||
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
|
||||
! -------------------------------------------
|
||||
|
||||
do k=1,iter
|
||||
shift = N_st_diag*(k-1)
|
||||
call dgemm('T','N', N_st_diag, shift2, sze, &
|
||||
1.d0, U(1,shift+1), size(U,1), W, size(W,1), &
|
||||
0.d0, h(shift+1,1), size(h,1))
|
||||
|
||||
call dgemm('T','N', N_st_diag, shift2, sze, &
|
||||
1.d0, U(1,shift+1), size(U,1), S, size(S,1), &
|
||||
0.d0, s_(shift+1,1), size(s_,1))
|
||||
enddo
|
||||
|
||||
! ! Diagonalize S^2
|
||||
! ! ---------------
|
||||
!
|
||||
! call lapack_diag(s2,y,s_,size(s_,1),shift2)
|
||||
!
|
||||
!
|
||||
! ! Rotate H in the basis of eigenfunctions of s2
|
||||
! ! ---------------------------------------------
|
||||
!
|
||||
! call dgemm('N','N',shift2,shift2,shift2, &
|
||||
! 1.d0, h, size(h,1), y, size(y,1), &
|
||||
! 0.d0, s_tmp, size(s_tmp,1))
|
||||
!
|
||||
! call dgemm('T','N',shift2,shift2,shift2, &
|
||||
! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
|
||||
! 0.d0, h, size(h,1))
|
||||
!
|
||||
! ! Damp interaction between different spin states
|
||||
! ! ------------------------------------------------
|
||||
!
|
||||
! do k=1,shift2
|
||||
! do l=1,shift2
|
||||
! if (dabs(s2(k) - s2(l)) > 1.d0) then
|
||||
! h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
|
||||
! endif
|
||||
! enddo
|
||||
! enddo
|
||||
!
|
||||
! ! Rotate back H
|
||||
! ! -------------
|
||||
!
|
||||
! call dgemm('N','T',shift2,shift2,shift2, &
|
||||
! 1.d0, h, size(h,1), y, size(y,1), &
|
||||
! 0.d0, s_tmp, size(s_tmp,1))
|
||||
!
|
||||
! call dgemm('N','N',shift2,shift2,shift2, &
|
||||
! 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
|
||||
! 0.d0, h, size(h,1))
|
||||
|
||||
|
||||
! Diagonalize h
|
||||
! -------------
|
||||
call lapack_diag(lambda,y,h,size(h,1),shift2)
|
||||
|
||||
! Compute S2 for each eigenvector
|
||||
! -------------------------------
|
||||
|
||||
call dgemm('N','N',shift2,shift2,shift2, &
|
||||
1.d0, s_, size(s_,1), y, size(y,1), &
|
||||
0.d0, s_tmp, size(s_tmp,1))
|
||||
|
||||
call dgemm('T','N',shift2,shift2,shift2, &
|
||||
1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
|
||||
0.d0, s_, size(s_,1))
|
||||
|
||||
|
||||
|
||||
do k=1,shift2
|
||||
s2(k) = s_(k,k) + S_z2_Sz
|
||||
enddo
|
||||
|
||||
|
||||
if (s2_eig) then
|
||||
do k=1,shift2
|
||||
state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
|
||||
enddo
|
||||
else
|
||||
state_ok(k) = .True.
|
||||
endif
|
||||
|
||||
do k=1,shift2
|
||||
if (.not. state_ok(k)) then
|
||||
do l=k+1,shift2
|
||||
if (state_ok(l)) then
|
||||
call dswap(shift2, y(1,k), 1, y(1,l), 1)
|
||||
call dswap(1, s2(k), 1, s2(l), 1)
|
||||
call dswap(1, lambda(k), 1, lambda(l), 1)
|
||||
state_ok(k) = .True.
|
||||
state_ok(l) = .False.
|
||||
exit
|
||||
endif
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
|
||||
if (state_following) then
|
||||
|
||||
! Compute overlap with U_in
|
||||
! -------------------------
|
||||
|
||||
integer :: order(N_st_diag)
|
||||
double precision :: cmax
|
||||
overlap = -1.d0
|
||||
do k=1,shift2
|
||||
do i=1,shift2
|
||||
overlap(k,i) = dabs(y(k,i))
|
||||
enddo
|
||||
enddo
|
||||
do k=1,N_st
|
||||
cmax = -1.d0
|
||||
do i=1,shift2
|
||||
if (overlap(i,k) > cmax) then
|
||||
cmax = overlap(i,k)
|
||||
order(k) = i
|
||||
endif
|
||||
enddo
|
||||
do i=1,shift2
|
||||
overlap(order(k),i) = -1.d0
|
||||
enddo
|
||||
enddo
|
||||
overlap = y
|
||||
do k=1,N_st
|
||||
l = order(k)
|
||||
if (k /= l) then
|
||||
y(1:shift2,k) = overlap(1:shift2,l)
|
||||
endif
|
||||
enddo
|
||||
do k=1,N_st
|
||||
overlap(k,1) = lambda(k)
|
||||
overlap(k,2) = s2(k)
|
||||
enddo
|
||||
do k=1,N_st
|
||||
l = order(k)
|
||||
if (k /= l) then
|
||||
lambda(k) = overlap(l,1)
|
||||
s2(k) = overlap(l,2)
|
||||
endif
|
||||
enddo
|
||||
|
||||
endif
|
||||
|
||||
|
||||
! Express eigenvectors of h in the determinant basis
|
||||
! --------------------------------------------------
|
||||
|
||||
call dgemm('N','N', sze, N_st_diag, shift2, &
|
||||
1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
|
||||
call dgemm('N','N', sze, N_st_diag, shift2, &
|
||||
1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
|
||||
call dgemm('N','N', sze, N_st_diag, shift2, &
|
||||
1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1))
|
||||
|
||||
! Compute residual vector and davidson step
|
||||
! -----------------------------------------
|
||||
|
||||
do k=1,N_st_diag
|
||||
if (state_ok(k)) then
|
||||
do i=1,sze
|
||||
U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) &
|
||||
* (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
|
||||
)/max(H_jj(i) - lambda (k),1.d-2)
|
||||
enddo
|
||||
else
|
||||
! Randomize components with bad <S2>
|
||||
do i=1,sze-2,2
|
||||
call random_number(r1)
|
||||
call random_number(r2)
|
||||
r1 = dsqrt(-2.d0*dlog(r1))
|
||||
r2 = dtwo_pi*r2
|
||||
U(i,shift2+k) = r1*dcos(r2)
|
||||
U(i+1,shift2+k) = r1*dsin(r2)
|
||||
enddo
|
||||
do i=sze-2+1,sze
|
||||
call random_number(r1)
|
||||
call random_number(r2)
|
||||
r1 = dsqrt(-2.d0*dlog(r1))
|
||||
r2 = dtwo_pi*r2
|
||||
U(i,shift2+k) = r1*dcos(r2)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if (k <= N_st) then
|
||||
residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
|
||||
to_print(1,k) = lambda(k) + nuclear_repulsion
|
||||
to_print(2,k) = s2(k)
|
||||
to_print(3,k) = residual_norm(k)
|
||||
endif
|
||||
enddo
|
||||
|
||||
write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))') iter, to_print(1:3,1:N_st)
|
||||
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
|
||||
do k=1,N_st
|
||||
if (residual_norm(k) > 1.e8) then
|
||||
print *, ''
|
||||
stop 'Davidson failed'
|
||||
endif
|
||||
enddo
|
||||
if (converged) then
|
||||
exit
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
! Re-contract to u_in
|
||||
! -----------
|
||||
|
||||
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
|
||||
U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
|
||||
|
||||
enddo
|
||||
|
||||
do k=1,N_st_diag
|
||||
energies(k) = lambda(k)
|
||||
S2_jj(k) = s2(k)
|
||||
enddo
|
||||
write_buffer = '===== '
|
||||
do i=1,N_st
|
||||
write_buffer = trim(write_buffer)//' ================ =========== ==========='
|
||||
enddo
|
||||
write(iunit,'(A)') trim(write_buffer)
|
||||
write(iunit,'(A)') ''
|
||||
call write_time(iunit)
|
||||
|
||||
call munmap( &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(1), c_pointer(1))
|
||||
|
||||
call munmap( &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(2), c_pointer(2))
|
||||
|
||||
call munmap( &
|
||||
(/ int(sze_8,8),int(N_st_diag*itermax,8) /), &
|
||||
8, fd(3), c_pointer(3))
|
||||
|
||||
deallocate ( &
|
||||
residual_norm, &
|
||||
c, overlap, &
|
||||
h, &
|
||||
y, s_, s_tmp, &
|
||||
lambda &
|
||||
)
|
||||
end
|
||||
|
||||
|
@ -67,7 +67,6 @@ END_PROVIDER
|
||||
size(CI_eigenvectors,1),CI_electronic_energy, &
|
||||
N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,output_determinants)
|
||||
|
||||
|
||||
else if (diag_algorithm == "Lapack") then
|
||||
|
||||
allocate (eigenvectors(size(H_matrix_all_dets,1),N_det))
|
||||
|
@ -1,4 +1,4 @@
|
||||
subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
|
||||
subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -7,166 +7,20 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
|
||||
! n : number of determinants
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: n,Nint, N_st, sze_8
|
||||
integer, intent(in) :: n,Nint, N_st, sze
|
||||
double precision, intent(out) :: e_0(N_st)
|
||||
double precision, intent(in) :: u_0(sze_8,N_st)
|
||||
double precision, intent(inout):: u_0(sze,N_st)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
|
||||
double precision, allocatable :: H_jj(:), v_0(:,:)
|
||||
double precision, allocatable :: v_0(:,:), s_0(:,:)
|
||||
double precision :: u_dot_u,u_dot_v,diag_H_mat_elem
|
||||
integer :: i,j
|
||||
allocate (H_jj(n), v_0(sze_8,N_st))
|
||||
do i = 1, n
|
||||
H_jj(i) = diag_H_mat_elem(keys_tmp(1,1,i),Nint)
|
||||
enddo
|
||||
|
||||
call H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
allocate (v_0(sze,N_st),s_0(sze,N_st))
|
||||
call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
|
||||
do i=1,N_st
|
||||
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
|
||||
enddo
|
||||
deallocate (H_jj, v_0)
|
||||
end
|
||||
|
||||
|
||||
subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,n,Nint, sze_8
|
||||
double precision, intent(out) :: v_0(sze_8,N_st)
|
||||
double precision, intent(in) :: u_0(sze_8,N_st)
|
||||
double precision, intent(in) :: H_jj(n)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision :: hij
|
||||
double precision, allocatable :: vt(:,:)
|
||||
double precision, allocatable :: ut(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
|
||||
integer(bit_kind) :: sorted_i(Nint)
|
||||
|
||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||
integer :: N_st_8
|
||||
|
||||
integer, external :: align_double
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
|
||||
|
||||
N_st_8 = align_double(N_st)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
PROVIDE ref_bitmask_energy
|
||||
|
||||
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
|
||||
allocate(ut(N_st_8,n))
|
||||
|
||||
v_0 = 0.d0
|
||||
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
||||
!$OMP SHARED(n,H_jj,keys_tmp,ut,Nint,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
||||
allocate(vt(N_st_8,n))
|
||||
Vt = 0.d0
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do sh=1,shortcut(0,1)
|
||||
do sh2=1,shortcut(0,1)
|
||||
exa = popcnt(xor(version(1,sh,1), version(1,sh2,1)))
|
||||
if(exa > 2) then
|
||||
cycle
|
||||
end if
|
||||
do ni=2,Nint
|
||||
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
|
||||
end do
|
||||
if(exa > 2) then
|
||||
cycle
|
||||
end if
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
|
||||
jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
|
||||
org_j = sort_idx(j,1)
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if(ext > 4) then
|
||||
cycle jloop
|
||||
endif
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if(ext > 4) then
|
||||
cycle jloop
|
||||
endif
|
||||
end do
|
||||
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
|
||||
do istate=1,N_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
|
||||
enddo
|
||||
enddo jloop
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do sh=1,shortcut(0,2)
|
||||
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_i = sort_idx(i,2)
|
||||
do j=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_j = sort_idx(j,2)
|
||||
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
||||
end do
|
||||
if(ext /= 4) then
|
||||
cycle
|
||||
endif
|
||||
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
|
||||
do istate=1,N_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
|
||||
enddo
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP CRITICAL
|
||||
do istate=1,N_st
|
||||
do i=n,1,-1
|
||||
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END CRITICAL
|
||||
|
||||
deallocate(vt)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) += H_jj(i) * u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
deallocate (shortcut, sort_idx, sorted, version, ut)
|
||||
deallocate (s_0, v_0)
|
||||
end
|
||||
|
||||
BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
|
||||
@ -178,338 +32,411 @@ BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ]
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
|
||||
subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
|
||||
use bitmasks
|
||||
use f77_zmq
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
! Assumes that the determinants are in psi_det
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
!
|
||||
! S2_jj : array of <j|S^2|j>
|
||||
! istart, iend, ishift, istep are used in ZMQ parallelization.
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,n,Nint, sze_8
|
||||
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st)
|
||||
double precision, intent(in) :: u_0(sze_8,N_st)
|
||||
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision :: hij,s2
|
||||
double precision, allocatable :: ut(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
|
||||
integer(bit_kind) :: sorted_i(Nint)
|
||||
|
||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||
integer :: N_st_8
|
||||
|
||||
integer, external :: align_double
|
||||
integer :: blockb, blockb2, istep
|
||||
double precision :: ave_workload, workload, target_workload_inv
|
||||
|
||||
integer(ZMQ_PTR) :: handler
|
||||
|
||||
if(N_st /= N_states_diag .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates"
|
||||
N_st_8 = N_st ! align_double(N_st)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
PROVIDE ref_bitmask_energy
|
||||
|
||||
allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
|
||||
allocate(ut(N_st_8,n))
|
||||
|
||||
integer, intent(in) :: N_st,sze
|
||||
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
|
||||
integer :: k
|
||||
double precision, allocatable :: u_t(:,:)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
|
||||
allocate(u_t(N_st,N_det))
|
||||
do k=1,N_st
|
||||
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
|
||||
enddo
|
||||
v_0 = 0.d0
|
||||
s_0 = 0.d0
|
||||
call dtranspose( &
|
||||
u_0, &
|
||||
size(u_0, 1), &
|
||||
u_t, &
|
||||
size(u_t, 1), &
|
||||
N_det, N_st)
|
||||
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
|
||||
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,1,N_det,0,1)
|
||||
deallocate(u_t)
|
||||
|
||||
blockb = shortcut(0,1)
|
||||
call davidson_init(handler,n,N_st_8,ut)
|
||||
|
||||
|
||||
ave_workload = 0.d0
|
||||
do sh=1,shortcut(0,1)
|
||||
ave_workload += shortcut(0,1)
|
||||
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
ave_workload = ave_workload/dble(shortcut(0,1))
|
||||
target_workload_inv = 0.001d0/ave_workload
|
||||
|
||||
|
||||
do sh=1,shortcut(0,1),1
|
||||
workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
|
||||
do i=sh, shortcut(0,2), shortcut(0,1)
|
||||
do j=i, min(i, shortcut(0,2))
|
||||
workload += (shortcut(j+1,2) - shortcut(j, 2))**2
|
||||
end do
|
||||
end do
|
||||
istep = 1+ int(workload*target_workload_inv)
|
||||
do blockb2=0, istep-1
|
||||
call davidson_add_task(handler, sh, blockb2, istep)
|
||||
enddo
|
||||
do k=1,N_st
|
||||
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
|
||||
enddo
|
||||
|
||||
call davidson_run(handler, v_0, s_0, size(v_0,1))
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
|
||||
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
deallocate(shortcut, sort_idx, sorted, version)
|
||||
deallocate(ut)
|
||||
end
|
||||
|
||||
|
||||
|
||||
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
|
||||
subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
!
|
||||
! S2_jj : array of <j|S^2|j>
|
||||
! Default should be 1,N_det,0,1
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,n,Nint, sze_8
|
||||
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st)
|
||||
double precision, intent(in) :: u_0(sze_8,N_st)
|
||||
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision :: hij,s2
|
||||
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
||||
double precision, intent(in) :: u_t(N_st,N_det)
|
||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
|
||||
integer(bit_kind) :: sorted_i(Nint)
|
||||
|
||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||
integer :: N_st_8
|
||||
PROVIDE ref_bitmask_energy N_int
|
||||
|
||||
integer, external :: align_double
|
||||
integer :: blockb, blockb2, istep
|
||||
double precision :: ave_workload, workload, target_workload_inv
|
||||
select case (N_int)
|
||||
case (1)
|
||||
call H_S2_u_0_nstates_openmp_work_1(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
case (2)
|
||||
call H_S2_u_0_nstates_openmp_work_2(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
case (3)
|
||||
call H_S2_u_0_nstates_openmp_work_3(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
case (4)
|
||||
call H_S2_u_0_nstates_openmp_work_4(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
case default
|
||||
call H_S2_u_0_nstates_openmp_work_N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
end select
|
||||
end
|
||||
BEGIN_TEMPLATE
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
||||
subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
||||
!
|
||||
! Default should be 1,N_det,0,1
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
|
||||
double precision, intent(in) :: u_t(N_st,N_det)
|
||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
||||
|
||||
N_st_8 = align_double(N_st)
|
||||
double precision :: hij, sij
|
||||
integer :: i,j,k,l
|
||||
integer :: k_a, k_b, l_a, l_b, m_a, m_b
|
||||
integer :: istate
|
||||
integer :: krow, kcol, krow_b, kcol_b
|
||||
integer :: lrow, lcol
|
||||
integer :: mrow, mcol
|
||||
integer(bit_kind) :: spindet($N_int)
|
||||
integer(bit_kind) :: tmp_det($N_int,2)
|
||||
integer(bit_kind) :: tmp_det2($N_int,2)
|
||||
integer(bit_kind) :: tmp_det3($N_int,2)
|
||||
integer(bit_kind), allocatable :: buffer(:,:)
|
||||
integer :: n_doubles
|
||||
integer, allocatable :: doubles(:)
|
||||
integer, allocatable :: singles_a(:)
|
||||
integer, allocatable :: singles_b(:)
|
||||
integer, allocatable :: idx(:), idx0(:)
|
||||
integer :: maxab, n_singles_a, n_singles_b, kcol_prev, nmax
|
||||
integer*8 :: k8
|
||||
double precision, allocatable :: v_t(:,:), s_t(:,:)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
PROVIDE ref_bitmask_energy
|
||||
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
||||
allocate(idx0(maxab))
|
||||
|
||||
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
|
||||
allocate(ut(N_st_8,n))
|
||||
do i=1,maxab
|
||||
idx0(i) = i
|
||||
enddo
|
||||
|
||||
v_0 = 0.d0
|
||||
s_0 = 0.d0
|
||||
|
||||
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
|
||||
! Prepare the array of all alpha single excitations
|
||||
! -------------------------------------------------
|
||||
|
||||
PROVIDE N_int
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
||||
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
||||
allocate(vt(N_st_8,n),st(N_st_8,n))
|
||||
Vt = 0.d0
|
||||
St = 0.d0
|
||||
!$OMP SHARED(psi_bilinear_matrix_rows, N_det, &
|
||||
!$OMP psi_bilinear_matrix_columns, &
|
||||
!$OMP psi_det_alpha_unique, psi_det_beta_unique, &
|
||||
!$OMP n_det_alpha_unique, n_det_beta_unique, N_int, &
|
||||
!$OMP psi_bilinear_matrix_transp_rows, &
|
||||
!$OMP psi_bilinear_matrix_transp_columns, &
|
||||
!$OMP psi_bilinear_matrix_transp_order, N_st, &
|
||||
!$OMP psi_bilinear_matrix_order_transp_reverse, &
|
||||
!$OMP psi_bilinear_matrix_columns_loc, &
|
||||
!$OMP istart, iend, istep, &
|
||||
!$OMP ishift, idx0, u_t, maxab, v_0, s_0) &
|
||||
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
|
||||
!$OMP lcol, lrow, l_a, l_b, nmax, &
|
||||
!$OMP buffer, doubles, n_doubles, &
|
||||
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, &
|
||||
!$OMP singles_a, n_singles_a, singles_b, &
|
||||
!$OMP n_singles_b, s_t, k8)
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,2),istate)
|
||||
! Alpha/Beta double excitations
|
||||
! =============================
|
||||
|
||||
allocate( buffer($N_int,maxab), &
|
||||
singles_a(maxab), &
|
||||
singles_b(maxab), &
|
||||
doubles(maxab), &
|
||||
idx(maxab), &
|
||||
v_t(N_st,N_det), s_t(N_st,N_det))
|
||||
kcol_prev=-1
|
||||
|
||||
v_t = 0.d0
|
||||
s_t = 0.d0
|
||||
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
do k_a=istart+ishift,iend,istep
|
||||
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
kcol = psi_bilinear_matrix_columns(k_a)
|
||||
|
||||
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||
|
||||
if (kcol /= kcol_prev) then
|
||||
call get_all_spin_singles_$N_int( &
|
||||
psi_det_beta_unique(1,kcol+1), idx0(kcol+1), &
|
||||
tmp_det(1,2), N_det_beta_unique-kcol, &
|
||||
singles_b, n_singles_b)
|
||||
endif
|
||||
kcol_prev = kcol
|
||||
|
||||
! Loop over singly excited beta columns > current column
|
||||
! ------------------------------------------------------
|
||||
|
||||
do i=1,n_singles_b
|
||||
lcol = singles_b(i)
|
||||
|
||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
|
||||
|
||||
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
||||
|
||||
nmax = psi_bilinear_matrix_columns_loc(lcol+1) - l_a
|
||||
do j=1,nmax
|
||||
lrow = psi_bilinear_matrix_rows(l_a)
|
||||
buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||
idx(j) = l_a
|
||||
l_a = l_a+1
|
||||
enddo
|
||||
j = j-1
|
||||
|
||||
call get_all_spin_singles_$N_int( &
|
||||
buffer, idx, tmp_det(1,1), j, &
|
||||
singles_a, n_singles_a )
|
||||
|
||||
! Loop over alpha singles
|
||||
! -----------------------
|
||||
|
||||
do k = 1,n_singles_a
|
||||
l_a = singles_a(k)
|
||||
lrow = psi_bilinear_matrix_rows(l_a)
|
||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij)
|
||||
call get_s2(tmp_det,tmp_det2,$N_int,sij)
|
||||
do l=1,N_st
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
||||
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
enddo
|
||||
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do sh=1,shortcut(0,2)
|
||||
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_i = sort_idx(i,2)
|
||||
do j=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_j = sort_idx(j,2)
|
||||
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP DO SCHEDULE(dynamic,64)
|
||||
do k_a=istart+ishift,iend,istep
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,1),istate)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(dynamic)
|
||||
do sh=1,shortcut(0,1)
|
||||
do sh2=1,shortcut(0,1)
|
||||
if (sh==sh2) cycle
|
||||
! Single and double alpha excitations
|
||||
! ===================================
|
||||
|
||||
exa = 0
|
||||
do ni=1,Nint
|
||||
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
|
||||
end do
|
||||
if(exa > 2) then
|
||||
cycle
|
||||
end if
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
! Initial determinant is at k_a in alpha-major representation
|
||||
! -----------------------------------------------------------------------
|
||||
|
||||
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
kcol = psi_bilinear_matrix_columns(k_a)
|
||||
|
||||
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||
|
||||
! Initial determinant is at k_b in beta-major representation
|
||||
! ----------------------------------------------------------------------
|
||||
|
||||
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
|
||||
|
||||
spindet(1:$N_int) = tmp_det(1:$N_int,1)
|
||||
|
||||
! Loop inside the beta column to gather all the connected alphas
|
||||
l_a = k_a+1
|
||||
nmax = min(N_det_alpha_unique, N_det - l_a)
|
||||
do i=1,nmax
|
||||
lcol = psi_bilinear_matrix_columns(l_a)
|
||||
if (lcol /= kcol) exit
|
||||
lrow = psi_bilinear_matrix_rows(l_a)
|
||||
buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||
idx(i) = l_a
|
||||
l_a = l_a+1
|
||||
enddo
|
||||
i = i-1
|
||||
|
||||
call get_all_spin_singles_and_doubles_$N_int( &
|
||||
buffer, idx, spindet, i, &
|
||||
singles_a, doubles, n_singles_a, n_doubles )
|
||||
|
||||
! Compute Hij for all alpha singles
|
||||
! ----------------------------------
|
||||
|
||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||
do i=1,n_singles_a
|
||||
l_a = singles_a(i)
|
||||
lrow = psi_bilinear_matrix_rows(l_a)
|
||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
|
||||
do l=1,N_st
|
||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||
! single => sij = 0
|
||||
enddo
|
||||
enddo
|
||||
|
||||
exa = 0
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
! Compute Hij for all alpha doubles
|
||||
! ----------------------------------
|
||||
|
||||
do i=1,n_doubles
|
||||
l_a = doubles(i)
|
||||
lrow = psi_bilinear_matrix_rows(l_a)
|
||||
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
||||
do l=1,N_st
|
||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||
! same spin => sij = 0
|
||||
enddo
|
||||
enddo
|
||||
|
||||
do j=shortcut(sh,1),i-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
|
||||
|
||||
! Single and double beta excitations
|
||||
! ==================================
|
||||
|
||||
|
||||
! Initial determinant is at k_a in alpha-major representation
|
||||
! -----------------------------------------------------------------------
|
||||
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
kcol = psi_bilinear_matrix_columns(k_a)
|
||||
|
||||
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||
|
||||
spindet(1:$N_int) = tmp_det(1:$N_int,2)
|
||||
|
||||
! Initial determinant is at k_b in beta-major representation
|
||||
! -----------------------------------------------------------------------
|
||||
|
||||
k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
|
||||
|
||||
! Loop inside the alpha row to gather all the connected betas
|
||||
l_b = k_b+1
|
||||
nmax = min(N_det_beta_unique, N_det - l_b)
|
||||
do i=1,nmax
|
||||
lrow = psi_bilinear_matrix_transp_rows(l_b)
|
||||
if (lrow /= krow) exit
|
||||
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
||||
buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
|
||||
idx(i) = l_b
|
||||
l_b = l_b+1
|
||||
enddo
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
i = i-1
|
||||
|
||||
call get_all_spin_singles_and_doubles_$N_int( &
|
||||
buffer, idx, spindet, i, &
|
||||
singles_b, doubles, n_singles_b, n_doubles )
|
||||
|
||||
! Compute Hij for all beta singles
|
||||
! ----------------------------------
|
||||
|
||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||
do i=1,n_singles_b
|
||||
l_b = singles_b(i)
|
||||
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
|
||||
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 2, hij)
|
||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||
do l=1,N_st
|
||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||
! single => sij = 0
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
|
||||
do j=i+1,shortcut(sh+1,1)-1
|
||||
if (i==j) cycle
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
! Compute Hij for all beta doubles
|
||||
! ----------------------------------
|
||||
|
||||
!$OMP CRITICAL (u0Hu0)
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
||||
s_0(i,istate) = s_0(i,istate) + st(istate,i)
|
||||
do i=1,n_doubles
|
||||
l_b = doubles(i)
|
||||
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
||||
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
|
||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||
do l=1,N_st
|
||||
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||
! same spin => sij = 0
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END CRITICAL (u0Hu0)
|
||||
|
||||
deallocate(vt,st)
|
||||
|
||||
! Diagonal contribution
|
||||
! =====================
|
||||
|
||||
|
||||
! Initial determinant is at k_a in alpha-major representation
|
||||
! -----------------------------------------------------------------------
|
||||
|
||||
krow = psi_bilinear_matrix_rows(k_a)
|
||||
kcol = psi_bilinear_matrix_columns(k_a)
|
||||
|
||||
tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||
tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||
|
||||
double precision, external :: diag_H_mat_elem, diag_S_mat_elem
|
||||
|
||||
hij = diag_H_mat_elem(tmp_det,$N_int)
|
||||
sij = diag_S_mat_elem(tmp_det,$N_int)
|
||||
do l=1,N_st
|
||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
|
||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
|
||||
enddo
|
||||
|
||||
end do
|
||||
!$OMP END DO NOWAIT
|
||||
deallocate(buffer, singles_a, singles_b, doubles, idx)
|
||||
|
||||
!$OMP CRITICAL
|
||||
do l=1,N_st
|
||||
do i=1, N_det
|
||||
v_0(i,l) = v_0(i,l) + v_t(l,i)
|
||||
s_0(i,l) = s_0(i,l) + s_t(l,i)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END CRITICAL
|
||||
deallocate(v_t, s_t)
|
||||
|
||||
!$OMP BARRIER
|
||||
!$OMP END PARALLEL
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
|
||||
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
deallocate (shortcut, sort_idx, sorted, version, ut)
|
||||
end
|
||||
|
||||
SUBST [ N_int ]
|
||||
|
||||
1;;
|
||||
2;;
|
||||
3;;
|
||||
4;;
|
||||
N_int;;
|
||||
|
||||
END_TEMPLATE
|
||||
|
||||
|
||||
|
517
src/Davidson/u0Hu0_old.irp.f
Normal file
517
src/Davidson/u0Hu0_old.irp.f
Normal file
@ -0,0 +1,517 @@
|
||||
|
||||
subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,n,Nint, sze
|
||||
double precision, intent(out) :: v_0(sze,N_st)
|
||||
double precision, intent(in) :: u_0(sze,N_st)
|
||||
double precision, intent(in) :: H_jj(n)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision :: hij,s2
|
||||
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
|
||||
integer(bit_kind) :: sorted_i(Nint)
|
||||
|
||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||
integer :: N_st_8
|
||||
|
||||
integer, external :: align_double
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
||||
|
||||
N_st_8 = align_double(N_st)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
PROVIDE ref_bitmask_energy
|
||||
|
||||
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
|
||||
allocate( ut(N_st_8,n))
|
||||
|
||||
v_0 = 0.d0
|
||||
|
||||
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
||||
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
||||
allocate(vt(N_st_8,n),st(N_st_8,n))
|
||||
Vt = 0.d0
|
||||
St = 0.d0
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,2),istate)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(static,1)
|
||||
do sh=1,shortcut(0,2)
|
||||
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_i = sort_idx(i,2)
|
||||
do j=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_j = sort_idx(j,2)
|
||||
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,1),istate)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(static,1)
|
||||
do sh=1,shortcut(0,1)
|
||||
do sh2=1,shortcut(0,1)
|
||||
if (sh==sh2) cycle
|
||||
|
||||
exa = 0
|
||||
do ni=1,Nint
|
||||
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
|
||||
end do
|
||||
if(exa > 2) then
|
||||
cycle
|
||||
end if
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
|
||||
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
|
||||
exa = 0
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
|
||||
do j=shortcut(sh,1),i-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
|
||||
do j=i+1,shortcut(sh+1,1)-1
|
||||
if (i==j) cycle
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
!$OMP ATOMIC
|
||||
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(vt,st)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
deallocate (shortcut, sort_idx, sorted, version, ut)
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Computes v_0 = H|u_0> and s_0 = S^2 |u_0>
|
||||
!
|
||||
! n : number of determinants
|
||||
!
|
||||
! H_jj : array of <j|H|j>
|
||||
!
|
||||
! S2_jj : array of <j|S^2|j>
|
||||
END_DOC
|
||||
integer, intent(in) :: N_st,n,Nint, sze
|
||||
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
|
||||
double precision, intent(in) :: u_0(sze,N_st)
|
||||
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
||||
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision :: hij,s2
|
||||
double precision, allocatable :: vt(:,:), ut(:,:), st(:,:)
|
||||
integer :: i,j,k,l, jj,ii
|
||||
integer :: i0, j0
|
||||
|
||||
integer, allocatable :: shortcut(:,:), sort_idx(:,:)
|
||||
integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
|
||||
integer(bit_kind) :: sorted_i(Nint)
|
||||
|
||||
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
|
||||
integer :: N_st_8
|
||||
|
||||
integer, external :: align_double
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
|
||||
|
||||
N_st_8 = align_double(N_st)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (Nint == N_int)
|
||||
ASSERT (n>0)
|
||||
PROVIDE ref_bitmask_energy
|
||||
|
||||
allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
|
||||
allocate( ut(N_st_8,n))
|
||||
|
||||
v_0 = 0.d0
|
||||
s_0 = 0.d0
|
||||
|
||||
call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
|
||||
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
|
||||
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
|
||||
!$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8)
|
||||
allocate(vt(N_st_8,n),st(N_st_8,n))
|
||||
Vt = 0.d0
|
||||
St = 0.d0
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,2),istate)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(static,4)
|
||||
do sh=1,shortcut(0,2)
|
||||
do i=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_i = sort_idx(i,2)
|
||||
do j=shortcut(sh,2),shortcut(sh+1,2)-1
|
||||
org_j = sort_idx(j,2)
|
||||
ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext == 4) then
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO
|
||||
do i=1,n
|
||||
do istate=1,N_st
|
||||
ut(istate,i) = u_0(sort_idx(i,1),istate)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
!$OMP DO SCHEDULE(static,4)
|
||||
do sh=1,shortcut(0,1)
|
||||
do sh2=1,shortcut(0,1)
|
||||
if (sh==sh2) cycle
|
||||
|
||||
exa = 0
|
||||
do ni=1,Nint
|
||||
exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
|
||||
end do
|
||||
if(exa > 2) then
|
||||
cycle
|
||||
end if
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
|
||||
do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
|
||||
exa = 0
|
||||
|
||||
do i=shortcut(sh,1),shortcut(sh+1,1)-1
|
||||
org_i = sort_idx(i,1)
|
||||
do ni=1,Nint
|
||||
sorted_i(ni) = sorted(ni,i,1)
|
||||
enddo
|
||||
|
||||
do j=shortcut(sh,1),i-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
|
||||
do j=i+1,shortcut(sh+1,1)-1
|
||||
ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
|
||||
if (ext > 4) cycle
|
||||
do ni=2,Nint
|
||||
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
|
||||
if (ext > 4) exit
|
||||
end do
|
||||
if(ext <= 4) then
|
||||
org_j = sort_idx(j,1)
|
||||
call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
|
||||
if (hij /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
if (ext /= 2) then
|
||||
call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
|
||||
if (s2 /= 0.d0) then
|
||||
do istate=1,n_st
|
||||
st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
|
||||
enddo
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
!$OMP ATOMIC
|
||||
v_0(i,istate) = v_0(i,istate) + vt(istate,i)
|
||||
!$OMP ATOMIC
|
||||
s_0(i,istate) = s_0(i,istate) + st(istate,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
deallocate(vt,st)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
do istate=1,N_st
|
||||
do i=1,n
|
||||
v_0(i,istate) = v_0(i,istate) + H_jj(i) * u_0(i,istate)
|
||||
s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
|
||||
enddo
|
||||
enddo
|
||||
deallocate (shortcut, sort_idx, sorted, version, ut)
|
||||
end
|
||||
|
||||
subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
|
||||
use bitmasks
|
||||
implicit none
|
||||
integer, intent(in) :: N_st,n,Nint, sze
|
||||
integer(bit_kind), intent(in) :: keys_tmp(Nint,2,n)
|
||||
double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st)
|
||||
double precision, intent(in) :: u_0(sze,N_st)
|
||||
double precision, intent(in) :: H_jj(n), S2_jj(n)
|
||||
|
||||
PROVIDE ref_bitmask_energy
|
||||
|
||||
double precision, allocatable :: vt(:,:)
|
||||
integer, allocatable :: idx(:)
|
||||
integer :: i,j, jj, l
|
||||
double precision :: hij
|
||||
|
||||
do i=1,n
|
||||
v_0(i,:) = H_jj(i) * u_0(i,:)
|
||||
enddo
|
||||
|
||||
allocate(idx(0:n), vt(N_st,n))
|
||||
Vt = 0.d0
|
||||
!$OMP PARALLEL DO DEFAULT(shared) PRIVATE(i,idx,jj,j,degree,exc,phase,hij,l) SCHEDULE(static,1)
|
||||
do i=2,n
|
||||
idx(0) = i
|
||||
call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx)
|
||||
do jj=1,idx(0)
|
||||
j = idx(jj)
|
||||
double precision :: phase
|
||||
integer :: degree
|
||||
integer :: exc(0:2,2,2)
|
||||
call get_excitation(keys_tmp(1,1,j),keys_tmp(1,1,i),exc,degree,phase,Nint)
|
||||
! if ((degree == 2).and.(exc(0,1,1)==1)) then
|
||||
! continue
|
||||
! else
|
||||
! cycle
|
||||
! endif
|
||||
! if ((degree == 2).and.(exc(0,1,1)==1)) cycle
|
||||
! if ((degree > 1)) cycle
|
||||
! if (exc(0,1,2) /= 0) cycle
|
||||
! if (exc(0,1,1) == 2) cycle
|
||||
! if (exc(0,1,2) == 2) cycle
|
||||
! if ((degree==1).and.(exc(0,1,2) == 1)) cycle
|
||||
call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij)
|
||||
do l=1,N_st
|
||||
!$OMP ATOMIC
|
||||
vt (l,i) = vt (l,i) + hij*u_0(j,l)
|
||||
!$OMP ATOMIC
|
||||
vt (l,j) = vt (l,j) + hij*u_0(i,l)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
do i=1,n
|
||||
v_0(i,:) = v_0(i,:) + vt(:,i)
|
||||
enddo
|
||||
end
|
||||
|
@ -1,3 +1,35 @@
|
||||
double precision function diag_S_mat_elem(key_i,Nint)
|
||||
implicit none
|
||||
use bitmasks
|
||||
include 'Utils/constants.include.F'
|
||||
|
||||
integer :: Nint
|
||||
integer(bit_kind), intent(in) :: key_i(Nint,2)
|
||||
BEGIN_DOC
|
||||
! Returns <i|S^2|i>
|
||||
END_DOC
|
||||
integer :: nup, i
|
||||
integer(bit_kind) :: xorvec(N_int_max)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
|
||||
|
||||
do i=1,Nint
|
||||
xorvec(i) = xor(key_i(i,1),key_i(i,2))
|
||||
enddo
|
||||
|
||||
do i=1,Nint
|
||||
xorvec(i) = iand(xorvec(i),key_i(i,1))
|
||||
enddo
|
||||
|
||||
nup = 0
|
||||
do i=1,Nint
|
||||
if (xorvec(i) /= 0_bit_kind) then
|
||||
nup += popcnt(xorvec(i))
|
||||
endif
|
||||
enddo
|
||||
diag_S_mat_elem = dble(nup)
|
||||
|
||||
end
|
||||
|
||||
subroutine get_s2(key_i,key_j,Nint,s2)
|
||||
implicit none
|
||||
use bitmasks
|
||||
@ -25,11 +57,9 @@ subroutine get_s2(key_i,key_j,Nint,s2)
|
||||
endif
|
||||
endif
|
||||
case(0)
|
||||
nup = 0
|
||||
do i=1,Nint
|
||||
nup += popcnt(iand(xor(key_i(i,1),key_i(i,2)),key_i(i,1)))
|
||||
enddo
|
||||
s2 = dble(nup)
|
||||
double precision, external :: diag_S_mat_elem
|
||||
!DIR$ FORCEINLINE
|
||||
s2 = diag_S_mat_elem(key_i,Nint)
|
||||
end select
|
||||
end
|
||||
|
||||
|
@ -1,32 +1,60 @@
|
||||
subroutine get_excitation_degree(key1,key2,degree,Nint)
|
||||
use bitmasks
|
||||
include 'Utils/constants.include.F'
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the excitation degree between two determinants
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: key1(Nint,2)
|
||||
integer(bit_kind), intent(in) :: key2(Nint,2)
|
||||
integer(bit_kind), intent(in) :: key1(Nint*2)
|
||||
integer(bit_kind), intent(in) :: key2(Nint*2)
|
||||
integer, intent(out) :: degree
|
||||
|
||||
integer(bit_kind) :: xorvec(2*N_int_max)
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
|
||||
integer :: l
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
|
||||
degree = popcnt(xor( key1(1,1), key2(1,1))) + &
|
||||
popcnt(xor( key1(1,2), key2(1,2)))
|
||||
!DIR$ NOUNROLL
|
||||
do l=2,Nint
|
||||
degree = degree+ popcnt(xor( key1(l,1), key2(l,1))) + &
|
||||
popcnt(xor( key1(l,2), key2(l,2)))
|
||||
select case (Nint)
|
||||
|
||||
case (1)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
xorvec(2) = xor( key1(2), key2(2))
|
||||
degree = sum(popcnt(xorvec(1:2)))
|
||||
|
||||
case (2)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
xorvec(2) = xor( key1(2), key2(2))
|
||||
xorvec(3) = xor( key1(3), key2(3))
|
||||
xorvec(4) = xor( key1(4), key2(4))
|
||||
degree = sum(popcnt(xorvec(1:4)))
|
||||
|
||||
case (3)
|
||||
do l=1,6
|
||||
xorvec(l) = xor( key1(l), key2(l))
|
||||
enddo
|
||||
ASSERT (degree >= 0)
|
||||
degree = sum(popcnt(xorvec(1:6)))
|
||||
|
||||
case (4)
|
||||
do l=1,8
|
||||
xorvec(l) = xor( key1(l), key2(l))
|
||||
enddo
|
||||
degree = sum(popcnt(xorvec(1:8)))
|
||||
|
||||
case default
|
||||
do l=1,ishft(Nint,1)
|
||||
xorvec(l) = xor( key1(l), key2(l))
|
||||
enddo
|
||||
degree = sum(popcnt(xorvec(1:l)))
|
||||
|
||||
end select
|
||||
|
||||
degree = ishft(degree,-1)
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
subroutine get_excitation(det1,det2,exc,degree,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
@ -139,72 +167,6 @@ subroutine decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
|
||||
end
|
||||
|
||||
|
||||
subroutine decode_exc_int2(exc,degree,h1,p1,h2,p2,s1,s2)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Decodes the exc arrays returned by get_excitation.
|
||||
! h1,h2 : Holes
|
||||
! p1,p2 : Particles
|
||||
! s1,s2 : Spins (1:alpha, 2:beta)
|
||||
! degree : Degree of excitation
|
||||
END_DOC
|
||||
integer, intent(in) :: exc(0:2,2,2),degree
|
||||
integer*2, intent(out) :: h1,h2,p1,p2,s1,s2
|
||||
ASSERT (degree > 0)
|
||||
ASSERT (degree < 3)
|
||||
|
||||
select case(degree)
|
||||
case(2)
|
||||
if (exc(0,1,1) == 2) then
|
||||
h1 = exc(1,1,1)
|
||||
h2 = exc(2,1,1)
|
||||
p1 = exc(1,2,1)
|
||||
p2 = exc(2,2,1)
|
||||
s1 = 1
|
||||
s2 = 1
|
||||
else if (exc(0,1,2) == 2) then
|
||||
h1 = exc(1,1,2)
|
||||
h2 = exc(2,1,2)
|
||||
p1 = exc(1,2,2)
|
||||
p2 = exc(2,2,2)
|
||||
s1 = 2
|
||||
s2 = 2
|
||||
else
|
||||
h1 = exc(1,1,1)
|
||||
h2 = exc(1,1,2)
|
||||
p1 = exc(1,2,1)
|
||||
p2 = exc(1,2,2)
|
||||
s1 = 1
|
||||
s2 = 2
|
||||
endif
|
||||
case(1)
|
||||
if (exc(0,1,1) == 1) then
|
||||
h1 = exc(1,1,1)
|
||||
h2 = 0
|
||||
p1 = exc(1,2,1)
|
||||
p2 = 0
|
||||
s1 = 1
|
||||
s2 = 0
|
||||
else
|
||||
h1 = exc(1,1,2)
|
||||
h2 = 0
|
||||
p1 = exc(1,2,2)
|
||||
p2 = 0
|
||||
s1 = 2
|
||||
s2 = 0
|
||||
endif
|
||||
case(0)
|
||||
h1 = 0
|
||||
p1 = 0
|
||||
h2 = 0
|
||||
p2 = 0
|
||||
s1 = 0
|
||||
s2 = 0
|
||||
end select
|
||||
end
|
||||
|
||||
|
||||
subroutine get_double_excitation(det1,det2,exc,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
@ -925,22 +887,29 @@ subroutine create_minilist(key_mask, fullList, miniList, idx_miniList, N_fullLis
|
||||
|
||||
N_miniList = 0
|
||||
|
||||
integer :: e_ab
|
||||
e_ab = n_a+n_b
|
||||
do i=1,N_fullList
|
||||
e_a = n_a - popcnt(iand(fullList(1, 1, i), key_mask(1, 1)))
|
||||
e_b = n_b - popcnt(iand(fullList(1, 2, i), key_mask(1, 2)))
|
||||
e_a = e_ab - popcnt(iand(fullList(1, 1, i), key_mask(1, 1))) &
|
||||
- popcnt(iand(fullList(1, 2, i), key_mask(1, 2)))
|
||||
do ni=2,nint
|
||||
e_a -= popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1)))
|
||||
e_b -= popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2)))
|
||||
e_a = e_a - popcnt(iand(fullList(ni, 1, i), key_mask(ni, 1))) &
|
||||
- popcnt(iand(fullList(ni, 2, i), key_mask(ni, 2)))
|
||||
end do
|
||||
|
||||
if(e_a + e_b <= 2) then
|
||||
if(e_a > 2) then
|
||||
cycle
|
||||
endif
|
||||
|
||||
N_miniList = N_miniList + 1
|
||||
do ni=1,Nint
|
||||
miniList(1,1,N_miniList) = fullList(1,1,i)
|
||||
miniList(1,2,N_miniList) = fullList(1,2,i)
|
||||
do ni=2,Nint
|
||||
miniList(ni,1,N_miniList) = fullList(ni,1,i)
|
||||
miniList(ni,2,N_miniList) = fullList(ni,2,i)
|
||||
enddo
|
||||
idx_miniList(N_miniList) = i
|
||||
end if
|
||||
|
||||
end do
|
||||
end subroutine
|
||||
|
||||
@ -1041,13 +1010,15 @@ subroutine i_H_psi(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array)
|
||||
double precision :: phase
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: hij
|
||||
integer :: idx(0:Ndet)
|
||||
integer, allocatable :: idx(:)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (N_int == Nint)
|
||||
ASSERT (Nstate > 0)
|
||||
ASSERT (Ndet > 0)
|
||||
ASSERT (Ndet_max >= Ndet)
|
||||
allocate(idx(0:Ndet))
|
||||
|
||||
i_H_psi_array = 0.d0
|
||||
|
||||
call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx)
|
||||
@ -1089,7 +1060,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,
|
||||
double precision :: phase
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: hij
|
||||
integer :: idx(0:Ndet)
|
||||
integer, allocatable :: idx(:)
|
||||
BEGIN_DOC
|
||||
! Computes <i|H|Psi> = \sum_J c_J <i|H|J>.
|
||||
!
|
||||
@ -1102,6 +1073,7 @@ subroutine i_H_psi_minilist(key,keys,idx_key,N_minilist,coef,Nint,Ndet,Ndet_max,
|
||||
ASSERT (Nstate > 0)
|
||||
ASSERT (Ndet > 0)
|
||||
ASSERT (Ndet_max >= Ndet)
|
||||
allocate(idx(0:Ndet))
|
||||
i_H_psi_array = 0.d0
|
||||
|
||||
call filter_connected_i_H_psi0(keys,key,Nint,N_minilist,idx)
|
||||
@ -1148,7 +1120,8 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array
|
||||
double precision :: phase
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: hij
|
||||
integer :: idx(0:Ndet),n_interact
|
||||
integer,allocatable :: idx(:)
|
||||
integer :: n_interact
|
||||
BEGIN_DOC
|
||||
! <key|H|psi> for the various Nstates
|
||||
END_DOC
|
||||
@ -1158,6 +1131,7 @@ subroutine i_H_psi_sec_ord(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array
|
||||
ASSERT (Nstate > 0)
|
||||
ASSERT (Ndet > 0)
|
||||
ASSERT (Ndet_max >= Ndet)
|
||||
allocate(idx(0:Ndet))
|
||||
i_H_psi_array = 0.d0
|
||||
call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx)
|
||||
n_interact = 0
|
||||
@ -1207,7 +1181,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx
|
||||
double precision :: phase
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: hij
|
||||
integer :: idx(0:Ndet)
|
||||
integer,allocatable :: idx(:)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (N_int == Nint)
|
||||
@ -1215,6 +1189,7 @@ subroutine i_H_psi_SC2(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array,idx
|
||||
ASSERT (Ndet > 0)
|
||||
ASSERT (Ndet_max >= Ndet)
|
||||
i_H_psi_array = 0.d0
|
||||
allocate(idx(0:Ndet))
|
||||
call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat)
|
||||
do ii=1,idx(0)
|
||||
i = idx(ii)
|
||||
@ -1254,7 +1229,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a
|
||||
double precision :: phase
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: hij
|
||||
integer :: idx(0:Ndet)
|
||||
integer,allocatable :: idx(:)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
ASSERT (N_int == Nint)
|
||||
@ -1262,6 +1237,7 @@ subroutine i_H_psi_SC2_verbose(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_a
|
||||
ASSERT (Ndet > 0)
|
||||
ASSERT (Ndet_max >= Ndet)
|
||||
i_H_psi_array = 0.d0
|
||||
allocate(idx(0:Ndet))
|
||||
call filter_connected_i_H_psi0_SC2(keys,key,Nint,Ndet,idx,idx_repeat)
|
||||
print*,'--------'
|
||||
do ii=1,idx(0)
|
||||
@ -2140,8 +2116,8 @@ end
|
||||
subroutine get_phase(key1,key2,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
integer(bit_kind), intent(in) :: key1(Nint,2), key2(Nint,2)
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: key1(Nint,2), key2(Nint,2)
|
||||
double precision, intent(out) :: phase
|
||||
BEGIN_DOC
|
||||
! Returns the phase between key1 and key2
|
||||
@ -2192,3 +2168,424 @@ subroutine u_0_H_u_0_stored(e_0,u_0,hmatrix,sze)
|
||||
call matrix_vector_product(u_0,v_0,hmatrix,sze,sze)
|
||||
e_0 = u_dot_v(v_0,u_0,sze)
|
||||
end
|
||||
|
||||
|
||||
|
||||
! Spin-determinant routines
|
||||
! -------------------------
|
||||
|
||||
subroutine get_excitation_degree_spin(key1,key2,degree,Nint)
|
||||
use bitmasks
|
||||
include 'Utils/constants.include.F'
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the excitation degree between two determinants
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: key1(Nint)
|
||||
integer(bit_kind), intent(in) :: key2(Nint)
|
||||
integer, intent(out) :: degree
|
||||
|
||||
integer(bit_kind) :: xorvec(N_int_max)
|
||||
integer :: l
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
|
||||
select case (Nint)
|
||||
|
||||
case (1)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
degree = popcnt(xorvec(1))
|
||||
|
||||
case (2)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
xorvec(2) = xor( key1(2), key2(2))
|
||||
degree = popcnt(xorvec(1))+popcnt(xorvec(2))
|
||||
|
||||
case (3)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
xorvec(2) = xor( key1(2), key2(2))
|
||||
xorvec(3) = xor( key1(3), key2(3))
|
||||
degree = sum(popcnt(xorvec(1:3)))
|
||||
|
||||
case (4)
|
||||
xorvec(1) = xor( key1(1), key2(1))
|
||||
xorvec(2) = xor( key1(2), key2(2))
|
||||
xorvec(3) = xor( key1(3), key2(3))
|
||||
xorvec(4) = xor( key1(4), key2(4))
|
||||
degree = sum(popcnt(xorvec(1:4)))
|
||||
|
||||
case default
|
||||
do l=1,N_int
|
||||
xorvec(l) = xor( key1(l), key2(l))
|
||||
enddo
|
||||
degree = sum(popcnt(xorvec(1:Nint)))
|
||||
|
||||
end select
|
||||
|
||||
degree = ishft(degree,-1)
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_excitation_spin(det1,det2,exc,degree,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the excitation operators between two determinants and the phase
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: det1(Nint)
|
||||
integer(bit_kind), intent(in) :: det2(Nint)
|
||||
integer, intent(out) :: exc(0:2,2)
|
||||
integer, intent(out) :: degree
|
||||
double precision, intent(out) :: phase
|
||||
! exc(number,hole/particle)
|
||||
! ex :
|
||||
! exc(0,1) = number of holes
|
||||
! exc(0,2) = number of particles
|
||||
! exc(1,2) = first particle
|
||||
! exc(1,1) = first hole
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
|
||||
!DIR$ FORCEINLINE
|
||||
call get_excitation_degree_spin(det1,det2,degree,Nint)
|
||||
select case (degree)
|
||||
|
||||
case (3:)
|
||||
degree = -1
|
||||
return
|
||||
|
||||
case (2)
|
||||
call get_double_excitation_spin(det1,det2,exc,phase,Nint)
|
||||
return
|
||||
|
||||
case (1)
|
||||
call get_mono_excitation_spin(det1,det2,exc,phase,Nint)
|
||||
return
|
||||
|
||||
case(0)
|
||||
return
|
||||
|
||||
end select
|
||||
end
|
||||
|
||||
subroutine decode_exc_spin(exc,h1,p1,h2,p2)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Decodes the exc arrays returned by get_excitation.
|
||||
! h1,h2 : Holes
|
||||
! p1,p2 : Particles
|
||||
END_DOC
|
||||
integer, intent(in) :: exc(0:2,2)
|
||||
integer, intent(out) :: h1,h2,p1,p2
|
||||
|
||||
select case (exc(0,1))
|
||||
case(2)
|
||||
h1 = exc(1,1)
|
||||
h2 = exc(2,1)
|
||||
p1 = exc(1,2)
|
||||
p2 = exc(2,2)
|
||||
case(1)
|
||||
h1 = exc(1,1)
|
||||
h2 = 0
|
||||
p1 = exc(1,2)
|
||||
p2 = 0
|
||||
case default
|
||||
h1 = 0
|
||||
p1 = 0
|
||||
h2 = 0
|
||||
p2 = 0
|
||||
end select
|
||||
end
|
||||
|
||||
|
||||
subroutine get_double_excitation_spin(det1,det2,exc,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the two excitation operators between two doubly excited spin-determinants
|
||||
! and the phase
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: det1(Nint)
|
||||
integer(bit_kind), intent(in) :: det2(Nint)
|
||||
integer, intent(out) :: exc(0:2,2)
|
||||
double precision, intent(out) :: phase
|
||||
integer :: tz
|
||||
integer :: l, idx_hole, idx_particle, ishift
|
||||
integer :: nperm
|
||||
integer :: i,j,k,m,n
|
||||
integer :: high, low
|
||||
integer :: a,b,c,d
|
||||
integer(bit_kind) :: hole, particle, tmp
|
||||
double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
nperm = 0
|
||||
exc(0,1) = 0
|
||||
exc(0,2) = 0
|
||||
|
||||
idx_particle = 0
|
||||
idx_hole = 0
|
||||
ishift = 1-bit_kind_size
|
||||
do l=1,Nint
|
||||
ishift = ishift + bit_kind_size
|
||||
if (det1(l) == det2(l)) then
|
||||
cycle
|
||||
endif
|
||||
tmp = xor( det1(l), det2(l) )
|
||||
particle = iand(tmp, det2(l))
|
||||
hole = iand(tmp, det1(l))
|
||||
do while (particle /= 0_bit_kind)
|
||||
tz = trailz(particle)
|
||||
idx_particle = idx_particle + 1
|
||||
exc(0,2) = exc(0,2) + 1
|
||||
exc(idx_particle,2) = tz+ishift
|
||||
particle = iand(particle,particle-1_bit_kind)
|
||||
enddo
|
||||
if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2
|
||||
exit
|
||||
endif
|
||||
do while (hole /= 0_bit_kind)
|
||||
tz = trailz(hole)
|
||||
idx_hole = idx_hole + 1
|
||||
exc(0,1) = exc(0,1) + 1
|
||||
exc(idx_hole,1) = tz+ishift
|
||||
hole = iand(hole,hole-1_bit_kind)
|
||||
enddo
|
||||
if (iand(exc(0,1),exc(0,2))==2) then ! exc(0,1)==2 or exc(0,2)==2
|
||||
exit
|
||||
endif
|
||||
enddo
|
||||
|
||||
select case (exc(0,1))
|
||||
|
||||
case(1)
|
||||
low = min(exc(1,1), exc(1,2))
|
||||
high = max(exc(1,1), exc(1,2))
|
||||
|
||||
ASSERT (low > 0)
|
||||
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
|
||||
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
|
||||
ASSERT (high > 0)
|
||||
k = ishft(high-1,-bit_kind_shift)+1
|
||||
m = iand(high-1,bit_kind_size-1)+1
|
||||
|
||||
if (j==k) then
|
||||
nperm = nperm + popcnt(iand(det1(j), &
|
||||
iand( ibset(0_bit_kind,m-1)-1_bit_kind, &
|
||||
ibclr(-1_bit_kind,n)+1_bit_kind ) ))
|
||||
else
|
||||
nperm = nperm + popcnt(iand(det1(k), &
|
||||
ibset(0_bit_kind,m-1)-1_bit_kind))
|
||||
if (n < bit_kind_size) then
|
||||
nperm = nperm + popcnt(iand(det1(j), ibclr(-1_bit_kind,n) +1_bit_kind))
|
||||
endif
|
||||
do i=j+1,k-1
|
||||
nperm = nperm + popcnt(det1(i))
|
||||
end do
|
||||
endif
|
||||
|
||||
case (2)
|
||||
|
||||
do i=1,2
|
||||
low = min(exc(i,1), exc(i,2))
|
||||
high = max(exc(i,1), exc(i,2))
|
||||
|
||||
ASSERT (low > 0)
|
||||
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
|
||||
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
|
||||
ASSERT (high > 0)
|
||||
k = ishft(high-1,-bit_kind_shift)+1
|
||||
m = iand(high-1,bit_kind_size-1)+1
|
||||
|
||||
if (j==k) then
|
||||
nperm = nperm + popcnt(iand(det1(j), &
|
||||
iand( ibset(0_bit_kind,m-1)-1_bit_kind, &
|
||||
ibclr(-1_bit_kind,n)+1_bit_kind ) ))
|
||||
else
|
||||
nperm = nperm + popcnt(iand(det1(k), &
|
||||
ibset(0_bit_kind,m-1)-1_bit_kind))
|
||||
if (n < bit_kind_size) then
|
||||
nperm = nperm + popcnt(iand(det1(j), ibclr(-1_bit_kind,n) +1_bit_kind))
|
||||
endif
|
||||
do l=j+1,k-1
|
||||
nperm = nperm + popcnt(det1(l))
|
||||
end do
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
a = min(exc(1,1), exc(1,2))
|
||||
b = max(exc(1,1), exc(1,2))
|
||||
c = min(exc(2,1), exc(2,2))
|
||||
d = max(exc(2,1), exc(2,2))
|
||||
if (c>a .and. c<b .and. d>b) then
|
||||
nperm = nperm + 1
|
||||
endif
|
||||
end select
|
||||
|
||||
phase = phase_dble(iand(nperm,1))
|
||||
|
||||
end
|
||||
|
||||
subroutine get_mono_excitation_spin(det1,det2,exc,phase,Nint)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the excitation operator between two singly excited determinants and the phase
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: det1(Nint)
|
||||
integer(bit_kind), intent(in) :: det2(Nint)
|
||||
integer, intent(out) :: exc(0:2,2)
|
||||
double precision, intent(out) :: phase
|
||||
integer :: tz
|
||||
integer :: l, idx_hole, idx_particle, ishift
|
||||
integer :: nperm
|
||||
integer :: i,j,k,m,n
|
||||
integer :: high, low
|
||||
integer :: a,b,c,d
|
||||
integer(bit_kind) :: hole, particle, tmp
|
||||
double precision, parameter :: phase_dble(0:1) = (/ 1.d0, -1.d0 /)
|
||||
|
||||
ASSERT (Nint > 0)
|
||||
nperm = 0
|
||||
exc(0,1) = 0
|
||||
exc(0,2) = 0
|
||||
|
||||
ishift = 1-bit_kind_size
|
||||
do l=1,Nint
|
||||
ishift = ishift + bit_kind_size
|
||||
if (det1(l) == det2(l)) then
|
||||
cycle
|
||||
endif
|
||||
tmp = xor( det1(l), det2(l) )
|
||||
particle = iand(tmp, det2(l))
|
||||
hole = iand(tmp, det1(l))
|
||||
if (particle /= 0_bit_kind) then
|
||||
tz = trailz(particle)
|
||||
exc(0,2) = 1
|
||||
exc(1,2) = tz+ishift
|
||||
endif
|
||||
if (hole /= 0_bit_kind) then
|
||||
tz = trailz(hole)
|
||||
exc(0,1) = 1
|
||||
exc(1,1) = tz+ishift
|
||||
endif
|
||||
|
||||
if ( iand(exc(0,1),exc(0,2)) /= 1) then ! exc(0,1)/=1 and exc(0,2) /= 1
|
||||
cycle
|
||||
endif
|
||||
|
||||
low = min(exc(1,1),exc(1,2))
|
||||
high = max(exc(1,1),exc(1,2))
|
||||
|
||||
ASSERT (low > 0)
|
||||
j = ishft(low-1,-bit_kind_shift)+1 ! Find integer in array(Nint)
|
||||
n = iand(low-1,bit_kind_size-1)+1 ! mod(low,bit_kind_size)
|
||||
ASSERT (high > 0)
|
||||
k = ishft(high-1,-bit_kind_shift)+1
|
||||
m = iand(high-1,bit_kind_size-1)+1
|
||||
if (j==k) then
|
||||
nperm = popcnt(iand(det1(j), &
|
||||
iand(ibset(0_bit_kind,m-1)-1_bit_kind,ibclr(-1_bit_kind,n)+1_bit_kind)))
|
||||
else
|
||||
nperm = nperm + popcnt(iand(det1(k),ibset(0_bit_kind,m-1)-1_bit_kind))
|
||||
if (n < bit_kind_size) then
|
||||
nperm = nperm + popcnt(iand(det1(j),ibclr(-1_bit_kind,n)+1_bit_kind))
|
||||
endif
|
||||
do i=j+1,k-1
|
||||
nperm = nperm + popcnt(det1(i))
|
||||
end do
|
||||
endif
|
||||
phase = phase_dble(iand(nperm,1))
|
||||
return
|
||||
|
||||
enddo
|
||||
end
|
||||
|
||||
subroutine i_H_j_mono_spin(key_i,key_j,Nint,spin,hij)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns <i|H|j> where i and j are determinants differing by a single excitation
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint, spin
|
||||
integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
|
||||
double precision, intent(out) :: hij
|
||||
|
||||
integer :: exc(0:2,2)
|
||||
double precision :: phase
|
||||
|
||||
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
|
||||
|
||||
call get_mono_excitation_spin(key_i(1,spin),key_j(1,spin),exc,phase,Nint)
|
||||
call get_mono_excitation_from_fock(key_i,key_j,exc(1,1),exc(1,2),spin,phase,hij)
|
||||
end
|
||||
|
||||
subroutine i_H_j_double_spin(key_i,key_j,Nint,hij)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns <i|H|j> where i and j are determinants differing by a same-spin double excitation
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: key_i(Nint), key_j(Nint)
|
||||
double precision, intent(out) :: hij
|
||||
|
||||
integer :: exc(0:2,2)
|
||||
double precision :: phase
|
||||
double precision, external :: get_mo_bielec_integral
|
||||
|
||||
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
|
||||
|
||||
call get_double_excitation_spin(key_i,key_j,exc,phase,Nint)
|
||||
hij = phase*(get_mo_bielec_integral( &
|
||||
exc(1,1), &
|
||||
exc(2,1), &
|
||||
exc(1,2), &
|
||||
exc(2,2), mo_integrals_map) - &
|
||||
get_mo_bielec_integral( &
|
||||
exc(1,1), &
|
||||
exc(2,1), &
|
||||
exc(2,2), &
|
||||
exc(1,2), mo_integrals_map) )
|
||||
end
|
||||
|
||||
subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns <i|H|j> where i and j are determinants differing by an opposite-spin double excitation
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint
|
||||
integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
|
||||
double precision, intent(out) :: hij
|
||||
|
||||
integer :: exc(0:2,2,2)
|
||||
double precision :: phase, phase2
|
||||
double precision, external :: get_mo_bielec_integral
|
||||
|
||||
PROVIDE big_array_exchange_integrals mo_bielec_integrals_in_map
|
||||
|
||||
call get_mono_excitation_spin(key_i(1,1),key_j(1,1),exc(0,1,1),phase,Nint)
|
||||
call get_mono_excitation_spin(key_i(1,2),key_j(1,2),exc(0,1,2),phase2,Nint)
|
||||
phase = phase*phase2
|
||||
if (exc(1,1,1) == exc(1,2,2)) then
|
||||
hij = phase * big_array_exchange_integrals(exc(1,1,1),exc(1,1,2),exc(1,2,1))
|
||||
else if (exc(1,2,1) == exc(1,1,2)) then
|
||||
hij = phase * big_array_exchange_integrals(exc(1,2,1),exc(1,1,1),exc(1,2,2))
|
||||
else
|
||||
hij = phase*get_mo_bielec_integral( &
|
||||
exc(1,1,1), &
|
||||
exc(1,1,2), &
|
||||
exc(1,2,1), &
|
||||
exc(1,2,2) ,mo_integrals_map)
|
||||
endif
|
||||
end
|
||||
|
||||
|
||||
|
@ -64,9 +64,9 @@ BEGIN_TEMPLATE
|
||||
|
||||
integer :: i,j,k
|
||||
integer, allocatable :: iorder(:)
|
||||
integer(8), allocatable :: bit_tmp(:)
|
||||
integer(8) :: last_key
|
||||
integer(8), external :: spin_det_search_key
|
||||
integer*8, allocatable :: bit_tmp(:)
|
||||
integer*8 :: last_key
|
||||
integer*8, external :: spin_det_search_key
|
||||
logical,allocatable :: duplicate(:)
|
||||
|
||||
allocate ( iorder(N_det), bit_tmp(N_det), duplicate(N_det) )
|
||||
@ -388,24 +388,29 @@ END_PROVIDER
|
||||
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows , (N_det) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns, (N_det) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order , (N_det) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Sparse coefficient matrix if the wave function is expressed in a bilinear form :
|
||||
! D_a^t C D_b
|
||||
!
|
||||
! Rows are alpha determinants and columns are beta.
|
||||
!
|
||||
! Order refers to psi_det
|
||||
END_DOC
|
||||
integer :: i,j,k, l
|
||||
integer(bit_kind) :: tmp_det(N_int,2)
|
||||
integer :: idx
|
||||
integer, external :: get_index_in_psi_det_sorted_bit
|
||||
|
||||
|
||||
PROVIDE psi_coef_sorted_bit
|
||||
|
||||
integer, allocatable :: iorder(:), to_sort(:)
|
||||
integer*8, allocatable :: to_sort(:)
|
||||
integer, external :: get_index_in_psi_det_alpha_unique
|
||||
integer, external :: get_index_in_psi_det_beta_unique
|
||||
allocate(iorder(N_det), to_sort(N_det))
|
||||
allocate(to_sort(N_det))
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l)
|
||||
do k=1,N_det
|
||||
i = get_index_in_psi_det_alpha_unique(psi_det(1,1,k),N_int)
|
||||
j = get_index_in_psi_det_beta_unique (psi_det(1,2,k),N_int)
|
||||
@ -415,16 +420,146 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states)
|
||||
enddo
|
||||
psi_bilinear_matrix_rows(k) = i
|
||||
psi_bilinear_matrix_columns(k) = j
|
||||
to_sort(k) = N_det_alpha_unique * (j-1) + i
|
||||
iorder(k) = k
|
||||
to_sort(k) = int(N_det_alpha_unique,8) * int(j-1,8) + int(i,8)
|
||||
psi_bilinear_matrix_order(k) = k
|
||||
enddo
|
||||
call isort(to_sort, iorder, N_det)
|
||||
call iset_order(psi_bilinear_matrix_rows,iorder,N_det)
|
||||
call iset_order(psi_bilinear_matrix_columns,iorder,N_det)
|
||||
call dset_order(psi_bilinear_matrix_values,iorder,N_det)
|
||||
deallocate(iorder,to_sort)
|
||||
!$OMP END PARALLEL DO
|
||||
call i8sort(to_sort, psi_bilinear_matrix_order, N_det)
|
||||
call iset_order(psi_bilinear_matrix_rows,psi_bilinear_matrix_order,N_det)
|
||||
call iset_order(psi_bilinear_matrix_columns,psi_bilinear_matrix_order,N_det)
|
||||
do l=1,N_states
|
||||
call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det)
|
||||
enddo
|
||||
deallocate(to_sort)
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_reverse , (N_det) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Order which allors to go from psi_bilinear_matrix to psi_det
|
||||
END_DOC
|
||||
integer :: k
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k)
|
||||
do k=1,N_det
|
||||
psi_bilinear_matrix_order_reverse(psi_bilinear_matrix_order(k)) = k
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Sparse coefficient matrix if the wave function is expressed in a bilinear form :
|
||||
! D_a^t C D_b
|
||||
!
|
||||
! Rows are alpha determinants and columns are beta.
|
||||
!
|
||||
! Order refers to psi_det
|
||||
END_DOC
|
||||
integer :: i,j,k, l
|
||||
|
||||
l = psi_bilinear_matrix_columns(1)
|
||||
psi_bilinear_matrix_columns_loc(l) = 1
|
||||
do k=2,N_det
|
||||
if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then
|
||||
cycle
|
||||
else
|
||||
l = psi_bilinear_matrix_columns(k)
|
||||
psi_bilinear_matrix_columns_loc(l) = k
|
||||
endif
|
||||
enddo
|
||||
psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_states) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows , (N_det) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_columns, (N_det) ]
|
||||
&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_order , (N_det) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Transpose of psi_bilinear_matrix
|
||||
! D_b^t C^t D_a
|
||||
!
|
||||
! Rows are Alpha determinants and columns are beta, but the matrix is stored in row major
|
||||
! format
|
||||
END_DOC
|
||||
integer :: i,j,k,l
|
||||
|
||||
|
||||
PROVIDE psi_coef_sorted_bit
|
||||
|
||||
integer*8, allocatable :: to_sort(:)
|
||||
allocate(to_sort(N_det))
|
||||
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l)
|
||||
!$OMP DO COLLAPSE(2)
|
||||
do l=1,N_states
|
||||
do k=1,N_det
|
||||
psi_bilinear_matrix_transp_values (k,l) = psi_bilinear_matrix_values (k,l)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP ENDDO
|
||||
!$OMP DO
|
||||
do k=1,N_det
|
||||
psi_bilinear_matrix_transp_columns(k) = psi_bilinear_matrix_columns(k)
|
||||
psi_bilinear_matrix_transp_rows (k) = psi_bilinear_matrix_rows (k)
|
||||
i = psi_bilinear_matrix_transp_columns(k)
|
||||
j = psi_bilinear_matrix_transp_rows (k)
|
||||
to_sort(k) = int(N_det_beta_unique,8) * int(j-1,8) + int(i,8)
|
||||
psi_bilinear_matrix_transp_order(k) = k
|
||||
enddo
|
||||
!$OMP ENDDO
|
||||
!$OMP END PARALLEL
|
||||
call i8radix_sort(to_sort, psi_bilinear_matrix_transp_order, N_det,-1)
|
||||
call iset_order(psi_bilinear_matrix_transp_rows,psi_bilinear_matrix_transp_order,N_det)
|
||||
call iset_order(psi_bilinear_matrix_transp_columns,psi_bilinear_matrix_transp_order,N_det)
|
||||
do l=1,N_states
|
||||
call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det)
|
||||
enddo
|
||||
deallocate(to_sort)
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows_loc, (N_det_alpha_unique+1) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Location of the columns in the psi_bilinear_matrix
|
||||
END_DOC
|
||||
integer :: i,j,k, l
|
||||
|
||||
l = psi_bilinear_matrix_transp_rows(1)
|
||||
psi_bilinear_matrix_transp_rows_loc(l) = 1
|
||||
do k=2,N_det
|
||||
if (psi_bilinear_matrix_transp_rows(k) == psi_bilinear_matrix_transp_rows(k-1)) then
|
||||
cycle
|
||||
else
|
||||
l = psi_bilinear_matrix_transp_rows(k)
|
||||
psi_bilinear_matrix_transp_rows_loc(l) = k
|
||||
endif
|
||||
enddo
|
||||
psi_bilinear_matrix_transp_rows_loc(N_det_alpha_unique+1) = N_det+1
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ]
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Order which allows to go from psi_bilinear_matrix_order_transp to psi_bilinear_matrix
|
||||
END_DOC
|
||||
integer :: k
|
||||
|
||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(k)
|
||||
do k=1,N_det
|
||||
psi_bilinear_matrix_order_transp_reverse(psi_bilinear_matrix_transp_order(k)) = k
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ double precision, psi_bilinear_matrix, (N_det_alpha_unique,N_det_beta_unique,N_states) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -506,7 +641,7 @@ subroutine generate_all_alpha_beta_det_products
|
||||
! Create a wave function from all possible alpha x beta determinants
|
||||
END_DOC
|
||||
integer :: i,j,k,l
|
||||
integer :: idx, iproc
|
||||
integer :: iproc
|
||||
integer, external :: get_index_in_psi_det_sorted_bit
|
||||
integer(bit_kind), allocatable :: tmp_det(:,:,:)
|
||||
logical, external :: is_in_wavefunction
|
||||
@ -540,3 +675,489 @@ subroutine generate_all_alpha_beta_det_products
|
||||
end
|
||||
|
||||
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
|
||||
|
||||
subroutine get_all_spin_singles_and_doubles(buffer, idx, spindet, Nint, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single and double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
! /!\ : The buffer is transposed !
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet(Nint)
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
integer, intent(out) :: n_doubles
|
||||
|
||||
select case (Nint)
|
||||
case (1)
|
||||
call get_all_spin_singles_and_doubles_1(buffer, idx, spindet(1), size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
case (2)
|
||||
call get_all_spin_singles_and_doubles_2(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
case (3)
|
||||
call get_all_spin_singles_and_doubles_3(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
case (4)
|
||||
call get_all_spin_singles_and_doubles_4(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
case default
|
||||
call get_all_spin_singles_and_doubles_N_int(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
end select
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_all_spin_singles(buffer, idx, spindet, Nint, size_buffer, singles, n_singles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet(Nint)
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
|
||||
select case (N_int)
|
||||
case (1)
|
||||
call get_all_spin_singles_1(buffer, idx, spindet(1), size_buffer, singles, n_singles)
|
||||
return
|
||||
case (2)
|
||||
call get_all_spin_singles_2(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
case (3)
|
||||
call get_all_spin_singles_3(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
case (4)
|
||||
call get_all_spin_singles_4(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
case default
|
||||
call get_all_spin_singles_N_int(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
end select
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_all_spin_doubles(buffer, idx, spindet, Nint, size_buffer, doubles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: Nint, size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(Nint,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet(Nint)
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_doubles
|
||||
|
||||
select case (N_int)
|
||||
case (1)
|
||||
call get_all_spin_doubles_1(buffer, idx, spindet(1), size_buffer, doubles, n_doubles)
|
||||
case (2)
|
||||
call get_all_spin_doubles_2(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
case (3)
|
||||
call get_all_spin_doubles_3(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
case (4)
|
||||
call get_all_spin_doubles_4(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
case default
|
||||
call get_all_spin_doubles_N_int(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
end select
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
subroutine copy_psi_bilinear_to_psi(psi, isize)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Overwrites psi_det and psi_coef with the wf in bilinear order
|
||||
END_DOC
|
||||
integer, intent(in) :: isize
|
||||
integer(bit_kind), intent(out) :: psi(N_int,2,isize)
|
||||
integer :: i,j,k,l
|
||||
do k=1,isize
|
||||
i = psi_bilinear_matrix_rows(k)
|
||||
j = psi_bilinear_matrix_columns(k)
|
||||
psi(1:N_int,1,k) = psi_det_alpha_unique(1:N_int,i)
|
||||
psi(1:N_int,2,k) = psi_det_beta_unique(1:N_int,j)
|
||||
enddo
|
||||
end
|
||||
|
||||
BEGIN_PROVIDER [ integer, singles_alpha_size ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Dimension of the singles_alpha array
|
||||
END_DOC
|
||||
singles_alpha_size = elec_alpha_num * (mo_tot_num - elec_alpha_num)
|
||||
END_PROVIDER
|
||||
|
||||
BEGIN_PROVIDER [ integer*8, singles_alpha_csc_idx, (N_det_alpha_unique+1) ]
|
||||
&BEGIN_PROVIDER [ integer*8, singles_alpha_csc_size ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Dimension of the singles_alpha array
|
||||
END_DOC
|
||||
integer :: i,j
|
||||
integer, allocatable :: idx0(:), s(:)
|
||||
allocate (idx0(N_det_alpha_unique))
|
||||
do i=1, N_det_alpha_unique
|
||||
idx0(i) = i
|
||||
enddo
|
||||
|
||||
!$OMP PARALLEL DEFAULT(NONE) &
|
||||
!$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, &
|
||||
!$OMP idx0, N_int, singles_alpha_csc, &
|
||||
!$OMP singles_alpha_size, singles_alpha_csc_idx) &
|
||||
!$OMP PRIVATE(i,s,j)
|
||||
allocate (s(singles_alpha_size))
|
||||
!$OMP DO SCHEDULE(static,1)
|
||||
do i=1, N_det_alpha_unique
|
||||
call get_all_spin_singles( &
|
||||
psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int, &
|
||||
N_det_alpha_unique, s, j)
|
||||
singles_alpha_csc_idx(i+1) = int(j,8)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
deallocate(s)
|
||||
!$OMP END PARALLEL
|
||||
deallocate(idx0)
|
||||
|
||||
singles_alpha_csc_idx(1) = 1_8
|
||||
do i=2, N_det_alpha_unique+1
|
||||
singles_alpha_csc_idx(i) = singles_alpha_csc_idx(i) + singles_alpha_csc_idx(i-1)
|
||||
enddo
|
||||
singles_alpha_csc_size = singles_alpha_csc_idx(N_det_alpha_unique+1)
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer, singles_alpha_csc, (singles_alpha_csc_size) ]
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Dimension of the singles_alpha array
|
||||
END_DOC
|
||||
integer :: i, k
|
||||
integer, allocatable :: idx0(:)
|
||||
allocate (idx0(N_det_alpha_unique))
|
||||
do i=1, N_det_alpha_unique
|
||||
idx0(i) = i
|
||||
enddo
|
||||
|
||||
!$OMP PARALLEL DO DEFAULT(NONE) &
|
||||
!$OMP SHARED(N_det_alpha_unique, psi_det_alpha_unique, &
|
||||
!$OMP idx0, N_int, singles_alpha_csc, singles_alpha_csc_idx) &
|
||||
!$OMP PRIVATE(i,k) SCHEDULE(static,1)
|
||||
do i=1, N_det_alpha_unique
|
||||
call get_all_spin_singles( &
|
||||
psi_det_alpha_unique, idx0, psi_det_alpha_unique(1,i), N_int, &
|
||||
N_det_alpha_unique, singles_alpha_csc(singles_alpha_csc_idx(i)), &
|
||||
k)
|
||||
enddo
|
||||
!$OMP END PARALLEL DO
|
||||
deallocate(idx0)
|
||||
|
||||
END_PROVIDER
|
||||
|
||||
|
||||
|
||||
|
||||
subroutine get_all_spin_singles_and_doubles_1(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single and double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
! /!\ : The buffer is transposed !
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
integer, intent(out) :: n_doubles
|
||||
|
||||
integer :: i
|
||||
include 'Utils/constants.include.F'
|
||||
integer :: degree
|
||||
|
||||
|
||||
n_singles = 1
|
||||
n_doubles = 1
|
||||
!DIR$ VECTOR ALIGNED
|
||||
do i=1,size_buffer
|
||||
degree = popcnt( xor( spindet, buffer(i) ) )
|
||||
if ( degree == 4 ) then
|
||||
doubles(n_doubles) = idx(i)
|
||||
n_doubles = n_doubles+1
|
||||
else if ( degree == 2 ) then
|
||||
singles(n_singles) = idx(i)
|
||||
n_singles = n_singles+1
|
||||
endif
|
||||
enddo
|
||||
n_singles = n_singles-1
|
||||
n_doubles = n_doubles-1
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
integer :: i
|
||||
integer :: degree
|
||||
include 'Utils/constants.include.F'
|
||||
|
||||
n_singles = 1
|
||||
do i=1,size_buffer
|
||||
degree = popcnt(xor( spindet, buffer(i) ))
|
||||
singles(n_singles) = idx(i)
|
||||
if (degree == 2) then
|
||||
n_singles = n_singles+1
|
||||
endif
|
||||
enddo
|
||||
n_singles = n_singles-1
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer(size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_doubles
|
||||
integer :: i
|
||||
include 'Utils/constants.include.F'
|
||||
integer :: degree
|
||||
|
||||
n_doubles = 1
|
||||
!DIR$ VECTOR ALIGNED
|
||||
do i=1,size_buffer
|
||||
degree = popcnt(xor( spindet, buffer(i) ))
|
||||
if ( degree == 4 ) then
|
||||
doubles(n_doubles) = idx(i)
|
||||
n_doubles = n_doubles+1
|
||||
endif
|
||||
enddo
|
||||
n_doubles = n_doubles-1
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
BEGIN_TEMPLATE
|
||||
|
||||
subroutine get_all_spin_singles_and_doubles_$N_int(buffer, idx, spindet, size_buffer, singles, doubles, n_singles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single and double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
! /!\ : The buffer is transposed !
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet($N_int)
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
integer, intent(out) :: n_doubles
|
||||
|
||||
integer :: i,k
|
||||
integer(bit_kind) :: xorvec($N_int)
|
||||
integer :: degree
|
||||
|
||||
integer, external :: align_double
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec, degree
|
||||
n_singles = 1
|
||||
n_doubles = 1
|
||||
!DIR$ VECTOR ALIGNED
|
||||
do i=1,size_buffer
|
||||
|
||||
do k=1,$N_int
|
||||
xorvec(k) = xor( spindet(k), buffer(k,i) )
|
||||
enddo
|
||||
|
||||
if (xorvec(1) /= 0_8) then
|
||||
degree = popcnt(xorvec(1))
|
||||
else
|
||||
degree = 0
|
||||
endif
|
||||
|
||||
do k=2,$N_int
|
||||
!DIR$ VECTOR ALIGNED
|
||||
if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then
|
||||
degree = degree + popcnt(xorvec(k))
|
||||
endif
|
||||
enddo
|
||||
|
||||
if ( degree == 4 ) then
|
||||
doubles(n_doubles) = idx(i)
|
||||
n_doubles = n_doubles+1
|
||||
else if ( degree == 2 ) then
|
||||
singles(n_singles) = idx(i)
|
||||
n_singles = n_singles+1
|
||||
endif
|
||||
|
||||
enddo
|
||||
n_singles = n_singles-1
|
||||
n_doubles = n_doubles-1
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, singles, n_singles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the single excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet($N_int)
|
||||
integer, intent(out) :: singles(size_buffer)
|
||||
integer, intent(out) :: n_singles
|
||||
|
||||
integer :: i,k
|
||||
include 'Utils/constants.include.F'
|
||||
integer(bit_kind) :: xorvec($N_int)
|
||||
integer :: degree
|
||||
|
||||
integer, external :: align_double
|
||||
|
||||
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xorvec
|
||||
|
||||
n_singles = 1
|
||||
!DIR$ VECTOR ALIGNED
|
||||
do i=1,size_buffer
|
||||
|
||||
do k=1,$N_int
|
||||
xorvec(k) = xor( spindet(k), buffer(k,i) )
|
||||
enddo
|
||||
|
||||
if (xorvec(1) /= 0_8) then
|
||||
degree = popcnt(xorvec(1))
|
||||
else
|
||||
degree = 0
|
||||
endif
|
||||
|
||||
do k=2,$N_int
|
||||
if ( (degree <= 2).and.(xorvec(k) /= 0_8) ) then
|
||||
degree = degree + popcnt(xorvec(k))
|
||||
endif
|
||||
enddo
|
||||
|
||||
if ( degree == 2 ) then
|
||||
singles(n_singles) = idx(i)
|
||||
n_singles = n_singles+1
|
||||
endif
|
||||
|
||||
enddo
|
||||
n_singles = n_singles-1
|
||||
|
||||
end
|
||||
|
||||
|
||||
subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, doubles, n_doubles)
|
||||
use bitmasks
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
!
|
||||
! Returns the indices of all the double excitations in the list of
|
||||
! unique alpha determinants.
|
||||
!
|
||||
END_DOC
|
||||
integer, intent(in) :: size_buffer, idx(size_buffer)
|
||||
integer(bit_kind), intent(in) :: buffer($N_int,size_buffer)
|
||||
integer(bit_kind), intent(in) :: spindet($N_int)
|
||||
integer, intent(out) :: doubles(size_buffer)
|
||||
integer, intent(out) :: n_doubles
|
||||
|
||||
integer :: i,k, degree
|
||||
include 'Utils/constants.include.F'
|
||||
integer(bit_kind) :: xorvec($N_int)
|
||||
|
||||
n_doubles = 1
|
||||
!DIR$ VECTOR ALIGNED
|
||||
do i=1,size_buffer
|
||||
|
||||
do k=1,$N_int
|
||||
xorvec(k) = xor( spindet(k), buffer(k,i) )
|
||||
enddo
|
||||
|
||||
if (xorvec(1) /= 0_8) then
|
||||
degree = popcnt(xorvec(1))
|
||||
else
|
||||
degree = 0
|
||||
endif
|
||||
|
||||
do k=2,$N_int
|
||||
!DIR$ VECTOR ALIGNED
|
||||
if ( (degree <= 4).and.(xorvec(k) /= 0_8) ) then
|
||||
degree = degree + popcnt(xorvec(k))
|
||||
endif
|
||||
enddo
|
||||
|
||||
if ( degree == 4 ) then
|
||||
doubles(n_doubles) = idx(i)
|
||||
n_doubles = n_doubles+1
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
n_doubles = n_doubles-1
|
||||
|
||||
end
|
||||
|
||||
SUBST [ N_int ]
|
||||
2;;
|
||||
3;;
|
||||
4;;
|
||||
N_int;;
|
||||
|
||||
END_TEMPLATE
|
||||
|
||||
|
||||
|
@ -349,7 +349,7 @@ BEGIN_PROVIDER [ logical, ao_bielec_integrals_in_map ]
|
||||
|
||||
integral = ao_bielec_integral(1,1,1,1)
|
||||
|
||||
real :: map_mb
|
||||
double precision :: map_mb
|
||||
PROVIDE read_ao_integrals disk_access_ao_integrals
|
||||
if (read_ao_integrals) then
|
||||
print*,'Reading the AO integrals'
|
||||
|
@ -196,7 +196,7 @@ subroutine add_integrals_to_map(mask_ijkl)
|
||||
integer :: size_buffer
|
||||
integer(key_kind),allocatable :: buffer_i(:)
|
||||
real(integral_kind),allocatable :: buffer_value(:)
|
||||
real :: map_mb
|
||||
double precision :: map_mb
|
||||
|
||||
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
|
||||
integer :: i2,i3,i4
|
||||
@ -503,7 +503,7 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
||||
integer :: size_buffer
|
||||
integer(key_kind),allocatable :: buffer_i(:)
|
||||
real(integral_kind),allocatable :: buffer_value(:)
|
||||
real :: map_mb
|
||||
double precision :: map_mb
|
||||
|
||||
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
|
||||
integer :: i2,i3,i4
|
||||
@ -817,7 +817,7 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl)
|
||||
integer :: size_buffer
|
||||
integer(key_kind),allocatable :: buffer_i(:)
|
||||
real(integral_kind),allocatable :: buffer_value(:)
|
||||
real :: map_mb
|
||||
double precision :: map_mb
|
||||
|
||||
integer :: i1,j1,k1,l1, ii1, kmax, thread_num
|
||||
integer :: i2,i3,i4
|
||||
|
@ -12,17 +12,15 @@ BEGIN_TEMPLATE
|
||||
$type :: xtmp
|
||||
integer :: i, i0, j, jmax
|
||||
|
||||
do i=1,isize
|
||||
do i=2,isize
|
||||
xtmp = x(i)
|
||||
i0 = iorder(i)
|
||||
j=i-1
|
||||
do j=i-1,1,-1
|
||||
if ( x(j) > xtmp ) then
|
||||
do while (j>0)
|
||||
if ((x(j) <= xtmp)) exit
|
||||
x(j+1) = x(j)
|
||||
iorder(j+1) = iorder(j)
|
||||
else
|
||||
exit
|
||||
endif
|
||||
j=j-1
|
||||
enddo
|
||||
x(j+1) = xtmp
|
||||
iorder(j+1) = i0
|
||||
@ -158,6 +156,38 @@ BEGIN_TEMPLATE
|
||||
|
||||
end subroutine heap_$Xsort_big
|
||||
|
||||
subroutine sorted_$Xnumber(x,isize,n)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Returns the number of sorted elements
|
||||
END_DOC
|
||||
integer, intent(in) :: isize
|
||||
$type, intent(in) :: x(isize)
|
||||
integer, intent(out) :: n
|
||||
integer :: i
|
||||
n=1
|
||||
|
||||
if (isize < 2) then
|
||||
return
|
||||
endif
|
||||
|
||||
do i=2,isize
|
||||
if (x(i-1) <= x(i)) then
|
||||
n=n+1
|
||||
endif
|
||||
enddo
|
||||
|
||||
end
|
||||
|
||||
SUBST [ X, type ]
|
||||
; real ;;
|
||||
d ; double precision ;;
|
||||
i ; integer ;;
|
||||
i8 ; integer*8 ;;
|
||||
i2 ; integer*2 ;;
|
||||
END_TEMPLATE
|
||||
|
||||
BEGIN_TEMPLATE
|
||||
subroutine $Xsort(x,iorder,isize)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -168,16 +198,42 @@ BEGIN_TEMPLATE
|
||||
integer,intent(in) :: isize
|
||||
$type,intent(inout) :: x(isize)
|
||||
integer,intent(inout) :: iorder(isize)
|
||||
if (isize < 32) then
|
||||
integer :: n
|
||||
if (isize < 2) then
|
||||
return
|
||||
endif
|
||||
call sorted_$Xnumber(x,isize,n)
|
||||
if (isize == n) then
|
||||
return
|
||||
endif
|
||||
if ( isize < 32+n) then
|
||||
call insertion_$Xsort(x,iorder,isize)
|
||||
else
|
||||
call heap_$Xsort(x,iorder,isize)
|
||||
endif
|
||||
end subroutine $Xsort
|
||||
|
||||
SUBST [ X, type, Y ]
|
||||
; real ; i ;;
|
||||
d ; double precision ; i8 ;;
|
||||
END_TEMPLATE
|
||||
|
||||
BEGIN_TEMPLATE
|
||||
subroutine $Xsort(x,iorder,isize)
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Sort array x(isize).
|
||||
! iorder in input should be (1,2,3,...,isize), and in output
|
||||
! contains the new order of the elements.
|
||||
END_DOC
|
||||
integer,intent(in) :: isize
|
||||
$type,intent(inout) :: x(isize)
|
||||
integer,intent(inout) :: iorder(isize)
|
||||
integer :: n
|
||||
call $Xradix_sort(x,iorder,isize,-1)
|
||||
end subroutine $Xsort
|
||||
|
||||
SUBST [ X, type ]
|
||||
; real ;;
|
||||
d ; double precision ;;
|
||||
i ; integer ;;
|
||||
i8 ; integer*8 ;;
|
||||
i2 ; integer*2 ;;
|
||||
@ -232,17 +288,15 @@ BEGIN_TEMPLATE
|
||||
$type :: xtmp
|
||||
integer*8 :: i, i0, j, jmax
|
||||
|
||||
do i=1_8,isize
|
||||
do i=2_8,isize
|
||||
xtmp = x(i)
|
||||
i0 = iorder(i)
|
||||
j = i-1_8
|
||||
do j=i-1_8,1_8,-1_8
|
||||
if ( x(j) > xtmp ) then
|
||||
do while (j>0_8)
|
||||
if (x(j)<=xtmp) exit
|
||||
x(j+1_8) = x(j)
|
||||
iorder(j+1_8) = iorder(j)
|
||||
else
|
||||
exit
|
||||
endif
|
||||
j = j-1_8
|
||||
enddo
|
||||
x(j+1_8) = xtmp
|
||||
iorder(j+1_8) = i0
|
||||
@ -292,63 +346,107 @@ BEGIN_TEMPLATE
|
||||
! contains the new order of the elements.
|
||||
! iradix should be -1 in input.
|
||||
END_DOC
|
||||
$int_type, intent(in) :: isize
|
||||
$int_type, intent(inout) :: iorder(isize)
|
||||
$type, intent(inout) :: x(isize)
|
||||
integer*$int_type, intent(in) :: isize
|
||||
integer*$int_type, intent(inout) :: iorder(isize)
|
||||
integer*$type, intent(inout) :: x(isize)
|
||||
integer, intent(in) :: iradix
|
||||
integer :: iradix_new
|
||||
$type, allocatable :: x2(:), x1(:)
|
||||
$type :: i4
|
||||
$int_type, allocatable :: iorder1(:),iorder2(:)
|
||||
$int_type :: i0, i1, i2, i3, i
|
||||
integer, parameter :: integer_size=$octets
|
||||
$type, parameter :: zero=$zero
|
||||
$type :: mask
|
||||
integer :: nthreads, omp_get_num_threads
|
||||
integer*$type, allocatable :: x2(:), x1(:)
|
||||
integer*$type :: i4 ! data type
|
||||
integer*$int_type, allocatable :: iorder1(:),iorder2(:)
|
||||
integer*$int_type :: i0, i1, i2, i3, i ! index type
|
||||
integer*$type :: mask
|
||||
integer :: err
|
||||
!DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1
|
||||
|
||||
if (iradix == -1) then
|
||||
if (iradix == -1) then ! Sort Positive and negative
|
||||
|
||||
! Find most significant bit
|
||||
|
||||
i0 = 0_8
|
||||
i4 = -1_8
|
||||
|
||||
do i=1,isize
|
||||
i4 = max(i4,x(i))
|
||||
enddo
|
||||
i3 = i4 ! Type conversion
|
||||
|
||||
iradix_new = integer_size-1-leadz(i3)
|
||||
mask = ibset(zero,iradix_new)
|
||||
nthreads = 1
|
||||
! nthreads = 1+ishft(omp_get_num_threads(),-1)
|
||||
|
||||
integer :: err
|
||||
allocate(x1(isize/nthreads+1),iorder1(isize/nthreads+1),x2(isize/nthreads+1),iorder2(isize/nthreads+1),stat=err)
|
||||
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
|
||||
if (err /= 0) then
|
||||
print *, irp_here, ': Unable to allocate arrays'
|
||||
stop
|
||||
endif
|
||||
|
||||
i1=1_8
|
||||
i2=1_8
|
||||
|
||||
do i=1,isize
|
||||
if (iand(mask,x(i)) == zero) then
|
||||
i1=1_$int_type
|
||||
i2=1_$int_type
|
||||
do i=1_$int_type,isize
|
||||
if (x(i) < 0_$type) then
|
||||
iorder1(i1) = iorder(i)
|
||||
x1(i1) = x(i)
|
||||
i1 = i1+1_8
|
||||
x1(i1) = -x(i)
|
||||
i1 = i1+1_$int_type
|
||||
else
|
||||
iorder2(i2) = iorder(i)
|
||||
x2(i2) = x(i)
|
||||
i2 = i2+1_8
|
||||
i2 = i2+1_$int_type
|
||||
endif
|
||||
enddo
|
||||
i1=i1-1_8
|
||||
i2=i2-1_8
|
||||
i1=i1-1_$int_type
|
||||
i2=i2-1_$int_type
|
||||
|
||||
do i=1,i1
|
||||
do i=1_$int_type,i2
|
||||
iorder(i1+i) = iorder2(i)
|
||||
x(i1+i) = x2(i)
|
||||
enddo
|
||||
deallocate(x2,iorder2,stat=err)
|
||||
if (err /= 0) then
|
||||
print *, irp_here, ': Unable to deallocate arrays x2, iorder2'
|
||||
stop
|
||||
endif
|
||||
|
||||
|
||||
if (i1 > 1_$int_type) then
|
||||
call $Xradix_sort$big(x1,iorder1,i1,-2)
|
||||
do i=1_$int_type,i1
|
||||
x(i) = -x1(1_$int_type+i1-i)
|
||||
iorder(i) = iorder1(1_$int_type+i1-i)
|
||||
enddo
|
||||
endif
|
||||
deallocate(x1,iorder1,stat=err)
|
||||
if (err /= 0) then
|
||||
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
|
||||
stop
|
||||
endif
|
||||
|
||||
if (i2>1_$int_type) then
|
||||
call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
else if (iradix == -2) then ! Positive
|
||||
|
||||
! Find most significant bit
|
||||
|
||||
i0 = 0_$int_type
|
||||
i4 = maxval(x)
|
||||
|
||||
iradix_new = max($integer_size-1-leadz(i4),1)
|
||||
mask = ibset(0_$type,iradix_new)
|
||||
|
||||
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
|
||||
if (err /= 0) then
|
||||
print *, irp_here, ': Unable to allocate arrays'
|
||||
stop
|
||||
endif
|
||||
|
||||
i1=1_$int_type
|
||||
i2=1_$int_type
|
||||
|
||||
do i=1_$int_type,isize
|
||||
if (iand(mask,x(i)) == 0_$type) then
|
||||
iorder1(i1) = iorder(i)
|
||||
x1(i1) = x(i)
|
||||
i1 = i1+1_$int_type
|
||||
else
|
||||
iorder2(i2) = iorder(i)
|
||||
x2(i2) = x(i)
|
||||
i2 = i2+1_$int_type
|
||||
endif
|
||||
enddo
|
||||
i1=i1-1_$int_type
|
||||
i2=i2-1_$int_type
|
||||
|
||||
do i=1_$int_type,i1
|
||||
iorder(i0+i) = iorder1(i)
|
||||
x(i0+i) = x1(i)
|
||||
enddo
|
||||
@ -361,7 +459,7 @@ BEGIN_TEMPLATE
|
||||
endif
|
||||
|
||||
|
||||
do i=1,i2
|
||||
do i=1_$int_type,i2
|
||||
iorder(i0+i) = iorder2(i)
|
||||
x(i0+i) = x2(i)
|
||||
enddo
|
||||
@ -373,12 +471,12 @@ BEGIN_TEMPLATE
|
||||
endif
|
||||
|
||||
|
||||
if (i3>1) then
|
||||
if (i3>1_$int_type) then
|
||||
call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
|
||||
endif
|
||||
|
||||
if (isize-i3>1) then
|
||||
call $Xradix_sort$big(x(i3+1),iorder(i3+1),isize-i3,iradix_new-1)
|
||||
if (isize-i3>1_$int_type) then
|
||||
call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
|
||||
endif
|
||||
|
||||
return
|
||||
@ -399,25 +497,25 @@ BEGIN_TEMPLATE
|
||||
endif
|
||||
|
||||
|
||||
mask = ibset(zero,iradix)
|
||||
i0=1
|
||||
i1=1
|
||||
mask = ibset(0_$type,iradix)
|
||||
i0=1_$int_type
|
||||
i1=1_$int_type
|
||||
|
||||
do i=1,isize
|
||||
if (iand(mask,x(i)) == zero) then
|
||||
do i=1_$int_type,isize
|
||||
if (iand(mask,x(i)) == 0_$type) then
|
||||
iorder(i0) = iorder(i)
|
||||
x(i0) = x(i)
|
||||
i0 = i0+1
|
||||
i0 = i0+1_$int_type
|
||||
else
|
||||
iorder2(i1) = iorder(i)
|
||||
x2(i1) = x(i)
|
||||
i1 = i1+1
|
||||
i1 = i1+1_$int_type
|
||||
endif
|
||||
enddo
|
||||
i0=i0-1
|
||||
i1=i1-1
|
||||
i0=i0-1_$int_type
|
||||
i1=i1-1_$int_type
|
||||
|
||||
do i=1,i1
|
||||
do i=1_$int_type,i1
|
||||
iorder(i0+i) = iorder2(i)
|
||||
x(i0+i) = x2(i)
|
||||
enddo
|
||||
@ -434,8 +532,8 @@ BEGIN_TEMPLATE
|
||||
endif
|
||||
|
||||
|
||||
if (i1>1) then
|
||||
call $Xradix_sort$big(x(i0+1),iorder(i0+1),i1,iradix-1)
|
||||
if (i1>1_$int_type) then
|
||||
call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
|
||||
endif
|
||||
if (i0>1) then
|
||||
call $Xradix_sort$big(x,iorder,i0,iradix-1)
|
||||
@ -443,12 +541,12 @@ BEGIN_TEMPLATE
|
||||
|
||||
end
|
||||
|
||||
SUBST [ X, type, octets, is_big, big, int_type, zero ]
|
||||
i ; integer ; 32 ; .False. ; ; integer ; 0;;
|
||||
i8 ; integer*8 ; 32 ; .False. ; ; integer ; 0_8;;
|
||||
i2 ; integer*2 ; 32 ; .False. ; ; integer ; 0;;
|
||||
i ; integer ; 64 ; .True. ; _big ; integer*8 ; 0 ;;
|
||||
i8 ; integer*8 ; 64 ; .True. ; _big ; integer*8 ; 0_8 ;;
|
||||
SUBST [ X, type, integer_size, is_big, big, int_type ]
|
||||
i ; 4 ; 32 ; .False. ; ; 4 ;;
|
||||
i8 ; 8 ; 64 ; .False. ; ; 4 ;;
|
||||
i2 ; 2 ; 16 ; .False. ; ; 4 ;;
|
||||
i ; 4 ; 32 ; .True. ; _big ; 8 ;;
|
||||
i8 ; 8 ; 64 ; .True. ; _big ; 8 ;;
|
||||
END_TEMPLATE
|
||||
|
||||
|
||||
|
@ -1,11 +1,8 @@
|
||||
use f77_zmq
|
||||
use omp_lib
|
||||
|
||||
integer, pointer :: thread_id
|
||||
integer(omp_lock_kind) :: zmq_lock
|
||||
|
||||
|
||||
BEGIN_PROVIDER [ integer(ZMQ_PTR), zmq_context ]
|
||||
&BEGIN_PROVIDER [ integer(omp_lock_kind), zmq_lock ]
|
||||
use f77_zmq
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
@ -94,7 +91,7 @@ subroutine switch_qp_run_to_master
|
||||
print *, 'This run should be started with the qp_run command'
|
||||
stop -1
|
||||
endif
|
||||
qp_run_address = trim(buffer)
|
||||
qp_run_address = adjustl(buffer)
|
||||
print *, 'Switched to qp_run master : ', trim(qp_run_address)
|
||||
|
||||
integer :: i
|
||||
@ -235,8 +232,8 @@ function new_zmq_pull_socket()
|
||||
if (zmq_context == 0_ZMQ_PTR) then
|
||||
stop 'zmq_context is uninitialized'
|
||||
endif
|
||||
new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL)
|
||||
! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP)
|
||||
! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL)
|
||||
new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (new_zmq_pull_socket == 0_ZMQ_PTR) then
|
||||
stop 'Unable to create zmq pull socket'
|
||||
@ -312,8 +309,8 @@ function new_zmq_push_socket(thread)
|
||||
if (zmq_context == 0_ZMQ_PTR) then
|
||||
stop 'zmq_context is uninitialized'
|
||||
endif
|
||||
new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH)
|
||||
! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||
! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH)
|
||||
new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (new_zmq_push_socket == 0_ZMQ_PTR) then
|
||||
stop 'Unable to create zmq push socket'
|
||||
@ -407,7 +404,9 @@ subroutine end_zmq_sub_socket(zmq_socket_sub)
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_socket_sub
|
||||
integer :: rc
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
rc = f77_zmq_close(zmq_socket_sub)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (rc /= 0) then
|
||||
print *, 'f77_zmq_close(zmq_socket_sub)'
|
||||
stop 'error'
|
||||
@ -426,7 +425,9 @@ subroutine end_zmq_pair_socket(zmq_socket_pair)
|
||||
integer :: rc
|
||||
character*(8), external :: zmq_port
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
rc = f77_zmq_close(zmq_socket_pair)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (rc /= 0) then
|
||||
print *, 'f77_zmq_close(zmq_socket_pair)'
|
||||
stop 'error'
|
||||
@ -444,7 +445,14 @@ subroutine end_zmq_pull_socket(zmq_socket_pull)
|
||||
integer :: rc
|
||||
character*(8), external :: zmq_port
|
||||
|
||||
rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,0,4)
|
||||
if (rc /= 0) then
|
||||
stop 'Unable to set ZMQ_LINGER on pull socket'
|
||||
endif
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
rc = f77_zmq_close(zmq_socket_pull)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (rc /= 0) then
|
||||
print *, 'f77_zmq_close(zmq_socket_pull)'
|
||||
stop 'error'
|
||||
@ -469,7 +477,9 @@ subroutine end_zmq_push_socket(zmq_socket_push,thread)
|
||||
stop 'Unable to set ZMQ_LINGER on push socket'
|
||||
endif
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
rc = f77_zmq_close(zmq_socket_push)
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (rc /= 0) then
|
||||
print *, 'f77_zmq_close(zmq_socket_push)'
|
||||
stop 'error'
|
||||
@ -500,10 +510,17 @@ subroutine new_parallel_job(zmq_to_qp_run_socket,name_in)
|
||||
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
|
||||
integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
zmq_context = f77_zmq_ctx_new ()
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (zmq_context == 0_ZMQ_PTR) then
|
||||
stop 'ZMQ_PTR is null'
|
||||
endif
|
||||
! rc = f77_zmq_ctx_set(zmq_context, ZMQ_IO_THREADS, nproc)
|
||||
! if (rc /= 0) then
|
||||
! print *, 'Unable to set the number of ZMQ IO threads to', nproc
|
||||
! endif
|
||||
|
||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||
name = name_in
|
||||
sze = len(trim(name))
|
||||
@ -584,7 +601,10 @@ subroutine end_parallel_job(zmq_to_qp_run_socket,name_in)
|
||||
zmq_state = 'No_state'
|
||||
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
|
||||
|
||||
call omp_set_lock(zmq_lock)
|
||||
rc = f77_zmq_ctx_term(zmq_context)
|
||||
zmq_context = 0_ZMQ_PTR
|
||||
call omp_unset_lock(zmq_lock)
|
||||
if (rc /= 0) then
|
||||
print *, 'Unable to terminate ZMQ context'
|
||||
stop 'error'
|
||||
@ -684,10 +704,43 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
character*(*), intent(in) :: task
|
||||
|
||||
integer :: rc, sze
|
||||
character*(512) :: message
|
||||
character(len=:), allocatable :: message
|
||||
|
||||
message='add_task '//trim(zmq_state)//' '//trim(task)
|
||||
sze = len(message)
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0)
|
||||
|
||||
if (rc /= sze) then
|
||||
print *, rc, sze
|
||||
print *, irp_here,': f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, sze-1, 0)
|
||||
if (message(1:rc) /= 'ok') then
|
||||
print *, trim(task)
|
||||
print *, 'Unable to add the next task'
|
||||
stop -1
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
subroutine add_task_to_taskserver_send(zmq_to_qp_run_socket,task)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Get a task from the task server
|
||||
END_DOC
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
|
||||
character*(*), intent(in) :: task
|
||||
|
||||
integer :: rc, sze
|
||||
character(len=:), allocatable :: message
|
||||
|
||||
sze = len(trim(task))+12+len(trim(zmq_state))
|
||||
message = repeat(' ',sze)
|
||||
write(message,*) 'add_task '//trim(zmq_state)//' '//trim(task)
|
||||
|
||||
sze = len(trim(message))
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)
|
||||
if (rc /= sze) then
|
||||
print *, rc, sze
|
||||
@ -695,10 +748,20 @@ subroutine add_task_to_taskserver(zmq_to_qp_run_socket,task)
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
end
|
||||
|
||||
subroutine add_task_to_taskserver_recv(zmq_to_qp_run_socket)
|
||||
use f77_zmq
|
||||
implicit none
|
||||
BEGIN_DOC
|
||||
! Get a task from the task server
|
||||
END_DOC
|
||||
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
|
||||
|
||||
integer :: rc, sze
|
||||
character*(512) :: message
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
|
||||
message = trim(message(1:rc))
|
||||
if (trim(message) /= 'ok') then
|
||||
print *, trim(task)
|
||||
if (message(1:rc) /= 'ok') then
|
||||
print *, 'Unable to add the next task'
|
||||
stop -1
|
||||
endif
|
||||
@ -726,8 +789,7 @@ subroutine task_done_to_taskserver(zmq_to_qp_run_socket, worker_id, task_id)
|
||||
endif
|
||||
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
|
||||
message = trim(message(1:rc))
|
||||
if (trim(message) /= 'ok') then
|
||||
if (trim(message(1:rc)) /= 'ok') then
|
||||
print *, 'Unable to send task_done message'
|
||||
stop -1
|
||||
endif
|
||||
@ -752,17 +814,17 @@ subroutine get_task_from_taskserver(zmq_to_qp_run_socket,worker_id,task_id,task)
|
||||
write(message,*) 'get_task '//trim(zmq_state), worker_id
|
||||
|
||||
sze = len(trim(message))
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)
|
||||
rc = f77_zmq_send(zmq_to_qp_run_socket, message, sze, 0)
|
||||
if (rc /= sze) then
|
||||
print *, irp_here, ':f77_zmq_send(zmq_to_qp_run_socket, trim(message), sze, 0)'
|
||||
stop 'error'
|
||||
endif
|
||||
|
||||
message = repeat(' ',512)
|
||||
rc = f77_zmq_recv(zmq_to_qp_run_socket, message, 510, 0)
|
||||
message = trim(message(1:rc))
|
||||
read(message,*) reply
|
||||
read(message(1:rc),*) reply
|
||||
if (trim(reply) == 'get_task_reply') then
|
||||
read(message,*) reply, task_id
|
||||
read(message(1:rc),*) reply, task_id
|
||||
rc = 15
|
||||
do while (message(rc:rc) == ' ')
|
||||
rc += 1
|
||||
@ -938,3 +1000,4 @@ subroutine wait_for_states(state_wait,state,n)
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user