mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-07 05:53:37 +01:00
Optimizations
This commit is contained in:
parent
0f8ea82d68
commit
08d197ebbb
@ -32,7 +32,7 @@ OPENMP : 1 ; Append OpenMP flags
|
|||||||
#
|
#
|
||||||
[OPT]
|
[OPT]
|
||||||
FC : -traceback
|
FC : -traceback
|
||||||
FCFLAGS : -march=corei7-avx -O2 -ip -ftz -g
|
FCFLAGS : -xAVX -O2 -ip -ftz -g
|
||||||
|
|
||||||
# Profiling flags
|
# Profiling flags
|
||||||
#################
|
#################
|
||||||
|
@ -306,10 +306,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
i = psi_bilinear_matrix_rows(l_a)
|
i = psi_bilinear_matrix_rows(l_a)
|
||||||
if (nt + exc_degree(i) <= 4) then
|
if (nt + exc_degree(i) <= 4) then
|
||||||
idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
|
idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
|
||||||
if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
|
if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then
|
||||||
indices(k) = idx
|
indices(k) = idx
|
||||||
k=k+1
|
k=k+1
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
@ -329,10 +330,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
idx = psi_det_sorted_order( &
|
idx = psi_det_sorted_order( &
|
||||||
psi_bilinear_matrix_order( &
|
psi_bilinear_matrix_order( &
|
||||||
psi_bilinear_matrix_transp_order(l_a)))
|
psi_bilinear_matrix_transp_order(l_a)))
|
||||||
if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
|
if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then
|
||||||
indices(k) = idx
|
indices(k) = idx
|
||||||
k=k+1
|
k=k+1
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
@ -440,19 +442,20 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
fullinteresting(0) = 0
|
fullinteresting(0) = 0
|
||||||
|
|
||||||
do ii=1,preinteresting(0)
|
do ii=1,preinteresting(0)
|
||||||
|
i = preinteresting(ii)
|
||||||
select case (N_int)
|
select case (N_int)
|
||||||
case (1)
|
case (1)
|
||||||
mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,preinteresting(ii)))
|
mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
|
||||||
mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,preinteresting(ii)))
|
mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
|
||||||
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
|
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
|
||||||
case (2)
|
case (2)
|
||||||
mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted(1:2,1,preinteresting(ii)))
|
mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted(1:2,1,i))
|
||||||
mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted(1:2,2,preinteresting(ii)))
|
mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted(1:2,2,i))
|
||||||
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + &
|
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + &
|
||||||
popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
|
popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
|
||||||
case (3)
|
case (3)
|
||||||
mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted(1:3,1,preinteresting(ii)))
|
mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted(1:3,1,i))
|
||||||
mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted(1:3,2,preinteresting(ii)))
|
mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted(1:3,2,i))
|
||||||
nt = 0
|
nt = 0
|
||||||
do j=3,1,-1
|
do j=3,1,-1
|
||||||
if (mobMask(j,1) /= 0_bit_kind) then
|
if (mobMask(j,1) /= 0_bit_kind) then
|
||||||
@ -465,8 +468,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
endif
|
endif
|
||||||
end do
|
end do
|
||||||
case (4)
|
case (4)
|
||||||
mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted(1:4,1,preinteresting(ii)))
|
mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted(1:4,1,i))
|
||||||
mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted(1:4,2,preinteresting(ii)))
|
mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted(1:4,2,i))
|
||||||
nt = 0
|
nt = 0
|
||||||
do j=4,1,-1
|
do j=4,1,-1
|
||||||
if (mobMask(j,1) /= 0_bit_kind) then
|
if (mobMask(j,1) /= 0_bit_kind) then
|
||||||
@ -479,8 +482,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
endif
|
endif
|
||||||
end do
|
end do
|
||||||
case default
|
case default
|
||||||
mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted(1:N_int,1,preinteresting(ii)))
|
mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted(1:N_int,1,i))
|
||||||
mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted(1:N_int,2,preinteresting(ii)))
|
mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted(1:N_int,2,i))
|
||||||
nt = 0
|
nt = 0
|
||||||
do j=N_int,1,-1
|
do j=N_int,1,-1
|
||||||
if (mobMask(j,1) /= 0_bit_kind) then
|
if (mobMask(j,1) /= 0_bit_kind) then
|
||||||
@ -495,7 +498,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
end select
|
end select
|
||||||
|
|
||||||
if(nt <= 4) then
|
if(nt <= 4) then
|
||||||
i = preinteresting(ii)
|
|
||||||
sze = interesting(0)
|
sze = interesting(0)
|
||||||
if (sze+1 == size(interesting)) then
|
if (sze+1 == size(interesting)) then
|
||||||
allocate (tmp_array(0:sze))
|
allocate (tmp_array(0:sze))
|
||||||
@ -563,6 +565,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
|
|||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
endif
|
endif
|
||||||
|
|
||||||
do i=1,fullinteresting(0)
|
do i=1,fullinteresting(0)
|
||||||
fullminilist(1:N_int,1:2,i) = psi_det_sorted(1:N_int,1:2,fullinteresting(i))
|
fullminilist(1:N_int,1:2,i) = psi_det_sorted(1:N_int,1:2,fullinteresting(i))
|
||||||
enddo
|
enddo
|
||||||
@ -707,8 +710,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
|||||||
if(bannedOrb(p2, s2)) cycle
|
if(bannedOrb(p2, s2)) cycle
|
||||||
if(banned(p1,p2)) cycle
|
if(banned(p1,p2)) cycle
|
||||||
|
|
||||||
|
val = maxval(abs(mat(1:N_states, p1, p2)))
|
||||||
if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle
|
if( val == 0d0) cycle
|
||||||
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
|
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
|
||||||
|
|
||||||
if (do_only_cas) then
|
if (do_only_cas) then
|
||||||
@ -958,10 +961,12 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
|
|
||||||
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
|
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
|
||||||
if(ma == 1) then
|
if(ma == 1) then
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, putj, puti) = mat(k, putj, puti) +coefs(k) * hij
|
mat(k, putj, puti) = mat(k, putj, puti) +coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
else
|
else
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
@ -981,6 +986,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
p1 = p(turn2(i), 1)
|
p1 = p(turn2(i), 1)
|
||||||
|
|
||||||
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
|
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
@ -1005,6 +1011,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
p1 = p(i1, ma)
|
p1 = p(i1, ma)
|
||||||
p2 = p(i2, ma)
|
p2 = p(i2, ma)
|
||||||
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
|
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
@ -1023,9 +1030,17 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
p2 = p(i, ma)
|
p2 = p(i, ma)
|
||||||
|
|
||||||
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
|
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
|
||||||
|
if (puti < putj) then
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, min(puti, putj), max(puti, putj)) = mat(k, min(puti, putj), max(puti, putj)) + coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
|
else
|
||||||
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
end do
|
end do
|
||||||
else ! tip == 4
|
else ! tip == 4
|
||||||
puti = p(1, sp)
|
puti = p(1, sp)
|
||||||
@ -1036,6 +1051,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
h1 = h(1, mi)
|
h1 = h(1, mi)
|
||||||
h2 = h(2, mi)
|
h2 = h(2, mi)
|
||||||
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
|
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
@ -1061,7 +1077,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
|
|
||||||
logical, allocatable :: lbanned(:,:)
|
logical, allocatable :: lbanned(:,:)
|
||||||
integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
|
integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
|
||||||
integer :: hfix, pfix, h1, h2, p1, p2, ib, k
|
integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l
|
||||||
|
|
||||||
integer, parameter :: turn2(2) = (/2,1/)
|
integer, parameter :: turn2(2) = (/2,1/)
|
||||||
integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/))
|
integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/))
|
||||||
@ -1121,7 +1137,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
if(ma == 1) then
|
if(ma == 1) then
|
||||||
mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
|
mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
|
||||||
else
|
else
|
||||||
mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num)
|
do l=1,mo_num
|
||||||
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
end if
|
end if
|
||||||
end if
|
end if
|
||||||
|
|
||||||
@ -1140,13 +1161,16 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
hij = hij_cache(puti,2)
|
hij = hij_cache(puti,2)
|
||||||
if (hij /= 0.d0) then
|
if (hij /= 0.d0) then
|
||||||
hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
|
hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
|
tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
|
||||||
enddo
|
enddo
|
||||||
endif
|
endif
|
||||||
end if
|
end if
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
putj = p2
|
putj = p2
|
||||||
|
! do puti=1,mo_num !HOT
|
||||||
if(.not. banned(putj,puti,bant)) then
|
if(.not. banned(putj,puti,bant)) then
|
||||||
hij = hij_cache(puti,1)
|
hij = hij_cache(puti,1)
|
||||||
if (hij /= 0.d0) then
|
if (hij /= 0.d0) then
|
||||||
@ -1162,8 +1186,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
|
mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
|
||||||
mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
|
mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
|
||||||
else
|
else
|
||||||
mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:)
|
!DIR$ NOVECTOR
|
||||||
mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:)
|
do l=1,mo_num
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
|
||||||
|
mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
end if
|
end if
|
||||||
|
|
||||||
else ! sp /= 3
|
else ! sp /= 3
|
||||||
@ -1197,7 +1226,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
end do
|
end do
|
||||||
|
|
||||||
mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
|
mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
|
||||||
mat(:, puti, puti:) = mat(:, puti,puti:) + tmp_row(:,puti:)
|
do l=puti,mo_num
|
||||||
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
end do
|
end do
|
||||||
else
|
else
|
||||||
hfix = h(1,mi)
|
hfix = h(1,mi)
|
||||||
@ -1234,9 +1268,19 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
end if
|
end if
|
||||||
end do
|
end do
|
||||||
mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
|
mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
|
||||||
mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:)
|
do l=p2,mo_num
|
||||||
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
|
mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
|
||||||
mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:)
|
do l=p1,mo_num
|
||||||
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
end if
|
end if
|
||||||
end if
|
end if
|
||||||
deallocate(lbanned,hij_cache)
|
deallocate(lbanned,hij_cache)
|
||||||
@ -1259,7 +1303,10 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
|
if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
|
||||||
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
|
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
|
||||||
call i_h_j(gen, det, N_int, hij)
|
call i_h_j(gen, det, N_int, hij)
|
||||||
mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
|
!DIR$ NOVECTOR
|
||||||
|
do k=1,N_states
|
||||||
|
mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
|
||||||
|
enddo
|
||||||
end do
|
end do
|
||||||
end do
|
end do
|
||||||
end
|
end
|
||||||
@ -1307,6 +1354,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
hij = hij_cache1(p2) * phase
|
hij = hij_cache1(p2) * phase
|
||||||
end if
|
end if
|
||||||
if (hij == 0.d0) cycle
|
if (hij == 0.d0) cycle
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT
|
mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT
|
||||||
enddo
|
enddo
|
||||||
@ -1349,6 +1397,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
|
|||||||
hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
|
hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
|
||||||
end if
|
end if
|
||||||
if (hij == 0.d0) cycle
|
if (hij == 0.d0) cycle
|
||||||
|
!DIR$ NOVECTOR
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
|
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
|
||||||
enddo
|
enddo
|
||||||
|
@ -219,7 +219,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
exit
|
exit
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if (itermax > 3) then
|
if (itermax > 4) then
|
||||||
itermax = itermax - 1
|
itermax = itermax - 1
|
||||||
else if (m==1.and.disk_based_davidson) then
|
else if (m==1.and.disk_based_davidson) then
|
||||||
m=0
|
m=0
|
||||||
@ -417,7 +417,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
! Compute s_kl = <u_k | S_l> = <u_k| S2 |u_l>
|
! Compute s_kl = <u_k | S_l> = <u_k| S2 |u_l>
|
||||||
! -------------------------------------------
|
! -------------------------------------------
|
||||||
|
|
||||||
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k)
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2)
|
||||||
do j=1,shift2
|
do j=1,shift2
|
||||||
do i=1,shift2
|
do i=1,shift2
|
||||||
s_(i,j) = 0.d0
|
s_(i,j) = 0.d0
|
||||||
@ -572,6 +572,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
! Compute residual vector and davidson step
|
! Compute residual vector and davidson step
|
||||||
! -----------------------------------------
|
! -----------------------------------------
|
||||||
|
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
|
||||||
do k=1,N_st_diag
|
do k=1,N_st_diag
|
||||||
do i=1,sze
|
do i=1,sze
|
||||||
U(i,shift2+k) = &
|
U(i,shift2+k) = &
|
||||||
@ -586,6 +587,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
|
|||||||
to_print(3,k) = residual_norm(k)
|
to_print(3,k) = residual_norm(k)
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
|
||||||
if ((itertot>1).and.(iter == 1)) then
|
if ((itertot>1).and.(iter == 1)) then
|
||||||
|
@ -188,7 +188,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
|
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
|
||||||
|
|
||||||
double precision :: hij, sij
|
double precision :: hij, sij
|
||||||
integer :: i,j,k,l
|
integer :: i,j,k,l,kk
|
||||||
integer :: k_a, k_b, l_a, l_b, m_a, m_b
|
integer :: k_a, k_b, l_a, l_b, m_a, m_b
|
||||||
integer :: istate
|
integer :: istate
|
||||||
integer :: krow, kcol, krow_b, kcol_b
|
integer :: krow, kcol, krow_b, kcol_b
|
||||||
@ -209,6 +209,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
logical :: compute_singles
|
logical :: compute_singles
|
||||||
integer*8 :: last_found, left, right, right_max
|
integer*8 :: last_found, left, right, right_max
|
||||||
double precision :: rss, mem, ratio
|
double precision :: rss, mem, ratio
|
||||||
|
double precision, allocatable :: utl(:,:)
|
||||||
|
|
||||||
! call resident_memory(rss)
|
! call resident_memory(rss)
|
||||||
! mem = dble(singles_beta_csc_size) / 1024.d0**3
|
! mem = dble(singles_beta_csc_size) / 1024.d0**3
|
||||||
@ -247,7 +248,7 @@ compute_singles=.True.
|
|||||||
!$OMP singles_alpha_csc,singles_alpha_csc_idx, &
|
!$OMP singles_alpha_csc,singles_alpha_csc_idx, &
|
||||||
!$OMP singles_beta_csc,singles_beta_csc_idx) &
|
!$OMP singles_beta_csc,singles_beta_csc_idx) &
|
||||||
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
|
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
|
||||||
!$OMP lcol, lrow, l_a, l_b, &
|
!$OMP lcol, lrow, l_a, l_b, utl, kk, &
|
||||||
!$OMP buffer, doubles, n_doubles, &
|
!$OMP buffer, doubles, n_doubles, &
|
||||||
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
|
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
|
||||||
!$OMP singles_a, n_singles_a, singles_b, ratio, &
|
!$OMP singles_a, n_singles_a, singles_b, ratio, &
|
||||||
@ -260,7 +261,7 @@ compute_singles=.True.
|
|||||||
singles_a(maxab), &
|
singles_a(maxab), &
|
||||||
singles_b(maxab), &
|
singles_b(maxab), &
|
||||||
doubles(maxab), &
|
doubles(maxab), &
|
||||||
idx(maxab))
|
idx(maxab), utl(N_st,32))
|
||||||
|
|
||||||
kcol_prev=-1
|
kcol_prev=-1
|
||||||
|
|
||||||
@ -398,10 +399,21 @@ compute_singles=.True.
|
|||||||
! -----------------------
|
! -----------------------
|
||||||
|
|
||||||
!DIR$ LOOP COUNT avg(1000)
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do k = 1,n_singles_a
|
do k = 1,n_singles_a,32
|
||||||
l_a = singles_a(k)
|
! Prefetch u_t(:,l_a)
|
||||||
|
do kk=0,31
|
||||||
|
if (k+kk > n_singles_a) exit
|
||||||
|
l_a = singles_a(k+kk)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
do l=1,N_st
|
||||||
|
utl(l,kk+1) = u_t(l,l_a)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do kk=0,31
|
||||||
|
if (k+kk > n_singles_a) exit
|
||||||
|
l_a = singles_a(k+kk)
|
||||||
lrow = psi_bilinear_matrix_rows(l_a)
|
lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
ASSERT (lrow <= N_det_alpha_unique)
|
ASSERT (lrow <= N_det_alpha_unique)
|
||||||
|
|
||||||
@ -410,8 +422,9 @@ compute_singles=.True.
|
|||||||
call get_s2(tmp_det,tmp_det2,$N_int,sij)
|
call get_s2(tmp_det,tmp_det2,$N_int,sij)
|
||||||
!DIR$ LOOP COUNT AVG(4)
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
|
||||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1)
|
||||||
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
@ -475,10 +488,21 @@ compute_singles=.True.
|
|||||||
|
|
||||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||||
!DIR$ LOOP COUNT avg(1000)
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do i=1,n_singles_a
|
do i=1,n_singles_a,32
|
||||||
l_a = singles_a(i)
|
! Prefetch u_t(:,l_a)
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_singles_a) exit
|
||||||
|
l_a = singles_a(i+kk)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
do l=1,N_st
|
||||||
|
utl(l,kk+1) = u_t(l,l_a)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_singles_a) exit
|
||||||
|
l_a = singles_a(i+kk)
|
||||||
lrow = psi_bilinear_matrix_rows(l_a)
|
lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
ASSERT (lrow <= N_det_alpha_unique)
|
ASSERT (lrow <= N_det_alpha_unique)
|
||||||
|
|
||||||
@ -487,30 +511,43 @@ compute_singles=.True.
|
|||||||
|
|
||||||
!DIR$ LOOP COUNT AVG(4)
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
|
||||||
! Compute Hij for all alpha doubles
|
! Compute Hij for all alpha doubles
|
||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
!DIR$ LOOP COUNT avg(50000)
|
!DIR$ LOOP COUNT avg(50000)
|
||||||
do i=1,n_doubles
|
do i=1,n_doubles,32
|
||||||
l_a = doubles(i)
|
! Prefetch u_t(:,l_a)
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_doubles) exit
|
||||||
|
l_a = doubles(i+kk)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
do l=1,N_st
|
||||||
|
utl(l,kk+1) = u_t(l,l_a)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_doubles) exit
|
||||||
|
l_a = doubles(i+kk)
|
||||||
lrow = psi_bilinear_matrix_rows(l_a)
|
lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
ASSERT (lrow <= N_det_alpha_unique)
|
ASSERT (lrow <= N_det_alpha_unique)
|
||||||
|
|
||||||
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
||||||
!DIR$ LOOP COUNT AVG(4)
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
|
||||||
! Single and double beta excitations
|
! Single and double beta excitations
|
||||||
@ -560,45 +597,70 @@ compute_singles=.True.
|
|||||||
|
|
||||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||||
!DIR$ LOOP COUNT avg(1000)
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do i=1,n_singles_b
|
do i=1,n_singles_b,32
|
||||||
l_b = singles_b(i)
|
do kk=0,31
|
||||||
|
if (i+kk > n_singles_b) exit
|
||||||
|
l_b = singles_b(i+kk)
|
||||||
ASSERT (l_b <= N_det)
|
ASSERT (l_b <= N_det)
|
||||||
|
|
||||||
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
do l=1,N_st
|
||||||
|
utl(l,kk+1) = u_t(l,l_a)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_singles_b) exit
|
||||||
|
l_b = singles_b(i+kk)
|
||||||
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
||||||
ASSERT (lcol <= N_det_beta_unique)
|
ASSERT (lcol <= N_det_beta_unique)
|
||||||
|
|
||||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
|
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
|
||||||
call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
|
call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
|
||||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
|
||||||
ASSERT (l_a <= N_det)
|
|
||||||
!DIR$ LOOP COUNT AVG(4)
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
! Compute Hij for all beta doubles
|
! Compute Hij for all beta doubles
|
||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
!DIR$ LOOP COUNT avg(50000)
|
!DIR$ LOOP COUNT avg(50000)
|
||||||
do i=1,n_doubles
|
do i=1,n_doubles,32
|
||||||
l_b = doubles(i)
|
do kk=0,31
|
||||||
|
if (i+kk > n_doubles) exit
|
||||||
|
l_b = doubles(i+kk)
|
||||||
ASSERT (l_b <= N_det)
|
ASSERT (l_b <= N_det)
|
||||||
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
do l=1,N_st
|
||||||
|
utl(l,kk+1) = u_t(l,l_a)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do kk=0,31
|
||||||
|
if (i+kk > n_doubles) exit
|
||||||
|
l_b = doubles(i+kk)
|
||||||
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
lcol = psi_bilinear_matrix_transp_columns(l_b)
|
||||||
ASSERT (lcol <= N_det_beta_unique)
|
ASSERT (lcol <= N_det_beta_unique)
|
||||||
|
|
||||||
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
|
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
|
||||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
|
||||||
ASSERT (l_a <= N_det)
|
|
||||||
|
|
||||||
!DIR$ LOOP COUNT AVG(4)
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
|
||||||
! Diagonal contribution
|
! Diagonal contribution
|
||||||
@ -629,7 +691,7 @@ compute_singles=.True.
|
|||||||
|
|
||||||
end do
|
end do
|
||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
deallocate(buffer, singles_a, singles_b, doubles, idx)
|
deallocate(buffer, singles_a, singles_b, doubles, idx, utl)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -108,6 +108,11 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
|
|||||||
integer :: occ_partcl(N_int*bit_kind_size,2)
|
integer :: occ_partcl(N_int*bit_kind_size,2)
|
||||||
integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
|
integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
|
||||||
integer :: i0,i
|
integer :: i0,i
|
||||||
|
double precision :: buffer_c(mo_num),buffer_x(mo_num)
|
||||||
|
do i=1, mo_num
|
||||||
|
buffer_c(i) = big_array_coulomb_integrals(i,h,p)
|
||||||
|
buffer_x(i) = big_array_exchange_integrals(i,h,p)
|
||||||
|
enddo
|
||||||
do i = 1, N_int
|
do i = 1, N_int
|
||||||
differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
|
differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
|
||||||
differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
|
differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
|
||||||
@ -122,33 +127,33 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
|
|||||||
! holes :: direct terms
|
! holes :: direct terms
|
||||||
do i0 = 1, n_occ_ab_hole(1)
|
do i0 = 1, n_occ_ab_hole(1)
|
||||||
i = occ_hole(i0,1)
|
i = occ_hole(i0,1)
|
||||||
hij -= big_array_coulomb_integrals(i,h,p)
|
hij -= buffer_c(i)
|
||||||
enddo
|
enddo
|
||||||
do i0 = 1, n_occ_ab_hole(2)
|
do i0 = 1, n_occ_ab_hole(2)
|
||||||
i = occ_hole(i0,2)
|
i = occ_hole(i0,2)
|
||||||
hij -= big_array_coulomb_integrals(i,h,p)
|
hij -= buffer_c(i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! holes :: exchange terms
|
! holes :: exchange terms
|
||||||
do i0 = 1, n_occ_ab_hole(spin)
|
do i0 = 1, n_occ_ab_hole(spin)
|
||||||
i = occ_hole(i0,spin)
|
i = occ_hole(i0,spin)
|
||||||
hij += big_array_exchange_integrals(i,h,p)
|
hij += buffer_x(i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! particles :: direct terms
|
! particles :: direct terms
|
||||||
do i0 = 1, n_occ_ab_partcl(1)
|
do i0 = 1, n_occ_ab_partcl(1)
|
||||||
i = occ_partcl(i0,1)
|
i = occ_partcl(i0,1)
|
||||||
hij += big_array_coulomb_integrals(i,h,p)
|
hij += buffer_c(i)
|
||||||
enddo
|
enddo
|
||||||
do i0 = 1, n_occ_ab_partcl(2)
|
do i0 = 1, n_occ_ab_partcl(2)
|
||||||
i = occ_partcl(i0,2)
|
i = occ_partcl(i0,2)
|
||||||
hij += big_array_coulomb_integrals(i,h,p)
|
hij += buffer_c(i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
! particles :: exchange terms
|
! particles :: exchange terms
|
||||||
do i0 = 1, n_occ_ab_partcl(spin)
|
do i0 = 1, n_occ_ab_partcl(spin)
|
||||||
i = occ_partcl(i0,spin)
|
i = occ_partcl(i0,spin)
|
||||||
hij -= big_array_exchange_integrals(i,h,p)
|
hij -= buffer_x(i)
|
||||||
enddo
|
enddo
|
||||||
hij = hij * phase
|
hij = hij * phase
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user