10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-22 20:34:58 +01:00

Merge branch 'mmap' into dev

This commit is contained in:
Anthony Scemama 2019-11-19 18:05:17 +01:00
commit ed25bf565c
10 changed files with 390 additions and 215 deletions

View File

@ -32,7 +32,7 @@ OPENMP : 1 ; Append OpenMP flags
#
[OPT]
FC : -traceback
FCFLAGS : -march=corei7-avx -O2 -ip -ftz -g
FCFLAGS : -xAVX -O2 -ip -ftz -g
# Profiling flags
#################

View File

@ -306,10 +306,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
i = psi_bilinear_matrix_rows(l_a)
if (nt + exc_degree(i) <= 4) then
idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then
indices(k) = idx
k=k+1
endif
endif
enddo
enddo
@ -329,10 +330,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
idx = psi_det_sorted_order( &
psi_bilinear_matrix_order( &
psi_bilinear_matrix_transp_order(l_a)))
if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
if (psi_average_norm_contrib_sorted(idx) > 1.d-12) then
indices(k) = idx
k=k+1
endif
endif
enddo
enddo
@ -440,19 +442,20 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
fullinteresting(0) = 0
do ii=1,preinteresting(0)
i = preinteresting(ii)
select case (N_int)
case (1)
mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,preinteresting(ii)))
mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,preinteresting(ii)))
mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
case (2)
mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted(1:2,1,preinteresting(ii)))
mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted(1:2,2,preinteresting(ii)))
mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted(1:2,1,i))
mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted(1:2,2,i))
nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + &
popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
case (3)
mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted(1:3,1,preinteresting(ii)))
mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted(1:3,2,preinteresting(ii)))
mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted(1:3,1,i))
mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted(1:3,2,i))
nt = 0
do j=3,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
@ -465,8 +468,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
endif
end do
case (4)
mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted(1:4,1,preinteresting(ii)))
mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted(1:4,2,preinteresting(ii)))
mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted(1:4,1,i))
mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted(1:4,2,i))
nt = 0
do j=4,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
@ -479,8 +482,8 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
endif
end do
case default
mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted(1:N_int,1,preinteresting(ii)))
mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted(1:N_int,2,preinteresting(ii)))
mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted(1:N_int,1,i))
mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted(1:N_int,2,i))
nt = 0
do j=N_int,1,-1
if (mobMask(j,1) /= 0_bit_kind) then
@ -495,7 +498,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
end select
if(nt <= 4) then
i = preinteresting(ii)
sze = interesting(0)
if (sze+1 == size(interesting)) then
allocate (tmp_array(0:sze))
@ -563,6 +565,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
enddo
enddo
endif
do i=1,fullinteresting(0)
fullminilist(1:N_int,1:2,i) = psi_det_sorted(1:N_int,1:2,fullinteresting(i))
enddo
@ -707,8 +710,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
if(bannedOrb(p2, s2)) cycle
if(banned(p1,p2)) cycle
if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle
val = maxval(abs(mat(1:N_states, p1, p2)))
if( val == 0d0) cycle
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
if (do_only_cas) then
@ -946,6 +949,9 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(tip == 3) then
puti = p(1, mi)
if(bannedOrb(puti, mi)) return
h1 = h(1, ma)
h2 = h(2, ma)
do i = 1, 3
putj = p(i, ma)
if(banned(putj,puti,bant)) cycle
@ -953,17 +959,21 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
i2 = turn3(2,i)
p1 = p(i1, ma)
p2 = p(i2, ma)
h1 = h(1, ma)
h2 = h(2, ma)
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2)
if (hij == 0.d0) cycle
hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
if(ma == 1) then
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, putj, puti) = mat(k, putj, puti) +coefs(k) * hij
mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
enddo
else
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
enddo
end if
end do
@ -980,10 +990,14 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
p1 = p(turn2(i), 1)
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
hij = mo_two_e_integral(p1, p2, h1, h2)
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
enddo
endif
end do
end do
end if
@ -1004,7 +1018,11 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
i2 = turn2d(2, i, j)
p1 = p(i1, ma)
p2 = p(i2, ma)
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)
if (hij == 0.d0) cycle
hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
enddo
@ -1022,10 +1040,21 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(banned(puti,putj,1)) cycle
p2 = p(i, ma)
hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
hij = mo_two_e_integral(p1, p2, h1, h2)
if (hij == 0.d0) cycle
hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
if (puti < putj) then
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, min(puti, putj), max(puti, putj)) = mat(k, min(puti, putj), max(puti, putj)) + coefs(k) * hij
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
enddo
else
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
enddo
endif
end do
else ! tip == 4
puti = p(1, sp)
@ -1035,13 +1064,17 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
p2 = p(2, mi)
h1 = h(1, mi)
h2 = h(2, mi)
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2))
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
enddo
end if
end if
end if
end if
end
@ -1061,7 +1094,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
logical, allocatable :: lbanned(:,:)
integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
integer :: hfix, pfix, h1, h2, p1, p2, ib, k
integer :: hfix, pfix, h1, h2, p1, p2, ib, k, l
integer, parameter :: turn2(2) = (/2,1/)
integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/))
@ -1105,7 +1138,10 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
hij = hij_cache(putj,1) - hij_cache(putj,2)
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
enddo
endif
end do
do putj=hfix+1, mo_num
@ -1114,14 +1150,22 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
hij = hij_cache(putj,2) - hij_cache(putj,1)
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
enddo
endif
end do
if(ma == 1) then
mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
else
mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num)
do l=1,mo_num
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
enddo
enddo
end if
end if
@ -1132,7 +1176,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
putj = p1
do puti=1,mo_num
do puti=1,mo_num !HOT
if(lbanned(puti,mi)) cycle
!p1 fixed
putj = p1
@ -1140,13 +1184,16 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
hij = hij_cache(puti,2)
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
enddo
endif
end if
! enddo
!
putj = p2
! do puti=1,mo_num !HOT
if(.not. banned(putj,puti,bant)) then
hij = hij_cache(puti,1)
if (hij /= 0.d0) then
@ -1162,8 +1209,13 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
else
mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:)
mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:)
do l=1,mo_num
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
enddo
enddo
end if
else ! sp /= 3
@ -1197,7 +1249,12 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
end do
mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
mat(:, puti, puti:) = mat(:, puti,puti:) + tmp_row(:,puti:)
do l=puti,mo_num
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
enddo
enddo
end do
else
hfix = h(1,mi)
@ -1216,6 +1273,7 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
hij = hij_cache(puti,1)
if (hij /= 0.d0) then
hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
enddo
@ -1234,9 +1292,19 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
end if
end do
mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:)
do l=p2,mo_num
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
enddo
enddo
mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:)
do l=p1,mo_num
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
enddo
enddo
end if
end if
deallocate(lbanned,hij_cache)
@ -1259,7 +1327,10 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
call i_h_j(gen, det, N_int, hij)
mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
enddo
end do
end do
end
@ -1303,34 +1374,15 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
call i_h_j(gen, det, N_int, hij)
else
phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
! hij = mo_two_e_integral(p2, p1, h2, h1) * phase
hij = hij_cache1(p2) * phase
end if
if (hij == 0.d0) cycle
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT
enddo
end do
end do
! do p2=1, mo_num
! if(bannedOrb(p2,2)) cycle
! call get_mo_two_e_integrals(p2,h1,h2,mo_num,hij_cache1,mo_integrals_map)
! do p1=1, mo_num
! if(bannedOrb(p1, 1) .or. banned(p1, p2, bant)) cycle
! if(p1 /= h1 .and. p2 /= h2) then
! if (hij_cache1(p1) == 0.d0) cycle
! phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
! hij = hij_cache1(p1) * phase
! else
! call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
! call i_h_j(gen, det, N_int, hij)
! if (hij == 0.d0) cycle
! end if
! do k=1,N_states
! mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij ! HOTSPOT
! enddo
! end do
! end do
else ! AA BB
p1 = p(1,sp)
@ -1345,31 +1397,16 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
call i_h_j(gen, det, N_int, hij)
else
hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
end if
if (hij == 0.d0) cycle
else
hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))
if (hij == 0.d0) cycle
hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
end if
!DIR$ LOOP COUNT AVG(4)
do k=1,N_states
mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
enddo
! if(bannedOrb(putj, sp) .or. banned(putj, sp, bant)) cycle
! if(puti /= p1 .and. putj /= p2 .and. puti /= p2 .and. putj /= p1) then
! hij = hij_cache1(putj) - hij_cache2(putj)
! if (hij /= 0.d0) then
! hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
! do k=1,N_states
! mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
! enddo
! endif
! else
! call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
! call i_h_j(gen, det, N_int, hij)
! if (hij /= 0.d0) then
! do k=1,N_states
! mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
! enddo
! endif
! end if
end do
end do
end if
@ -1403,8 +1440,8 @@ subroutine past_d2(banned, p, sp)
integer :: i,j
if(sp == 3) then
do i=1,p(0,1)
do j=1,p(0,2)
do i=1,p(0,1)
banned(p(i,1), p(j,2)) = .true.
end do
end do

View File

@ -6,7 +6,7 @@ default: 1.e-10
[n_states_diag]
type: States_number
doc: Number of states to consider during the Davdison diagonalization
doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag
default: 4
interface: ezfio,ocaml

View File

@ -428,7 +428,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
integer :: istep, imin, imax, ishift, ipos
integer, external :: add_task_to_taskserver
integer, parameter :: tasksize=40000
integer, parameter :: tasksize=10000
character*(100000) :: task
istep=1
ishift=0

View File

@ -161,7 +161,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
stop -1
endif
itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
itertot = 0
if (state_following) then
@ -322,6 +322,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
call normalize(u_in(1,k),sze)
enddo
do k=1,N_st_diag
do i=1,sze
U(i,k) = u_in(i,k)
enddo
enddo
do while (.not.converged)
itertot = itertot+1
@ -329,23 +335,22 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
exit
endif
do k=1,N_st_diag
do i=1,sze
U(i,k) = u_in(i,k)
enddo
enddo
do iter=1,itermax-1
shift = N_st_diag*(iter-1)
shift2 = N_st_diag*iter
call ortho_qr(U,size(U,1),sze,shift2)
call ortho_qr(U,size(U,1),sze,shift2)
if ((iter > 1).or.(itertot == 1)) then
! Compute |W_k> = \sum_i |i><i|H|u_k>
! -----------------------------------------
! -----------------------------------
if (disk_based) then
call ortho_qr_unblocked(U,size(U,1),sze,shift2)
call ortho_qr_unblocked(U,size(U,1),sze,shift2)
else
call ortho_qr(U,size(U,1),sze,shift2)
call ortho_qr(U,size(U,1),sze,shift2)
endif
if ((sze > 100000).and.distributed_davidson) then
call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
@ -353,6 +358,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
endif
S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag))
else
! Already computed in update below
continue
endif
if (dressing_state > 0) then
@ -408,7 +417,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
! Compute s_kl = <u_k | S_l> = <u_k| S2 |u_l>
! -------------------------------------------
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2)
do j=1,shift2
do i=1,shift2
s_(i,j) = 0.d0
@ -563,6 +572,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
! Compute residual vector and davidson step
! -----------------------------------------
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
do k=1,N_st_diag
do i=1,sze
U(i,shift2+k) = &
@ -577,9 +587,15 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
to_print(3,k) = residual_norm(k)
endif
enddo
!$OMP END PARALLEL DO
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter, to_print(1:3,1:N_st)
if ((itertot>1).and.(iter == 1)) then
!don't print
continue
else
write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st)
endif
call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
do k=1,N_st
if (residual_norm(k) > 1.e8) then
@ -600,11 +616,56 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
enddo
! Re-contract to u_in
! -----------
! Re-contract U and update S and W
! --------------------------------
call sgemm('N','N', sze, N_st_diag, shift2, 1., &
S, size(S,1), y_s, size(y_s,1), 0., S(1,shift2+1), size(S,1))
do k=1,N_st_diag
do i=1,sze
S(i,k) = S(i,shift2+k)
enddo
enddo
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
W, size(W,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
do k=1,N_st_diag
do i=1,sze
W(i,k) = u_in(i,k)
enddo
enddo
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
do k=1,N_st_diag
do i=1,sze
U(i,k) = u_in(i,k)
enddo
enddo
if (disk_based) then
call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag)
call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag)
else
call ortho_qr(U,size(U,1),sze,N_st_diag)
call ortho_qr(U,size(U,1),sze,N_st_diag)
endif
do j=1,N_st_diag
k=1
do while ((k<sze).and.(U(k,j) == 0.d0))
k = k+1
enddo
if (U(k,j) * u_in(k,j) < 0.d0) then
do i=1,sze
W(i,j) = -W(i,j)
S(i,j) = -S(i,j)
enddo
endif
enddo
do j=1,N_st_diag
do i=1,sze
S_d(i,j) = dble(S(i,j))
enddo
enddo
enddo
@ -626,7 +687,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w )
fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
close(fd_w,status='delete')
call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_s, ptr_s )
call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 4, fd_s, ptr_s )
fd_s = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_s','r')
close(fd_s,status='delete')
else

View File

@ -15,7 +15,7 @@ BEGIN_PROVIDER [ integer, n_states_diag ]
print *, 'davidson/n_states_diag not found in EZFIO file'
stop 1
endif
n_states_diag = max(N_states, N_states_diag)
n_states_diag = max(2,N_states * N_states_diag)
endif
IRP_IF MPI_DEBUG
print *, irp_here, mpi_rank

View File

@ -188,7 +188,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze)
double precision :: hij, sij
integer :: i,j,k,l
integer :: i,j,k,l,kk
integer :: k_a, k_b, l_a, l_b, m_a, m_b
integer :: istate
integer :: krow, kcol, krow_b, kcol_b
@ -209,6 +209,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
logical :: compute_singles
integer*8 :: last_found, left, right, right_max
double precision :: rss, mem, ratio
double precision, allocatable :: utl(:,:)
integer, parameter :: block_size=128
! call resident_memory(rss)
! mem = dble(singles_beta_csc_size) / 1024.d0**3
@ -247,7 +249,7 @@ compute_singles=.True.
!$OMP singles_alpha_csc,singles_alpha_csc_idx, &
!$OMP singles_beta_csc,singles_beta_csc_idx) &
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
!$OMP lcol, lrow, l_a, l_b, &
!$OMP lcol, lrow, l_a, l_b, utl, kk, &
!$OMP buffer, doubles, n_doubles, &
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
!$OMP singles_a, n_singles_a, singles_b, ratio, &
@ -260,7 +262,7 @@ compute_singles=.True.
singles_a(maxab), &
singles_b(maxab), &
doubles(maxab), &
idx(maxab))
idx(maxab), utl(N_st,block_size))
kcol_prev=-1
@ -398,10 +400,21 @@ compute_singles=.True.
! -----------------------
!DIR$ LOOP COUNT avg(1000)
do k = 1,n_singles_a
l_a = singles_a(k)
do k = 1,n_singles_a,block_size
! Prefetch u_t(:,l_a)
do kk=0,block_size-1
if (k+kk > n_singles_a) exit
l_a = singles_a(k+kk)
ASSERT (l_a <= N_det)
do l=1,N_st
utl(l,kk+1) = u_t(l,l_a)
enddo
enddo
do kk=0,block_size-1
if (k+kk > n_singles_a) exit
l_a = singles_a(k+kk)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
@ -410,8 +423,9 @@ compute_singles=.True.
call get_s2(tmp_det,tmp_det2,$N_int,sij)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1)
enddo
enddo
enddo
@ -475,10 +489,21 @@ compute_singles=.True.
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
!DIR$ LOOP COUNT avg(1000)
do i=1,n_singles_a
l_a = singles_a(i)
do i=1,n_singles_a,block_size
! Prefetch u_t(:,l_a)
do kk=0,block_size-1
if (i+kk > n_singles_a) exit
l_a = singles_a(i+kk)
ASSERT (l_a <= N_det)
do l=1,N_st
utl(l,kk+1) = u_t(l,l_a)
enddo
enddo
do kk=0,block_size-1
if (i+kk > n_singles_a) exit
l_a = singles_a(i+kk)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
@ -487,30 +512,43 @@ compute_singles=.True.
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
! single => sij = 0
enddo
enddo
enddo
! Compute Hij for all alpha doubles
! ----------------------------------
!DIR$ LOOP COUNT avg(50000)
do i=1,n_doubles
l_a = doubles(i)
do i=1,n_doubles,block_size
! Prefetch u_t(:,l_a)
do kk=0,block_size-1
if (i+kk > n_doubles) exit
l_a = doubles(i+kk)
ASSERT (l_a <= N_det)
do l=1,N_st
utl(l,kk+1) = u_t(l,l_a)
enddo
enddo
do kk=0,block_size-1
if (i+kk > n_doubles) exit
l_a = doubles(i+kk)
lrow = psi_bilinear_matrix_rows(l_a)
ASSERT (lrow <= N_det_alpha_unique)
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
! same spin => sij = 0
enddo
enddo
enddo
! Single and double beta excitations
@ -560,45 +598,70 @@ compute_singles=.True.
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
!DIR$ LOOP COUNT avg(1000)
do i=1,n_singles_b
l_b = singles_b(i)
do i=1,n_singles_b,block_size
do kk=0,block_size-1
if (i+kk > n_singles_b) exit
l_b = singles_b(i+kk)
ASSERT (l_b <= N_det)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
do l=1,N_st
utl(l,kk+1) = u_t(l,l_a)
enddo
enddo
do kk=0,block_size-1
if (i+kk > n_singles_b) exit
l_b = singles_b(i+kk)
l_a = psi_bilinear_matrix_transp_order(l_b)
lcol = psi_bilinear_matrix_transp_columns(l_b)
ASSERT (lcol <= N_det_beta_unique)
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
! single => sij = 0
enddo
enddo
enddo
! Compute Hij for all beta doubles
! ----------------------------------
!DIR$ LOOP COUNT avg(50000)
do i=1,n_doubles
l_b = doubles(i)
do i=1,n_doubles,block_size
do kk=0,block_size-1
if (i+kk > n_doubles) exit
l_b = doubles(i+kk)
ASSERT (l_b <= N_det)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
do l=1,N_st
utl(l,kk+1) = u_t(l,l_a)
enddo
enddo
do kk=0,block_size-1
if (i+kk > n_doubles) exit
l_b = doubles(i+kk)
l_a = psi_bilinear_matrix_transp_order(l_b)
lcol = psi_bilinear_matrix_transp_columns(l_b)
ASSERT (lcol <= N_det_beta_unique)
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
l_a = psi_bilinear_matrix_transp_order(l_b)
ASSERT (l_a <= N_det)
!DIR$ LOOP COUNT AVG(4)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
! same spin => sij = 0
enddo
enddo
enddo
! Diagonal contribution
@ -629,7 +692,7 @@ compute_singles=.True.
end do
!$OMP END DO
deallocate(buffer, singles_a, singles_b, doubles, idx)
deallocate(buffer, singles_a, singles_b, doubles, idx, utl)
!$OMP END PARALLEL
end

View File

@ -108,6 +108,11 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
integer :: occ_partcl(N_int*bit_kind_size,2)
integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
integer :: i0,i
double precision :: buffer_c(mo_num),buffer_x(mo_num)
do i=1, mo_num
buffer_c(i) = big_array_coulomb_integrals(i,h,p)
buffer_x(i) = big_array_exchange_integrals(i,h,p)
enddo
do i = 1, N_int
differences(i,1) = xor(det_1(i,1),ref_closed_shell_bitmask(i,1))
differences(i,2) = xor(det_1(i,2),ref_closed_shell_bitmask(i,2))
@ -122,33 +127,33 @@ subroutine get_single_excitation_from_fock(det_1,det_2,h,p,spin,phase,hij)
! holes :: direct terms
do i0 = 1, n_occ_ab_hole(1)
i = occ_hole(i0,1)
hij -= big_array_coulomb_integrals(i,h,p)
hij -= buffer_c(i)
enddo
do i0 = 1, n_occ_ab_hole(2)
i = occ_hole(i0,2)
hij -= big_array_coulomb_integrals(i,h,p)
hij -= buffer_c(i)
enddo
! holes :: exchange terms
do i0 = 1, n_occ_ab_hole(spin)
i = occ_hole(i0,spin)
hij += big_array_exchange_integrals(i,h,p)
hij += buffer_x(i)
enddo
! particles :: direct terms
do i0 = 1, n_occ_ab_partcl(1)
i = occ_partcl(i0,1)
hij += big_array_coulomb_integrals(i,h,p)
hij += buffer_c(i)
enddo
do i0 = 1, n_occ_ab_partcl(2)
i = occ_partcl(i0,2)
hij += big_array_coulomb_integrals(i,h,p)
hij += buffer_c(i)
enddo
! particles :: exchange terms
do i0 = 1, n_occ_ab_partcl(spin)
i = occ_partcl(i0,spin)
hij -= big_array_exchange_integrals(i,h,p)
hij -= buffer_x(i)
enddo
hij = hij * phase

View File

@ -138,18 +138,27 @@ subroutine ortho_qr(A,LDA,m,n)
double precision, intent(inout) :: A(LDA,n)
integer :: lwork, info
integer, allocatable :: jpvt(:)
double precision, allocatable :: tau(:), work(:)
double precision, allocatable :: TAU(:), WORK(:)
allocate (TAU(n), WORK(1))
allocate (jpvt(n), tau(n), work(1))
LWORK=-1
call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
LWORK=2*int(WORK(1))
LWORK=int(WORK(1))
deallocate(WORK)
allocate(WORK(LWORK))
call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
deallocate(WORK,jpvt,tau)
LWORK=-1
call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
LWORK=int(WORK(1))
deallocate(WORK)
allocate(WORK(LWORK))
call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
deallocate(WORK,TAU)
end
subroutine ortho_qr_unblocked(A,LDA,m,n)
@ -170,13 +179,12 @@ subroutine ortho_qr_unblocked(A,LDA,m,n)
double precision, intent(inout) :: A(LDA,n)
integer :: info
integer, allocatable :: jpvt(:)
double precision, allocatable :: tau(:), work(:)
double precision, allocatable :: TAU(:), WORK(:)
allocate (jpvt(n), tau(n), work(n))
allocate (TAU(n), WORK(n))
call dgeqr2( m, n, A, LDA, TAU, WORK, INFO )
call dorg2r(m, n, n, A, LDA, tau, WORK, INFO)
deallocate(WORK,jpvt,tau)
call dorg2r(m, n, n, A, LDA, TAU, WORK, INFO)
deallocate(WORK,TAU)
end
subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m)

View File

@ -99,6 +99,7 @@ subroutine check_mem(rss_in,routine)
rss += rss_in
if (int(rss)+1 > qp_max_mem) then
print *, 'Not enough memory: aborting in ', routine
print *, int(rss)+1, ' GB required'
stop -1
endif
!$OMP END CRITICAL