10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-06-29 08:24:51 +02:00

OpenMP davidson

This commit is contained in:
Anthony Scemama 2017-04-14 18:11:02 +02:00
parent 923eec3c25
commit 7a09448f62
4 changed files with 84 additions and 65 deletions

View File

@ -56,7 +56,10 @@ subroutine sort_selection_buffer(b)
iorder(i) = i iorder(i) = i
end do end do
! Optimal for almost sorted data ! Optimal for almost sorted data
call insertion_dsort(absval, iorder, b%cur) ! call sorted_dnumber(absval, b%cur, i)
! if (b%cur/i >
! call insertion_dsort(absval, iorder, b%cur)
call dsort(absval, iorder, b%cur)
do i=1, nmwen do i=1, nmwen
detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))

View File

@ -669,9 +669,8 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t, u_t !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t, u_t
maxab = max(N_det_alpha_unique, N_det_beta_unique) maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
allocate(idx0(maxab), & allocate(idx0(maxab), u_t(N_st,N_det) )
u_t(N_st,N_det), v_t(N_st,N_det), s_t(N_st,N_det) )
do i=1,maxab do i=1,maxab
idx0(i) = i idx0(i) = i
@ -684,12 +683,12 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
size(u_t, 1), & size(u_t, 1), &
N_det, N_st) N_det, N_st)
v_0 = 0.d0
s_0 = 0.d0
! Prepare the array of all alpha single excitations ! Prepare the array of all alpha single excitations
! ------------------------------------------------- ! -------------------------------------------------
v_t = 0.d0
s_t = 0.d0
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(psi_bilinear_matrix_rows, N_det, & !$OMP SHARED(psi_bilinear_matrix_rows, N_det, &
!$OMP psi_bilinear_matrix_columns, & !$OMP psi_bilinear_matrix_columns, &
@ -700,24 +699,30 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
!$OMP psi_bilinear_matrix_transp_order, N_st, & !$OMP psi_bilinear_matrix_transp_order, N_st, &
!$OMP psi_bilinear_matrix_order_transp_reverse, & !$OMP psi_bilinear_matrix_order_transp_reverse, &
!$OMP singles_alpha, psi_bilinear_matrix_columns_loc, & !$OMP singles_alpha, psi_bilinear_matrix_columns_loc, &
!$OMP idx0, u_t, v_t, s_t, maxab) & !$OMP singles_alpha_size, sze_8, &
!$OMP idx0, u_t, maxab, v_0, s_0) &
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
!$OMP lcol, lrow, is_single_a,l_a, l_b, & !$OMP lcol, lrow, is_single_a,l_a, l_b, &
!$OMP buffer, singles, doubles, n_singles, n_doubles, & !$OMP buffer, singles, doubles, n_singles, n_doubles, &
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev) !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, s_t)
! Alpha/Beta double excitations ! Alpha/Beta double excitations
! ============================= ! =============================
allocate( buffer(N_int,maxab), & allocate( buffer(N_int,maxab), &
singles(maxab), doubles(maxab), & singles(singles_alpha_size), &
idx(maxab), & doubles(maxab), &
! v_t(N_st,N_det), s_t(N_st,N_det), & idx(maxab), &
v_t(N_st,N_det), s_t(N_st,N_det), &
is_single_a(N_det_alpha_unique)) is_single_a(N_det_alpha_unique))
is_single_a = .False. is_single_a = .False.
kcol_prev=-1 kcol_prev=-1
krow=1 krow=1
v_t = 0.d0
s_t = 0.d0
!$OMP DO SCHEDULE(static,1) !$OMP DO SCHEDULE(static,1)
do k_a=1,N_det do k_a=1,N_det
@ -764,29 +769,58 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
else else
doubles(n_doubles) = lrow doubles(n_doubles) = lrow
idx(n_doubles) = l_a idx(n_doubles) = l_a
if (n_doubles == maxab) then
do k=1,n_doubles
lrow = doubles(k)
l_a = idx(k)
tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij)
call get_s2(tmp_det,tmp_det2,N_int,sij)
do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
enddo
enddo
n_doubles=0
endif
n_doubles = n_doubles+1 n_doubles = n_doubles+1
endif endif
l_a = l_a+1 l_a = l_a+1
enddo enddo
n_doubles = n_doubles-1 n_doubles = n_doubles-1
do k=1,n_doubles if (n_doubles > 0) then
lrow = doubles(k) do k=1,n_doubles
l_a = idx(k) lrow = doubles(k)
tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) l_a = idx(k)
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
call get_s2(tmp_det,tmp_det2,N_int,sij) call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij)
do l=1,N_st call get_s2(tmp_det,tmp_det2,N_int,sij)
!$OMP ATOMIC do l=1,N_st
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
!$OMP ATOMIC s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) enddo
!$OMP ATOMIC enddo
s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) endif
enddo ! do k=1,n_doubles
enddo ! lrow = doubles(k)
! l_a = idx(k)
! tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
! call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij)
! call get_s2(tmp_det,tmp_det2,N_int,sij)
! do l=1,N_st
! v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
! v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
! s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a)
! enddo
! enddo
enddo enddo
@ -798,9 +832,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
hij = diag_H_mat_elem(tmp_det,N_int) hij = diag_H_mat_elem(tmp_det,N_int)
sij = diag_S_mat_elem(tmp_det,N_int) sij = diag_S_mat_elem(tmp_det,N_int)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
enddo enddo
@ -862,9 +894,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow)
call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0 ! single => sij = 0
enddo enddo
@ -882,9 +912,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
enddo enddo
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, doubles(i)), N_int, hij) call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, doubles(i)), N_int, hij)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0 ! same spin => sij = 0
enddo enddo
@ -932,9 +960,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
l_a = psi_bilinear_matrix_transp_order(l_b) l_a = psi_bilinear_matrix_transp_order(l_b)
call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! single => sij = 0 ! single => sij = 0
enddo enddo
@ -953,33 +979,26 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8)
l_a = psi_bilinear_matrix_transp_order(l_b) l_a = psi_bilinear_matrix_transp_order(l_b)
call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, doubles(i)), N_int, hij) call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, doubles(i)), N_int, hij)
do l=1,N_st do l=1,N_st
!$OMP ATOMIC
v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a)
!$OMP ATOMIC
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
! same spin => sij = 0 ! same spin => sij = 0
enddo enddo
enddo enddo
end do end do
!$OMP END DO !$OMP END DO NOWAIT
!$OMP CRITICAL
do l=1,N_st
do i=1, N_det
v_0(i,l) = v_0(i,l) + v_t(l,i)
s_0(i,l) = s_0(i,l) + s_t(l,i)
enddo
enddo
!$OMP END CRITICAL
!$OMP END PARALLEL !$OMP END PARALLEL
call dtranspose( &
v_t, &
size(v_t, 1), &
v_0, &
size(v_0, 1), &
N_st, N_det)
call dtranspose( &
s_t, &
size(s_t, 1), &
s_0, &
size(s_0, 1), &
N_st, N_det)
end end

View File

@ -771,7 +771,7 @@ subroutine get_all_spin_singles(buffer, idx, spindet, Nint, size_buffer, singles
endif endif
enddo enddo
n_singles = n_singles-1 n_singles = n_singles-1
deallocate(xorvec) deallocate(xorvec, degree)
end end

View File

@ -164,7 +164,7 @@ BEGIN_TEMPLATE
! Returns the number of sorted elements ! Returns the number of sorted elements
END_DOC END_DOC
integer, intent(in) :: isize integer, intent(in) :: isize
$type, intent(inout) :: x(isize) $type, intent(in) :: x(isize)
integer, intent(out) :: n integer, intent(out) :: n
integer :: i integer :: i
if (isize < 2) then if (isize < 2) then
@ -172,14 +172,14 @@ BEGIN_TEMPLATE
return return
endif endif
if (x(1) > x(2)) then if (x(1) >= x(2)) then
n=1 n=1
else else
n=0 n=0
endif endif
do i=2,isize do i=2,isize
if (x(i-1) > x(i)) then if (x(i-1) >= x(i)) then
n=n+1 n=n+1
endif endif
enddo enddo
@ -197,15 +197,12 @@ BEGIN_TEMPLATE
$type,intent(inout) :: x(isize) $type,intent(inout) :: x(isize)
integer,intent(inout) :: iorder(isize) integer,intent(inout) :: iorder(isize)
integer :: n integer :: n
if (isize < 32) then call sorted_$Xnumber(x,isize,n)
print *, isize, n, isize-n
if ( isize-n < 1000) then
call insertion_$Xsort(x,iorder,isize) call insertion_$Xsort(x,iorder,isize)
else else
! call sorted_$Xnumber(x,isize,n) call heap_$Xsort(x,iorder,isize)
! if ( (16*n) / isize > 0) then
! call insertion_$Xsort(x,iorder,isize)
! else
call heap_$Xsort(x,iorder,isize)
! endif
endif endif
end subroutine $Xsort end subroutine $Xsort