mirror of
https://github.com/LCPQ/quantum_package
synced 2024-10-20 06:48:20 +02:00
Optimmization
This commit is contained in:
parent
69faa958f6
commit
710198f32f
@ -138,22 +138,18 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
integer :: maxab, n_singles_a, n_singles_b, kcol_prev
|
integer :: maxab, n_singles_a, n_singles_b, kcol_prev
|
||||||
integer*8 :: k8
|
integer*8 :: k8
|
||||||
logical :: compute_singles
|
logical :: compute_singles
|
||||||
integer*8 :: last_found, left, right
|
integer*8 :: last_found, left, right, right_max
|
||||||
|
double precision :: rss, mem, ratio
|
||||||
double precision :: rss, mem
|
|
||||||
call resident_memory(rss)
|
|
||||||
mem = dble(singles_beta_csc_size) / 1024.d0**3
|
|
||||||
|
|
||||||
compute_singles = (mem+rss > qp_max_mem)
|
|
||||||
! compute_singles = (iend-istart < 100000).and.(mem+rss < qp_max_mem)
|
|
||||||
|
|
||||||
! compute_singles = .True.
|
|
||||||
|
|
||||||
if (.not.compute_singles) then
|
|
||||||
! provide singles_alpha_csc
|
|
||||||
provide singles_beta_csc
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
! call resident_memory(rss)
|
||||||
|
! mem = dble(singles_beta_csc_size) / 1024.d0**3
|
||||||
|
!
|
||||||
|
! compute_singles = (mem+rss > qp_max_mem)
|
||||||
|
!
|
||||||
|
! if (.not.compute_singles) then
|
||||||
|
! provide singles_beta_csc
|
||||||
|
! endif
|
||||||
|
compute_singles=.True.
|
||||||
|
|
||||||
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
|
||||||
allocate(idx0(maxab))
|
allocate(idx0(maxab))
|
||||||
@ -185,8 +181,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
!$OMP lcol, lrow, l_a, l_b, &
|
!$OMP lcol, lrow, l_a, l_b, &
|
||||||
!$OMP buffer, doubles, n_doubles, &
|
!$OMP buffer, doubles, n_doubles, &
|
||||||
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
|
!$OMP tmp_det2, hij, sij, idx, l, kcol_prev, &
|
||||||
!$OMP singles_a, n_singles_a, singles_b, &
|
!$OMP singles_a, n_singles_a, singles_b, ratio, &
|
||||||
!$OMP n_singles_b, k8, last_found,left,right)
|
!$OMP n_singles_b, k8, last_found,left,right,right_max)
|
||||||
|
|
||||||
! Alpha/Beta double excitations
|
! Alpha/Beta double excitations
|
||||||
! =============================
|
! =============================
|
||||||
@ -223,6 +219,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
singles_b, n_singles_b)
|
singles_b, n_singles_b)
|
||||||
else
|
else
|
||||||
n_singles_b = 0
|
n_singles_b = 0
|
||||||
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
|
do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
|
||||||
n_singles_b = n_singles_b+1
|
n_singles_b = n_singles_b+1
|
||||||
singles_b(n_singles_b) = singles_beta_csc(k8)
|
singles_b(n_singles_b) = singles_beta_csc(k8)
|
||||||
@ -234,15 +231,20 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! Loop over singly excited beta columns
|
! Loop over singly excited beta columns
|
||||||
! -------------------------------------
|
! -------------------------------------
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do i=1,n_singles_b
|
do i=1,n_singles_b
|
||||||
lcol = singles_b(i)
|
lcol = singles_b(i)
|
||||||
|
|
||||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
|
tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
|
||||||
|
|
||||||
|
!---
|
||||||
|
! if (compute_singles) then
|
||||||
|
|
||||||
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
! if (compute_singles) then
|
!DIR$ UNROLL(8)
|
||||||
|
!DIR$ LOOP COUNT avg(50000)
|
||||||
do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
|
do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
|
||||||
lrow = psi_bilinear_matrix_rows(l_a)
|
lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
ASSERT (lrow <= N_det_alpha_unique)
|
ASSERT (lrow <= N_det_alpha_unique)
|
||||||
@ -258,16 +260,51 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
call get_all_spin_singles_$N_int( &
|
call get_all_spin_singles_$N_int( &
|
||||||
buffer, idx, tmp_det(1,1), j, &
|
buffer, idx, tmp_det(1,1), j, &
|
||||||
singles_a, n_singles_a )
|
singles_a, n_singles_a )
|
||||||
|
|
||||||
|
!-----
|
||||||
! else
|
! else
|
||||||
!
|
!
|
||||||
|
! ! Search for singles
|
||||||
|
!
|
||||||
|
!call cpu_time(time0)
|
||||||
|
! ! Right boundary
|
||||||
|
! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1
|
||||||
|
! ASSERT (l_a <= N_det)
|
||||||
|
! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
|
||||||
|
! lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
|
! ASSERT (lrow <= N_det_alpha_unique)
|
||||||
|
!
|
||||||
|
! left = singles_alpha_csc_idx(krow)
|
||||||
|
! right_max = -1_8
|
||||||
|
! right = singles_alpha_csc_idx(krow+1)
|
||||||
|
! do while (right-left>0_8)
|
||||||
|
! k8 = shiftr(right+left,1)
|
||||||
|
! if (singles_alpha_csc(k8) > lrow) then
|
||||||
|
! right = k8
|
||||||
|
! else if (singles_alpha_csc(k8) < lrow) then
|
||||||
|
! left = k8 + 1_8
|
||||||
|
! else
|
||||||
|
! right_max = k8+1_8
|
||||||
|
! exit
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
! if (right_max > 0_8) exit
|
||||||
|
! l_a = l_a-1
|
||||||
|
! enddo
|
||||||
|
! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow)
|
||||||
|
!
|
||||||
|
! ! Search
|
||||||
! n_singles_a = 0
|
! n_singles_a = 0
|
||||||
|
! l_a = psi_bilinear_matrix_columns_loc(lcol)
|
||||||
|
! ASSERT (l_a <= N_det)
|
||||||
|
!
|
||||||
! last_found = singles_alpha_csc_idx(krow)
|
! last_found = singles_alpha_csc_idx(krow)
|
||||||
! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
|
! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
|
||||||
! lrow = psi_bilinear_matrix_rows(l_a)
|
! lrow = psi_bilinear_matrix_rows(l_a)
|
||||||
! ASSERT (lrow <= N_det_alpha_unique)
|
! ASSERT (lrow <= N_det_alpha_unique)
|
||||||
!
|
!
|
||||||
! left = last_found
|
! left = last_found
|
||||||
! right = singles_alpha_csc_idx(krow+1)
|
! right = right_max
|
||||||
! do while (right-left>0_8)
|
! do while (right-left>0_8)
|
||||||
! k8 = shiftr(right+left,1)
|
! k8 = shiftr(right+left,1)
|
||||||
! if (singles_alpha_csc(k8) > lrow) then
|
! if (singles_alpha_csc(k8) > lrow) then
|
||||||
@ -286,10 +323,12 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! j = j-1
|
! j = j-1
|
||||||
!
|
!
|
||||||
! endif
|
! endif
|
||||||
|
!-----
|
||||||
|
|
||||||
! Loop over alpha singles
|
! Loop over alpha singles
|
||||||
! -----------------------
|
! -----------------------
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do k = 1,n_singles_a
|
do k = 1,n_singles_a
|
||||||
l_a = singles_a(k)
|
l_a = singles_a(k)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
@ -300,6 +339,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||||
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij)
|
call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij)
|
||||||
call get_s2(tmp_det,tmp_det2,$N_int,sij)
|
call get_s2(tmp_det,tmp_det2,$N_int,sij)
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a)
|
||||||
@ -342,6 +382,8 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! Loop inside the beta column to gather all the connected alphas
|
! Loop inside the beta column to gather all the connected alphas
|
||||||
lcol = psi_bilinear_matrix_columns(k_a)
|
lcol = psi_bilinear_matrix_columns(k_a)
|
||||||
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
l_a = psi_bilinear_matrix_columns_loc(lcol)
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT avg(200000)
|
||||||
do i=1,N_det_alpha_unique
|
do i=1,N_det_alpha_unique
|
||||||
if (l_a > N_det) exit
|
if (l_a > N_det) exit
|
||||||
lcol = psi_bilinear_matrix_columns(l_a)
|
lcol = psi_bilinear_matrix_columns(l_a)
|
||||||
@ -363,6 +405,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
|
||||||
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do i=1,n_singles_a
|
do i=1,n_singles_a
|
||||||
l_a = singles_a(i)
|
l_a = singles_a(i)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
@ -373,6 +416,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
|
||||||
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
|
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 1, hij)
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
@ -383,6 +427,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! Compute Hij for all alpha doubles
|
! Compute Hij for all alpha doubles
|
||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT avg(50000)
|
||||||
do i=1,n_doubles
|
do i=1,n_doubles
|
||||||
l_a = doubles(i)
|
l_a = doubles(i)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
@ -391,6 +436,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
ASSERT (lrow <= N_det_alpha_unique)
|
ASSERT (lrow <= N_det_alpha_unique)
|
||||||
|
|
||||||
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
@ -422,6 +468,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! Loop inside the alpha row to gather all the connected betas
|
! Loop inside the alpha row to gather all the connected betas
|
||||||
lrow = psi_bilinear_matrix_transp_rows(k_b)
|
lrow = psi_bilinear_matrix_transp_rows(k_b)
|
||||||
l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
|
l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
|
||||||
|
!DIR$ LOOP COUNT avg(200000)
|
||||||
do i=1,N_det_beta_unique
|
do i=1,N_det_beta_unique
|
||||||
if (l_b > N_det) exit
|
if (l_b > N_det) exit
|
||||||
lrow = psi_bilinear_matrix_transp_rows(l_b)
|
lrow = psi_bilinear_matrix_transp_rows(l_b)
|
||||||
@ -443,6 +490,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
|
||||||
|
!DIR$ LOOP COUNT avg(1000)
|
||||||
do i=1,n_singles_b
|
do i=1,n_singles_b
|
||||||
l_b = singles_b(i)
|
l_b = singles_b(i)
|
||||||
ASSERT (l_b <= N_det)
|
ASSERT (l_b <= N_det)
|
||||||
@ -454,6 +502,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 2, hij)
|
call i_H_j_mono_spin( tmp_det, tmp_det2, $N_int, 2, hij)
|
||||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! single => sij = 0
|
! single => sij = 0
|
||||||
@ -463,6 +512,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
! Compute Hij for all beta doubles
|
! Compute Hij for all beta doubles
|
||||||
! ----------------------------------
|
! ----------------------------------
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT avg(50000)
|
||||||
do i=1,n_doubles
|
do i=1,n_doubles
|
||||||
l_b = doubles(i)
|
l_b = doubles(i)
|
||||||
ASSERT (l_b <= N_det)
|
ASSERT (l_b <= N_det)
|
||||||
@ -474,6 +524,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
l_a = psi_bilinear_matrix_transp_order(l_b)
|
l_a = psi_bilinear_matrix_transp_order(l_b)
|
||||||
ASSERT (l_a <= N_det)
|
ASSERT (l_a <= N_det)
|
||||||
|
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a)
|
||||||
! same spin => sij = 0
|
! same spin => sij = 0
|
||||||
@ -501,6 +552,7 @@ subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,
|
|||||||
|
|
||||||
hij = diag_H_mat_elem(tmp_det,$N_int)
|
hij = diag_H_mat_elem(tmp_det,$N_int)
|
||||||
sij = diag_S_mat_elem(tmp_det,$N_int)
|
sij = diag_S_mat_elem(tmp_det,$N_int)
|
||||||
|
!DIR$ LOOP COUNT AVG(4)
|
||||||
do l=1,N_st
|
do l=1,N_st
|
||||||
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
|
v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
|
||||||
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
|
s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a)
|
||||||
|
@ -773,7 +773,7 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
|
|||||||
|
|
||||||
istep = shiftr(iend-ibegin,1)
|
istep = shiftr(iend-ibegin,1)
|
||||||
idx = ibegin + istep
|
idx = ibegin + istep
|
||||||
do while (istep > 64)
|
do while (istep > 4)
|
||||||
idx = ibegin + istep
|
idx = ibegin + istep
|
||||||
if (cache_key < X(idx)) then
|
if (cache_key < X(idx)) then
|
||||||
iend = idx
|
iend = idx
|
||||||
@ -813,20 +813,17 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
|
|||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
idx = ibegin
|
idx = ibegin
|
||||||
value = Y(idx)
|
if (min(iend_in,sze) > ibegin+4) then
|
||||||
if (min(iend_in,sze) > ibegin+64) then
|
iend = ibegin+4
|
||||||
iend = ibegin+64
|
!DIR$ LOOP COUNT MAX(4)
|
||||||
do while (cache_key > X(idx))
|
do while (cache_key > X(idx))
|
||||||
idx = idx+1
|
idx = idx+1
|
||||||
value = Y(idx)
|
|
||||||
end do
|
end do
|
||||||
else
|
else
|
||||||
|
!DIR$ LOOP COUNT MAX(4)
|
||||||
do while (cache_key > X(idx))
|
do while (cache_key > X(idx))
|
||||||
idx = idx+1
|
idx = idx+1
|
||||||
value = Y(idx)
|
if (idx == iend) then
|
||||||
if (idx /= iend) then
|
|
||||||
cycle
|
|
||||||
else
|
|
||||||
exit
|
exit
|
||||||
endif
|
endif
|
||||||
end do
|
end do
|
||||||
@ -834,6 +831,8 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
|
|||||||
if (cache_key /= X(idx)) then
|
if (cache_key /= X(idx)) then
|
||||||
idx = 1-idx
|
idx = 1-idx
|
||||||
value = 0.d0
|
value = 0.d0
|
||||||
|
else
|
||||||
|
value = Y(idx)
|
||||||
endif
|
endif
|
||||||
return
|
return
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user