diff --git a/plugins/Full_CI_ZMQ/selection_buffer.irp.f b/plugins/Full_CI_ZMQ/selection_buffer.irp.f index 8a47cb9d..84992449 100644 --- a/plugins/Full_CI_ZMQ/selection_buffer.irp.f +++ b/plugins/Full_CI_ZMQ/selection_buffer.irp.f @@ -56,7 +56,10 @@ subroutine sort_selection_buffer(b) iorder(i) = i end do ! Optimal for almost sorted data - call insertion_dsort(absval, iorder, b%cur) +! call sorted_dnumber(absval, b%cur, i) +! if (b%cur/i > +! call insertion_dsort(absval, iorder, b%cur) + call dsort(absval, iorder, b%cur) do i=1, nmwen detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i)) detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i)) diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 376b6e14..094ce412 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -669,9 +669,8 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: v_t, s_t, u_t - maxab = max(N_det_alpha_unique, N_det_beta_unique) - allocate(idx0(maxab), & - u_t(N_st,N_det), v_t(N_st,N_det), s_t(N_st,N_det) ) + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab), u_t(N_st,N_det) ) do i=1,maxab idx0(i) = i @@ -684,12 +683,12 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) size(u_t, 1), & N_det, N_st) + v_0 = 0.d0 + s_0 = 0.d0 + ! Prepare the array of all alpha single excitations ! ------------------------------------------------- - v_t = 0.d0 - s_t = 0.d0 - !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & !$OMP psi_bilinear_matrix_columns, & @@ -700,24 +699,30 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) !$OMP psi_bilinear_matrix_transp_order, N_st, & !$OMP psi_bilinear_matrix_order_transp_reverse, & !$OMP singles_alpha, psi_bilinear_matrix_columns_loc, & - !$OMP idx0, u_t, v_t, s_t, maxab) & + !$OMP singles_alpha_size, sze_8, & + !$OMP idx0, u_t, maxab, v_0, s_0) & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP lcol, lrow, is_single_a,l_a, l_b, & !$OMP buffer, singles, doubles, n_singles, n_doubles, & - !$OMP tmp_det2, hij, sij, idx, l, kcol_prev) + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, v_t, s_t) ! Alpha/Beta double excitations ! ============================= allocate( buffer(N_int,maxab), & - singles(maxab), doubles(maxab), & - idx(maxab), & -! v_t(N_st,N_det), s_t(N_st,N_det), & + singles(singles_alpha_size), & + doubles(maxab), & + idx(maxab), & + v_t(N_st,N_det), s_t(N_st,N_det), & is_single_a(N_det_alpha_unique)) is_single_a = .False. kcol_prev=-1 krow=1 + v_t = 0.d0 + s_t = 0.d0 + + !$OMP DO SCHEDULE(static,1) do k_a=1,N_det @@ -764,29 +769,58 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) else doubles(n_doubles) = lrow idx(n_doubles) = l_a + if (n_doubles == maxab) then + + do k=1,n_doubles + lrow = doubles(k) + l_a = idx(k) + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) + call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) + call get_s2(tmp_det,tmp_det2,N_int,sij) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) + v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) + s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) + enddo + enddo + + n_doubles=0 + endif n_doubles = n_doubles+1 endif l_a = l_a+1 enddo n_doubles = n_doubles-1 - do k=1,n_doubles - lrow = doubles(k) - l_a = idx(k) - tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) - call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) - call get_s2(tmp_det,tmp_det2,N_int,sij) - do l=1,N_st - !$OMP ATOMIC - v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) - !$OMP ATOMIC - s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) - !$OMP ATOMIC - v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) - !$OMP ATOMIC - s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) - enddo - enddo + if (n_doubles > 0) then + do k=1,n_doubles + lrow = doubles(k) + l_a = idx(k) + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) + call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) + call get_s2(tmp_det,tmp_det2,N_int,sij) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) + v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) + s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) + enddo + enddo + endif +! do k=1,n_doubles +! lrow = doubles(k) +! l_a = idx(k) +! tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) +! call get_s2(tmp_det,tmp_det2,N_int,sij) +! do l=1,N_st +! v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) +! s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,l_a) +! v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) +! s_t(l,l_a) = s_t(l,l_a) + sij * u_t(l,k_a) +! enddo +! enddo enddo @@ -798,9 +832,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) hij = diag_H_mat_elem(tmp_det,N_int) sij = diag_S_mat_elem(tmp_det,N_int) do l=1,N_st - !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) - !$OMP ATOMIC s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) enddo @@ -862,9 +894,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij) do l=1,N_st - !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) - !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! single => sij = 0 enddo @@ -882,9 +912,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) enddo call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, doubles(i)), N_int, hij) do l=1,N_st - !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) - !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! same spin => sij = 0 enddo @@ -932,9 +960,7 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) l_a = psi_bilinear_matrix_transp_order(l_b) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) do l=1,N_st - !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) - !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! single => sij = 0 enddo @@ -953,33 +979,26 @@ subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) l_a = psi_bilinear_matrix_transp_order(l_b) call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, doubles(i)), N_int, hij) do l=1,N_st - !$OMP ATOMIC v_t(l,l_a) = v_t(l,l_a) + hij * u_t(l,k_a) - !$OMP ATOMIC v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,l_a) ! same spin => sij = 0 enddo enddo end do - !$OMP END DO + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do l=1,N_st + do i=1, N_det + v_0(i,l) = v_0(i,l) + v_t(l,i) + s_0(i,l) = s_0(i,l) + s_t(l,i) + enddo + enddo + !$OMP END CRITICAL !$OMP END PARALLEL - call dtranspose( & - v_t, & - size(v_t, 1), & - v_0, & - size(v_0, 1), & - N_st, N_det) - - call dtranspose( & - s_t, & - size(s_t, 1), & - s_0, & - size(s_0, 1), & - N_st, N_det) - end diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 43ea36aa..a28649a7 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -771,7 +771,7 @@ subroutine get_all_spin_singles(buffer, idx, spindet, Nint, size_buffer, singles endif enddo n_singles = n_singles-1 - deallocate(xorvec) + deallocate(xorvec, degree) end diff --git a/src/Utils/sort.irp.f b/src/Utils/sort.irp.f index 04e71208..51dee121 100644 --- a/src/Utils/sort.irp.f +++ b/src/Utils/sort.irp.f @@ -164,7 +164,7 @@ BEGIN_TEMPLATE ! Returns the number of sorted elements END_DOC integer, intent(in) :: isize - $type, intent(inout) :: x(isize) + $type, intent(in) :: x(isize) integer, intent(out) :: n integer :: i if (isize < 2) then @@ -172,14 +172,14 @@ BEGIN_TEMPLATE return endif - if (x(1) > x(2)) then + if (x(1) >= x(2)) then n=1 else n=0 endif do i=2,isize - if (x(i-1) > x(i)) then + if (x(i-1) >= x(i)) then n=n+1 endif enddo @@ -197,15 +197,12 @@ BEGIN_TEMPLATE $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) integer :: n - if (isize < 32) then + call sorted_$Xnumber(x,isize,n) + print *, isize, n, isize-n + if ( isize-n < 1000) then call insertion_$Xsort(x,iorder,isize) else -! call sorted_$Xnumber(x,isize,n) -! if ( (16*n) / isize > 0) then -! call insertion_$Xsort(x,iorder,isize) -! else - call heap_$Xsort(x,iorder,isize) -! endif + call heap_$Xsort(x,iorder,isize) endif end subroutine $Xsort