diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f index bf56855a..71d69e82 100644 --- a/src/Davidson/diagonalization_hs2.irp.f +++ b/src/Davidson/diagonalization_hs2.irp.f @@ -25,7 +25,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d double precision, intent(out) :: energies(N_st_diag), s2_out(N_st_diag) double precision, allocatable :: H_jj(:), S2_jj(:) - double precision :: diag_h_mat_elem + double precision :: diag_H_mat_elem, diag_S_mat_elem integer :: i ASSERT (N_st > 0) ASSERT (sze > 0) @@ -37,10 +37,10 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d !$OMP PARALLEL DEFAULT(NONE) & !$OMP SHARED(sze,H_jj,S2_jj, dets_in,Nint) & !$OMP PRIVATE(i) - !$OMP DO SCHEDULE(guided) + !$OMP DO SCHEDULE(static) do i=1,sze - H_jj(i) = diag_h_mat_elem(dets_in(1,1,i),Nint) - call get_s2(dets_in(1,1,i),dets_in(1,1,i),Nint,S2_jj(i)) + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + S2_jj(i) = diag_S_mat_elem(dets_in(1,1,i),Nint) enddo !$OMP END DO !$OMP END PARALLEL @@ -209,7 +209,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s do k=1,N_st_diag call normalize(u_in(1,k),sze) enddo - + update_dets = 1 @@ -235,7 +235,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s if (distributed_davidson) then call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8,update_dets) else - call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8) + call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) endif update_dets = 0 diff --git a/src/Davidson/diagonalize_CI.irp.f b/src/Davidson/diagonalize_CI.irp.f index e1b67438..9b98ea91 100644 --- a/src/Davidson/diagonalize_CI.irp.f +++ b/src/Davidson/diagonalize_CI.irp.f @@ -66,7 +66,6 @@ END_PROVIDER call davidson_diag_HS2(psi_det,CI_eigenvectors, CI_eigenvectors_s2, & size(CI_eigenvectors,1),CI_electronic_energy, & N_det,min(N_det,N_states),min(N_det,N_states_diag),N_int,output_determinants) - else if (diag_algorithm == "Lapack") then diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f index 7231611a..e59f80c0 100644 --- a/src/Davidson/u0Hu0.irp.f +++ b/src/Davidson/u0Hu0.irp.f @@ -613,20 +613,37 @@ end -subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) +subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze_8) use bitmasks implicit none BEGIN_DOC ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> ! - ! n : number of determinants - ! - ! H_jj : array of - ! - ! S2_jj : array of + ! Assumes that the determinants are in psi_det END_DOC integer, intent(in) :: N_st,sze_8 - double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) + double precision, intent(inout) :: v_0(sze_8,N_st), s_0(sze_8,N_st), u_0(sze_8,N_st) + integer :: k + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + call H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + +subroutine H_S2_u_0_nstates_bilinear_order(v_0,s_0,u_0,N_st,sze_8) + use bitmasks + implicit none + BEGIN_DOC + ! Computes v_0 = H|u_0> and s_0 = S^2 |u_0> + END_DOC + integer, intent(in) :: N_st,sze_8 + double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st), u_0(sze_8,N_st) PROVIDE ref_bitmask_energy @@ -643,7 +660,6 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) integer(bit_kind) :: tmp_det2(N_int,2) integer(bit_kind) :: tmp_det3(N_int,2) integer(bit_kind), allocatable :: buffer(:,:) - double precision :: ck(N_st), cl(N_st), cm(N_st) integer :: n_singles, n_doubles integer, allocatable :: singles(:), doubles(:) integer, allocatable :: singles_a(:,:), singles_b(:,:) @@ -651,18 +667,33 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) logical, allocatable :: is_single_a(:) logical, allocatable :: is_single_b(:) integer :: maxab, n_singles_max + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) maxab = max(N_det_alpha_unique, N_det_beta_unique) allocate( buffer(N_int,maxab), & singles(maxab), doubles(maxab), & is_single_a(N_det_alpha_unique), & is_single_b(N_det_beta_unique), & - idx(maxab), idx0(maxab)) + idx(maxab), idx0(maxab), & + u_t(N_st,N_det), v_t(N_st,N_det), s_t(N_st,N_det) ) do i=1,maxab idx0(i) = i enddo +! call dtranspose( & +! u_0, & +! size(u_0, 1), & +! u_t, & +! size(u_t, 1), & +! N_det, N_st) +! + do k=1,N_det + do l=1,N_st + u_t(l,k) = u_0(k,l) + enddo + enddo + ! Prepare the array of all alpha single excitations ! ------------------------------------------------- @@ -683,8 +714,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) singles_a(1,i), singles_a(0,i)) enddo - v_0 = 0.d0 - s_0 = 0.d0 + v_t = 0.d0 + s_t = 0.d0 do k_a=1,N_det ! Initial determinant is at k_a in alpha-major representation @@ -699,21 +730,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) ! Initial determinant is at k_b in beta-major representation ! ---------------------------------------------------------------------- - k_b = psi_bilinear_matrix_order_reverse(k_a) + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) - ! Diagonal contribution - ! --------------------- - - double precision, external :: diag_H_mat_elem, diag_S_mat_elem - - hij = diag_H_mat_elem(tmp_det,N_int) - sij = diag_S_mat_elem(tmp_det,N_int) - do l=1,N_st - v_0(k_a,l) = v_0(k_a,l) + hij * psi_bilinear_matrix_values(k_a,l) - s_0(k_a,l) = s_0(k_a,l) + sij * psi_bilinear_matrix_values(k_a,l) - enddo - - ! Get all single and double alpha excitations ! =========================================== @@ -721,7 +739,7 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) ! Loop inside the beta column to gather all the connected alphas i=1 - l_a = k_a+1 + l_a = k_a lcol = psi_bilinear_matrix_columns(l_a) do while (lcol == kcol) lrow = psi_bilinear_matrix_rows(l_a) @@ -752,8 +770,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 1, hij) do l=1,N_st - v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l) - v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l) + v_t(l,l_a) += hij * u_t(l,k_a) + v_t(l,k_a) += hij * u_t(l,l_a) ! single => sij = 0 enddo enddo @@ -770,8 +788,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) enddo call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, doubles(i)), N_int, hij) do l=1,N_st - v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l) - v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l) + v_t(l,l_a) += hij * u_t(l,k_a) + v_t(l,k_a) += hij * u_t(l,l_a) ! same spin => sij = 0 enddo enddo @@ -785,7 +803,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) ! Loop inside the alpha row to gather all the connected betas i=1 - l_b = k_b+1 + l_b = k_b + lrow = psi_bilinear_matrix_transp_rows(l_b) do while (lrow == krow) lcol = psi_bilinear_matrix_transp_columns(l_b) @@ -817,8 +836,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) l_a = psi_bilinear_matrix_transp_order(l_b) call i_H_j_mono_spin( tmp_det, tmp_det2, N_int, 2, hij) do l=1,N_st - v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l) - v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l) + v_t(l,l_a) += hij * u_t(l,k_a) + v_t(l,k_a) += hij * u_t(l,l_a) ! single => sij = 0 enddo enddo @@ -836,8 +855,8 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) l_a = psi_bilinear_matrix_transp_order(l_b) call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, doubles(i)), N_int, hij) do l=1,N_st - v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l) - v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l) + v_t(l,l_a) += hij * u_t(l,k_a) + v_t(l,k_a) += hij * u_t(l,l_a) ! same spin => sij = 0 enddo enddo @@ -892,12 +911,13 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) if (is_single_a(lrow)) then tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) - call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij,sij) + call i_H_j_double_alpha_beta(tmp_det,tmp_det2,N_int,hij) + call get_s2(tmp_det,tmp_det2,N_int,sij) do l=1,N_st - v_0(k_a, l) += hij * psi_bilinear_matrix_values(l_a,l) - v_0(l_a, l) += hij * psi_bilinear_matrix_values(k_a,l) - s_0(k_a, l) -= sij * psi_bilinear_matrix_values(l_a,l) - s_0(l_a, l) -= sij * psi_bilinear_matrix_values(k_a,l) + v_t(l,k_a) += hij * u_t(l,l_a) + v_t(l,l_a) += hij * u_t(l,k_a) + s_t(l,k_a) += sij * u_t(l,l_a) + s_t(l,l_a) += sij * u_t(l,k_a) enddo endif l_a += 1 @@ -905,6 +925,38 @@ subroutine H_S2_u_0_nstates_new(v_0,s_0,N_st,sze_8) enddo enddo + ! Diagonal contribution + ! --------------------- + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + + hij = diag_H_mat_elem(tmp_det,N_int) + sij = diag_S_mat_elem(tmp_det,N_int) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + enddo + +! call dtranspose( & +! v_t, & +! size(v_t, 1), & +! v_0, & +! size(v_0, 1), & +! N_st, N_det) +! +! call dtranspose( & +! s_t, & +! size(s_t, 1), & +! s_0, & +! size(s_0, 1), & +! N_st, N_det) + do k=1,N_det + do l=1,N_st + v_0(k,l) = v_t(l,k) + s_0(k,l) = s_t(l,k) + enddo enddo end diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index ef246e50..496b59de 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -2556,7 +2556,7 @@ subroutine i_H_j_double_spin(key_i,key_j,Nint,hij) exc(1,2), mo_integrals_map) ) end -subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij,phase) +subroutine i_H_j_double_alpha_beta(key_i,key_j,Nint,hij) use bitmasks implicit none BEGIN_DOC diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index 783474f9..31fd3915 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -389,6 +389,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_rows , (N_det) ] &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns, (N_det) ] &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order , (N_det) ] +&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_reverse , (N_det) ] &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_columns_loc, (N_det_beta_unique+1) ] use bitmasks implicit none @@ -429,7 +430,6 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) do l=1,N_states call dset_order(psi_bilinear_matrix_values(1,l),psi_bilinear_matrix_order,N_det) enddo - psi_bilinear_matrix_columns_loc(1:N_det_beta_unique) = -1 psi_bilinear_matrix_columns_loc(1) = 1 do k=2,N_det if (psi_bilinear_matrix_columns(k) == psi_bilinear_matrix_columns(k-1)) then @@ -440,6 +440,9 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_values, (N_det,N_states) endif enddo psi_bilinear_matrix_columns_loc(N_det_beta_unique+1) = N_det+1 + do k=1,N_det + psi_bilinear_matrix_order_reverse(psi_bilinear_matrix_order(k)) = k + enddo deallocate(to_sort) END_PROVIDER @@ -448,7 +451,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_rows , (N_det) ] &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_columns, (N_det) ] &BEGIN_PROVIDER [ integer, psi_bilinear_matrix_transp_order , (N_det) ] -&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_reverse , (N_det) ] +&BEGIN_PROVIDER [ integer, psi_bilinear_matrix_order_transp_reverse , (N_det) ] use bitmasks implicit none BEGIN_DOC @@ -487,7 +490,7 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix_transp_values, (N_det,N_ call dset_order(psi_bilinear_matrix_transp_values(1,l),psi_bilinear_matrix_transp_order,N_det) enddo do k=1,N_det - psi_bilinear_matrix_order_reverse(psi_bilinear_matrix_transp_order(k)) = k + psi_bilinear_matrix_order_transp_reverse(psi_bilinear_matrix_transp_order(k)) = k enddo deallocate(to_sort) END_PROVIDER @@ -1387,3 +1390,18 @@ subroutine get_all_spin_doubles_3(buffer, idx, spindet, size_buffer, doubles, n_ end +subroutine copy_psi_bilinear_to_psi(psi, isize) + implicit none + BEGIN_DOC +! Overwrites psi_det and psi_coef with the wf in bilinear order + END_DOC + integer, intent(in) :: isize + integer(bit_kind), intent(out) :: psi(N_int,2,isize) + integer :: i,j,k,l + do k=1,isize + i = psi_bilinear_matrix_rows(k) + j = psi_bilinear_matrix_columns(k) + psi(1:N_int,1,k) = psi_det_alpha_unique(1:N_int,i) + psi(1:N_int,2,k) = psi_det_beta_unique(1:N_int,j) + enddo +end diff --git a/src/Utils/sort.irp.f b/src/Utils/sort.irp.f index dc91ab3a..04e71208 100644 --- a/src/Utils/sort.irp.f +++ b/src/Utils/sort.irp.f @@ -158,6 +158,34 @@ BEGIN_TEMPLATE end subroutine heap_$Xsort_big + subroutine sorted_$Xnumber(x,isize,n) + implicit none + BEGIN_DOC +! Returns the number of sorted elements + END_DOC + integer, intent(in) :: isize + $type, intent(inout) :: x(isize) + integer, intent(out) :: n + integer :: i + if (isize < 2) then + n = 1 + return + endif + + if (x(1) > x(2)) then + n=1 + else + n=0 + endif + + do i=2,isize + if (x(i-1) > x(i)) then + n=n+1 + endif + enddo + + end + subroutine $Xsort(x,iorder,isize) implicit none BEGIN_DOC @@ -168,10 +196,16 @@ BEGIN_TEMPLATE integer,intent(in) :: isize $type,intent(inout) :: x(isize) integer,intent(inout) :: iorder(isize) + integer :: n if (isize < 32) then call insertion_$Xsort(x,iorder,isize) else - call heap_$Xsort(x,iorder,isize) +! call sorted_$Xnumber(x,isize,n) +! if ( (16*n) / isize > 0) then +! call insertion_$Xsort(x,iorder,isize) +! else + call heap_$Xsort(x,iorder,isize) +! endif endif end subroutine $Xsort