BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] &BEGIN_PROVIDER [ double precision, psi_s2, (N_states) ] implicit none BEGIN_DOC ! psi_energy(i) = $\langle \Psi_i | H | \Psi_i \rangle$ ! ! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$ END_DOC call u_0_HS2_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size) integer :: i do i=N_det+1,N_states psi_energy(i) = 0.d0 psi_s2(i) = 0.d0 enddo END_PROVIDER BEGIN_PROVIDER [ double precision, psi_energy_with_nucl_rep, (N_states) ] implicit none BEGIN_DOC ! Energy of the wave function with the nuclear repulsion energy. END_DOC psi_energy_with_nucl_rep(1:N_states) = psi_energy(1:N_states) + nuclear_repulsion END_PROVIDER subroutine u_0_HS2_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze) use bitmasks implicit none BEGIN_DOC ! Computes $E_0 = \frac{\langle u_0 | H | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ ! ! and $S_0 = \frac{\langle u_0 | S^2 | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ ! ! n : number of determinants ! END_DOC integer, intent(in) :: n,Nint, N_st, sze double precision, intent(out) :: e_0(N_st),s_0(N_st) double precision, intent(inout) :: u_0(sze,N_st) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:) double precision :: u_dot_u,u_dot_v,diag_H_mat_elem integer :: i,j, istate if ((n > 100000).and.distributed_davidson) then allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag)) u_1(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n) else if (n < n_det_max_full) then allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st)) v_0(:,:) = 0.d0 u_1(:,:) = 0.d0 s_vec(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) do istate = 1,N_st do j=1,n do i=1,n v_0(i,istate) = v_0(i,istate) + h_matrix_all_dets(i,j) * u_0(j,istate) s_vec(i,istate) = s_vec(i,istate) + S2_matrix_all_dets(i,j) * u_0(j,istate) enddo enddo enddo else allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st)) u_1(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) call H_S2_u_0_nstates_openmp(v_0,s_vec,u_1,N_st,n) endif u_0(1:n,1:N_st) = u_1(1:n,1:N_st) deallocate(u_1) double precision :: norm !$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED) do i=1,N_st norm = u_dot_u(u_0(1,i),n) if (norm /= 0.d0) then e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/norm s_0(i) = u_dot_v(s_vec(1,i),u_0(1,i),n)/norm else e_0(i) = 0.d0 s_0(i) = 0.d0 endif enddo !$OMP END PARALLEL DO deallocate (s_vec, v_0) end subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) use bitmasks implicit none BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2 | u_0\rangle$. ! ! Assumes that the determinants are in psi_det ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st) integer :: k double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo v_t = 0.d0 s_t = 0.d0 call dtranspose( & u_0, & size(u_0, 1), & u_t, & size(u_t, 1), & N_det, N_st) call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1) deallocate(u_t) call dtranspose( & v_t, & size(v_t, 1), & v_0, & size(v_0, 1), & N_st, N_det) call dtranspose( & s_t, & size(s_t, 1), & s_0, & size(s_0, 1), & N_st, N_det) deallocate(v_t,s_t) do k=1,N_st call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) enddo end subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) use bitmasks implicit none BEGIN_DOC ! Computes $v_t = H | u_t\rangle$ and $s_t = S^2 | u_t\rangle$ ! ! Default should be 1,N_det,0,1 END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) PROVIDE ref_bitmask_energy N_int select case (N_int) case (1) call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) case (2) call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) case (3) call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) case (4) call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) case default call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) end select end BEGIN_TEMPLATE subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) use bitmasks implicit none BEGIN_DOC ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ ! ! Default should be 1,N_det,0,1 END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) double precision :: hij, sij integer :: i,j,k,l,kk integer :: k_a, k_b, l_a, l_b, m_a, m_b integer :: istate integer :: krow, kcol, krow_b, kcol_b integer :: lrow, lcol integer :: mrow, mcol integer(bit_kind) :: spindet($N_int) integer(bit_kind) :: tmp_det($N_int,2) integer(bit_kind) :: tmp_det2($N_int,2) integer(bit_kind) :: tmp_det3($N_int,2) integer(bit_kind), allocatable :: buffer(:,:) integer :: n_doubles integer, allocatable :: doubles(:) integer, allocatable :: singles_a(:) integer, allocatable :: singles_b(:) integer, allocatable :: idx(:), idx0(:) integer :: maxab, n_singles_a, n_singles_b, kcol_prev integer*8 :: k8 logical :: compute_singles integer*8 :: last_found, left, right, right_max double precision :: rss, mem, ratio double precision, allocatable :: utl(:,:) integer, parameter :: block_size=128 logical :: u_is_sparse ! call resident_memory(rss) ! mem = dble(singles_beta_csc_size) / 1024.d0**3 ! ! compute_singles = (mem+rss > qp_max_mem) ! ! if (.not.compute_singles) then ! provide singles_beta_csc ! endif compute_singles=.True. maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 allocate(idx0(maxab)) do i=1,maxab idx0(i) = i enddo ! Prepare the array of all alpha single excitations ! ------------------------------------------------- PROVIDE N_int nthreads_davidson !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & !$OMP psi_bilinear_matrix_columns, & !$OMP psi_det_alpha_unique, psi_det_beta_unique, & !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & !$OMP psi_bilinear_matrix_transp_rows, & !$OMP psi_bilinear_matrix_transp_columns, & !$OMP psi_bilinear_matrix_transp_order, N_st, & !$OMP psi_bilinear_matrix_order_transp_reverse, & !$OMP psi_bilinear_matrix_columns_loc, & !$OMP psi_bilinear_matrix_transp_rows_loc, & !$OMP istart, iend, istep, irp_here, v_t, s_t, & !$OMP ishift, idx0, u_t, maxab, compute_singles, & !$OMP singles_alpha_csc,singles_alpha_csc_idx, & !$OMP singles_beta_csc,singles_beta_csc_idx) & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & !$OMP buffer, doubles, n_doubles, umax, & !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, & !$OMP singles_a, n_singles_a, singles_b, ratio, & !$OMP n_singles_b, k8, last_found,left,right,right_max) ! Alpha/Beta double excitations ! ============================= allocate( buffer($N_int,maxab), & singles_a(maxab), & singles_b(maxab), & doubles(maxab), & idx(maxab), utl(N_st,block_size)) kcol_prev=-1 ! Check if u has multiple zeros kk=1 ! Avoid division by zero !$OMP DO do k=1,N_det umax = 0.d0 do l=1,N_st umax = max(umax, dabs(u_t(l,k))) enddo if (umax < 1.d-20) then !$OMP ATOMIC kk = kk+1 endif enddo !$OMP END DO u_is_sparse = N_det / kk < 20 ! 5% ASSERT (iend <= N_det) ASSERT (istart > 0) ASSERT (istep > 0) !$OMP DO SCHEDULE(guided,64) do k_a=istart+ishift,iend,istep krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) kcol = psi_bilinear_matrix_columns(k_a) ASSERT (kcol <= N_det_beta_unique) tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) if (kcol /= kcol_prev) then tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) if (compute_singles) then call get_all_spin_singles_$N_int( & psi_det_beta_unique, idx0, & tmp_det(1,2), N_det_beta_unique, & singles_b, n_singles_b) else n_singles_b = 0 !DIR$ LOOP COUNT avg(1000) do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 n_singles_b = n_singles_b+1 singles_b(n_singles_b) = singles_beta_csc(k8) enddo endif endif kcol_prev = kcol ! Loop over singly excited beta columns ! ------------------------------------- !DIR$ LOOP COUNT avg(1000) do i=1,n_singles_b lcol = singles_b(i) tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) !--- ! if (compute_singles) then l_a = psi_bilinear_matrix_columns_loc(lcol) ASSERT (l_a <= N_det) !DIR$ UNROLL(8) !DIR$ LOOP COUNT avg(50000) do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot ASSERT (l_a <= N_det) idx(j) = l_a l_a = l_a+1 enddo j = j-1 call get_all_spin_singles_$N_int( & buffer, idx, tmp_det(1,1), j, & singles_a, n_singles_a ) !----- ! else ! ! ! Search for singles ! !call cpu_time(time0) ! ! Right boundary ! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 ! ASSERT (l_a <= N_det) ! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) ! lrow = psi_bilinear_matrix_rows(l_a) ! ASSERT (lrow <= N_det_alpha_unique) ! ! left = singles_alpha_csc_idx(krow) ! right_max = -1_8 ! right = singles_alpha_csc_idx(krow+1) ! do while (right-left>0_8) ! k8 = shiftr(right+left,1) ! if (singles_alpha_csc(k8) > lrow) then ! right = k8 ! else if (singles_alpha_csc(k8) < lrow) then ! left = k8 + 1_8 ! else ! right_max = k8+1_8 ! exit ! endif ! enddo ! if (right_max > 0_8) exit ! l_a = l_a-1 ! enddo ! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) ! ! ! Search ! n_singles_a = 0 ! l_a = psi_bilinear_matrix_columns_loc(lcol) ! ASSERT (l_a <= N_det) ! ! last_found = singles_alpha_csc_idx(krow) ! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) ! lrow = psi_bilinear_matrix_rows(l_a) ! ASSERT (lrow <= N_det_alpha_unique) ! ! left = last_found ! right = right_max ! do while (right-left>0_8) ! k8 = shiftr(right+left,1) ! if (singles_alpha_csc(k8) > lrow) then ! right = k8 ! else if (singles_alpha_csc(k8) < lrow) then ! left = k8 + 1_8 ! else ! n_singles_a += 1 ! singles_a(n_singles_a) = l_a ! last_found = k8+1_8 ! exit ! endif ! enddo ! l_a = l_a+1 ! enddo ! j = j-1 ! ! endif !----- ! Loop over alpha singles ! ----------------------- double precision :: umax !DIR$ LOOP COUNT avg(1000) do k = 1,n_singles_a,block_size umax = 0.d0 ! Prefetch u_t(:,l_a) if (u_is_sparse) then do kk=0,block_size-1 if (k+kk > n_singles_a) exit l_a = singles_a(k+kk) ASSERT (l_a <= N_det) do l=1,N_st utl(l,kk+1) = u_t(l,l_a) umax = max(umax, dabs(utl(l,kk+1))) enddo enddo else do kk=0,block_size-1 if (k+kk > n_singles_a) exit l_a = singles_a(k+kk) ASSERT (l_a <= N_det) utl(:,kk+1) = u_t(:,l_a) enddo umax = 1.d0 endif if (umax < 1.d-20) cycle do kk=0,block_size-1 if (k+kk > n_singles_a) exit l_a = singles_a(k+kk) lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij) call get_s2(tmp_det,tmp_det2,$N_int,sij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) enddo enddo enddo enddo enddo !$OMP END DO !$OMP DO SCHEDULE(guided,64) do k_a=istart+ishift,iend,istep ! Single and double alpha excitations ! =================================== ! Initial determinant is at k_a in alpha-major representation ! ----------------------------------------------------------------------- krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) kcol = psi_bilinear_matrix_columns(k_a) ASSERT (kcol <= N_det_beta_unique) tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) ! Initial determinant is at k_b in beta-major representation ! ---------------------------------------------------------------------- k_b = psi_bilinear_matrix_order_transp_reverse(k_a) ASSERT (k_b <= N_det) spindet(1:$N_int) = tmp_det(1:$N_int,1) ! Loop inside the beta column to gather all the connected alphas lcol = psi_bilinear_matrix_columns(k_a) l_a = psi_bilinear_matrix_columns_loc(lcol) !DIR$ LOOP COUNT avg(200000) do i=1,N_det_alpha_unique if (l_a > N_det) exit lcol = psi_bilinear_matrix_columns(l_a) if (lcol /= kcol) exit lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot idx(i) = l_a l_a = l_a+1 enddo i = i-1 call get_all_spin_singles_and_doubles_$N_int( & buffer, idx, spindet, i, & singles_a, doubles, n_singles_a, n_doubles ) ! Compute Hij for all alpha singles ! ---------------------------------- tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) !DIR$ LOOP COUNT avg(1000) do i=1,n_singles_a,block_size umax = 0.d0 ! Prefetch u_t(:,l_a) if (u_is_sparse) then do kk=0,block_size-1 if (i+kk > n_singles_a) exit l_a = singles_a(i+kk) ASSERT (l_a <= N_det) do l=1,N_st utl(l,kk+1) = u_t(l,l_a) umax = max(umax, dabs(utl(l,kk+1))) enddo enddo else do kk=0,block_size-1 if (i+kk > n_singles_a) exit l_a = singles_a(i+kk) ASSERT (l_a <= N_det) utl(:,kk+1) = u_t(:,l_a) enddo umax = 1.d0 endif if (umax < 1.d-20) cycle do kk=0,block_size-1 if (i+kk > n_singles_a) exit l_a = singles_a(i+kk) lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) ! single => sij = 0 enddo enddo enddo ! Compute Hij for all alpha doubles ! ---------------------------------- !DIR$ LOOP COUNT avg(50000) do i=1,n_doubles,block_size umax = 0.d0 ! Prefetch u_t(:,l_a) if (u_is_sparse) then do kk=0,block_size-1 if (i+kk > n_doubles) exit l_a = doubles(i+kk) ASSERT (l_a <= N_det) do l=1,N_st utl(l,kk+1) = u_t(l,l_a) umax = max(umax, dabs(utl(l,kk+1))) enddo enddo else do kk=0,block_size-1 if (i+kk > n_doubles) exit l_a = doubles(i+kk) ASSERT (l_a <= N_det) utl(:,kk+1) = u_t(:,l_a) enddo umax = 1.d0 endif if (umax < 1.d-20) cycle do kk=0,block_size-1 if (i+kk > n_doubles) exit l_a = doubles(i+kk) lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) ! same spin => sij = 0 enddo enddo enddo ! Single and double beta excitations ! ================================== ! Initial determinant is at k_a in alpha-major representation ! ----------------------------------------------------------------------- krow = psi_bilinear_matrix_rows(k_a) kcol = psi_bilinear_matrix_columns(k_a) tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) spindet(1:$N_int) = tmp_det(1:$N_int,2) ! Initial determinant is at k_b in beta-major representation ! ----------------------------------------------------------------------- k_b = psi_bilinear_matrix_order_transp_reverse(k_a) ASSERT (k_b <= N_det) ! Loop inside the alpha row to gather all the connected betas lrow = psi_bilinear_matrix_transp_rows(k_b) l_b = psi_bilinear_matrix_transp_rows_loc(lrow) !DIR$ LOOP COUNT avg(200000) do i=1,N_det_beta_unique if (l_b > N_det) exit lrow = psi_bilinear_matrix_transp_rows(l_b) if (lrow /= krow) exit lcol = psi_bilinear_matrix_transp_columns(l_b) ASSERT (lcol <= N_det_beta_unique) buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) idx(i) = l_b l_b = l_b+1 enddo i = i-1 call get_all_spin_singles_and_doubles_$N_int( & buffer, idx, spindet, i, & singles_b, doubles, n_singles_b, n_doubles ) ! Compute Hij for all beta singles ! ---------------------------------- tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) !DIR$ LOOP COUNT avg(1000) do i=1,n_singles_b,block_size umax = 0.d0 if (u_is_sparse) then do kk=0,block_size-1 if (i+kk > n_singles_b) exit l_b = singles_b(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) ASSERT (l_b <= N_det) ASSERT (l_a <= N_det) do l=1,N_st utl(l,kk+1) = u_t(l,l_a) umax = max(umax, dabs(utl(l,kk+1))) enddo enddo else do kk=0,block_size-1 if (i+kk > n_singles_b) exit l_b = singles_b(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) ASSERT (l_b <= N_det) ASSERT (l_a <= N_det) utl(:,kk+1) = u_t(:,l_a) enddo umax = 1.d0 endif if (umax < 1.d-20) cycle do kk=0,block_size-1 if (i+kk > n_singles_b) exit l_b = singles_b(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) lcol = psi_bilinear_matrix_transp_columns(l_b) ASSERT (lcol <= N_det_beta_unique) tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) ! single => sij = 0 enddo enddo enddo ! Compute Hij for all beta doubles ! ---------------------------------- !DIR$ LOOP COUNT avg(50000) do i=1,n_doubles,block_size umax = 0.d0 if (u_is_sparse) then do kk=0,block_size-1 if (i+kk > n_doubles) exit l_b = doubles(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) ASSERT (l_b <= N_det) ASSERT (l_a <= N_det) do l=1,N_st utl(l,kk+1) = u_t(l,l_a) umax = max(umax, dabs(utl(l,kk+1))) enddo enddo else do kk=0,block_size-1 if (i+kk > n_doubles) exit l_b = doubles(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) ASSERT (l_b <= N_det) ASSERT (l_a <= N_det) utl(:,kk+1) = u_t(:,l_a) enddo umax = 1.d0 endif if (umax < 1.d-20) cycle do kk=0,block_size-1 if (i+kk > n_doubles) exit l_b = doubles(i+kk) l_a = psi_bilinear_matrix_transp_order(l_b) lcol = psi_bilinear_matrix_transp_columns(l_b) ASSERT (lcol <= N_det_beta_unique) call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) ! same spin => sij = 0 enddo enddo enddo ! Diagonal contribution ! ===================== ! Initial determinant is at k_a in alpha-major representation ! ----------------------------------------------------------------------- if (u_is_sparse) then umax = 0.d0 do l=1,N_st umax = max(umax, dabs(u_t(l,k_a))) enddo else umax = 1.d0 endif if (umax < 1.d-20) cycle krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) kcol = psi_bilinear_matrix_columns(k_a) ASSERT (kcol <= N_det_beta_unique) tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) double precision, external :: diag_H_mat_elem, diag_S_mat_elem hij = diag_H_mat_elem(tmp_det,$N_int) sij = diag_S_mat_elem(tmp_det,$N_int) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) enddo end do !$OMP END DO deallocate(buffer, singles_a, singles_b, doubles, idx, utl) !$OMP END PARALLEL end SUBST [ N_int ] 1;; 2;; 3;; 4;; N_int;; END_TEMPLATE