diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index abf1b63a..33aa8ee5 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -108,7 +108,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin integer(bit_kind) :: dets_in_sorted(Nint,2,sze) integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) @@ -430,7 +430,6 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) do ii=shortcut(sh),shortcut(sh+1)-1 idx(0) = ii - !call filter_connected_davidson_mwen(keys_tmp,shortcut,keys_tmp(1,1,ii),Nint,ii-1,idx) call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) i = sort_idx(ii) @@ -470,79 +469,79 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) end - -subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint,istate - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy delta_ij - integer, parameter :: block_size = 157 - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & - !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) - !$OMP DO SCHEDULE(static) - do i=1,n - v_0(i) = H_jj(i) * u_0(i) - enddo - !$OMP END DO - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - !$OMP DO SCHEDULE(guided) - do i=1,n - idx(0) = i - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) - do jj=1,idx(0) - j = idx(jj) -! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - hij = hij - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) -! endif - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(guided) - do ii=1,n_det_ref - i = idx_ref(ii) - do jj = 1, n_det_non_ref - j = idx_non_ref(jj) - vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) - vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) - enddo - enddo - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL -end +! +! subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Computes v_0 = H|u_0> +! ! +! ! n : number of determinants +! ! +! ! H_jj : array of +! END_DOC +! integer, intent(in) :: n,Nint,istate +! double precision, intent(out) :: v_0(n) +! double precision, intent(in) :: u_0(n) +! double precision, intent(in) :: H_jj(n) +! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) +! integer, allocatable :: idx(:) +! double precision :: hij +! double precision, allocatable :: vt(:) +! integer :: i,j,k,l, jj,ii +! integer :: i0, j0 +! +! +! +! +! +! ASSERT (Nint > 0) +! ASSERT (Nint == N_int) +! ASSERT (n>0) +! PROVIDE ref_bitmask_energy delta_ij +! integer, parameter :: block_size = 157 +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & +! !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) +! !$OMP DO SCHEDULE(static) +! do i=1,n +! v_0(i) = H_jj(i) * u_0(i) +! enddo +! !$OMP END DO +! allocate(idx(0:n), vt(n)) +! Vt = 0.d0 +! !$OMP DO SCHEDULE(guided) +! do i=1,n +! idx(0) = i +! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) +! do jj=1,idx(0) +! j = idx(jj) +! ! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then +! call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) +! hij = hij +! vt (i) = vt (i) + hij*u_0(j) +! vt (j) = vt (j) + hij*u_0(i) +! ! endif +! enddo +! enddo +! !$OMP END DO +! +! !$OMP DO SCHEDULE(guided) +! do ii=1,n_det_ref +! i = idx_ref(ii) +! do jj = 1, n_det_non_ref +! j = idx_non_ref(jj) +! vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) +! vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) +! enddo +! enddo +! !$OMP END DO +! !$OMP CRITICAL +! do i=1,n +! v_0(i) = v_0(i) + vt(i) +! enddo +! !$OMP END CRITICAL +! deallocate(idx,vt) +! !$OMP END PARALLEL +! end diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 03a699c7..b017570a 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -288,7 +288,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun integer(bit_kind) :: dets_in_sorted(Nint, 2, sze) integer :: idx(sze), shortcut(0:sze+1) - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index ed4f092e..3028e32a 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -147,6 +147,7 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end do idx(l) = i_beta l = l + 1 + exit beta_loop end do beta_loop else do i_beta=warp(1,i_alpha),endloop @@ -190,201 +191,201 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end -subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer,intent(in) :: shortcut(0:sze+1) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: i_alpha, i_beta, exc_a, exc_b, endloop - integer(bit_kind) :: tmp1, tmp2 - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - i_alpha = 0 - - if (Nint==1) then - do while(shortcut(i_alpha+1) < sze) - i_alpha = i_alpha + 1 - exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) - if(exc_a > 4) then - cycle - end if - endloop = min(shortcut(i_alpha+1)-1, sze) - if(exc_a == 4) then - do i_beta = shortcut(i_alpha), endloop - if(key1(1,2,i_beta) == key2(1,2)) then - idx(l) = i_beta - l = l + 1 - exit - end if - end do - else - do i_beta = shortcut(i_alpha), endloop - exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) - if(exc_b + exc_a <= 4) then - idx(l) = i_beta - l = l + 1 - end if - end do - end if - end do - else - print *, "TBD : filter_connected_davidson_shortcut Nint>1" - stop - end if - - idx(0) = l-1 -end - -subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: j_int, j_start - integer*8 :: itmp - - PROVIDE N_con_int det_connections - - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - - if (Nint==1) then - - i = idx(0) ! lecture dans un intent(out) ? - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==2) then - - - i = idx(0) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==3) then - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) + & - popcnt(xor( key1(3,1,j), key2(3,1))) + & - popcnt(xor( key1(3,2,j), key2(3,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) - do k=1,Nint - degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& - popcnt(xor( key1(k,2,j), key2(k,2))) - if (degree_x2 > 4) then - exit - endif - enddo - if (degree_x2 <= 5) then - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - endif - idx(0) = l-1 -end +! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(out) :: idx(0:sze) +! +! integer,intent(in) :: shortcut(0:sze+1) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: i_alpha, i_beta, exc_a, exc_b, endloop +! integer(bit_kind) :: tmp1, tmp2 +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! i_alpha = 0 +! +! if (Nint==1) then +! do while(shortcut(i_alpha+1) < sze) +! i_alpha = i_alpha + 1 +! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) +! if(exc_a > 4) then +! cycle +! end if +! endloop = min(shortcut(i_alpha+1)-1, sze) +! if(exc_a == 4) then +! do i_beta = shortcut(i_alpha), endloop +! if(key1(1,2,i_beta) == key2(1,2)) then +! idx(l) = i_beta +! l = l + 1 +! exit +! end if +! end do +! else +! do i_beta = shortcut(i_alpha), endloop +! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) +! if(exc_b + exc_a <= 4) then +! idx(l) = i_beta +! l = l + 1 +! end if +! end do +! end if +! end do +! else +! print *, "TBD : filter_connected_davidson_shortcut Nint>1" +! stop +! end if +! +! idx(0) = l-1 +! end +! +! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(inout) :: idx(0:sze) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: j_int, j_start +! integer*8 :: itmp +! +! PROVIDE N_con_int det_connections +! +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! +! if (Nint==1) then +! +! i = idx(0) ! lecture dans un intent(out) ? +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==2) then +! +! +! i = idx(0) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==3) then +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) + & +! popcnt(xor( key1(3,1,j), key2(3,1))) + & +! popcnt(xor( key1(3,2,j), key2(3,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = 0 +! !DEC$ LOOP COUNT MIN(4) +! do k=1,Nint +! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& +! popcnt(xor( key1(k,2,j), key2(k,2))) +! if (degree_x2 > 4) then +! exit +! endif +! enddo +! if (degree_x2 <= 5) then +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! endif +! idx(0) = l-1 +! end subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 72c1b9aa..33b3c45d 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -106,6 +106,7 @@ subroutine get_s2_u0_old(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) s2 += S_z2_Sz end + subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) implicit none use bitmasks @@ -114,35 +115,107 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) double precision, intent(in) :: psi_coefs_tmp(nmax) double precision, intent(out) :: s2 double precision :: s2_tmp - integer :: i,j,l,jj + integer :: i,j,l,jj,ii integer, allocatable :: idx(:) + + integer(bit_kind) :: psi_keys_srt(N_int,2,n) + integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp + + + print *, "totolacitrouille" + call write_time(6) + psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:) + call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int) + print *, "totolacitrouille 2" s2 = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,s2_tmp,idx) & - !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& + !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp) & + !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)& !$OMP REDUCTION(+:s2) allocate(idx(0:n)) !$OMP DO SCHEDULE(dynamic) - do i=1,n - idx(0) = i - call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) - do jj=1,idx(0) - j = idx(jj) - if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp - endif - enddo + + do sh=1,shortcut(0) + warp(1,0) = 0 + do ii=1,sh!shortcut(0) + tmp = 0 + do ni=1,N_int + tmp = popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh)))) + end do + if(tmp <= 4) then + tmp = warp(1,0) + 1 + warp(1,0) = tmp + warp(1,tmp) = shortcut(ii) + warp(2,tmp) = shortcut(ii+1)-1 + end if + end do + + do ii=shortcut(sh),shortcut(sh+1)-1 + !do ii=1,n + idx(0) = ii + call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx) + i = sort_idx(ii) + do jj=1,idx(0) + j = sort_idx(idx(jj)) + if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & + > davidson_threshold ) then + call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp + endif + enddo + end do enddo !$OMP END DO deallocate(idx) !$OMP END PARALLEL s2 = s2+s2 do i=1,n - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) + call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int) s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp enddo s2 = s2 + S_z2_Sz + print *, "totolacitrouille 3" + call write_time(6) end +! +! subroutine get_s2_u0_org(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) +! implicit none +! use bitmasks +! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) +! integer, intent(in) :: n,nmax +! double precision, intent(in) :: psi_coefs_tmp(nmax) +! double precision, intent(out) :: s2 +! double precision :: s2_tmp +! integer :: i,j,l,jj +! integer, allocatable :: idx(:) +! s2 = 0.d0 +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,j,s2_tmp,idx) & +! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& +! !$OMP REDUCTION(+:s2) +! allocate(idx(0:n)) +! !$OMP DO SCHEDULE(dynamic) +! do i=1,n +! idx(0) = i +! call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) +! do jj=1,idx(0) +! j = idx(jj) +! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & +! > davidson_threshold ) then +! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp +! endif +! enddo +! enddo +! !$OMP END DO +! deallocate(idx) +! !$OMP END PARALLEL +! s2 = s2+s2 +! do i=1,n +! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp +! enddo +! s2 = s2 + S_z2_Sz +! end +! diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 06dcb9b7..1c8573dd 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1299,74 +1299,74 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) enddo end - -subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii,sh - integer :: i0, j0 - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - v_0 = 0.d0 - !$OMP DO SCHEDULE(guided) - - - - - - do ii=1,n - idx(0) = ii - i = ii - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) - - do jj=1,idx(0) - j = idx(jj) - if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) - endif - enddo - enddo - - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL - do i=1,n - v_0(i) += H_jj(i) * u_0(i) - enddo -end - +! +! subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Computes v_0 = H|u_0> +! ! +! ! n : number of determinants +! ! +! ! H_jj : array of +! END_DOC +! integer, intent(in) :: n,Nint +! double precision, intent(out) :: v_0(n) +! double precision, intent(in) :: u_0(n) +! double precision, intent(in) :: H_jj(n) +! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) +! integer, allocatable :: idx(:) +! double precision :: hij +! double precision, allocatable :: vt(:) +! integer :: i,j,k,l, jj,ii,sh +! integer :: i0, j0 +! +! +! +! ASSERT (Nint > 0) +! ASSERT (Nint == N_int) +! ASSERT (n>0) +! PROVIDE ref_bitmask_energy +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & +! !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) +! allocate(idx(0:n), vt(n)) +! Vt = 0.d0 +! v_0 = 0.d0 +! !$OMP DO SCHEDULE(guided) +! +! +! +! +! +! do ii=1,n +! idx(0) = ii +! i = ii +! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) +! +! do jj=1,idx(0) +! j = idx(jj) +! if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then +! call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) +! vt (i) = vt (i) + hij*u_0(j) +! vt (j) = vt (j) + hij*u_0(i) +! endif +! enddo +! enddo +! +! !$OMP END DO +! !$OMP CRITICAL +! do i=1,n +! v_0(i) = v_0(i) + vt(i) +! enddo +! !$OMP END CRITICAL +! deallocate(idx,vt) +! !$OMP END PARALLEL +! do i=1,n +! v_0(i) += H_jj(i) * u_0(i) +! enddo +! end +! BEGIN_PROVIDER [ integer, N_con_int ] @@ -1377,166 +1377,169 @@ BEGIN_PROVIDER [ integer, N_con_int ] N_con_int = 1 + ishft(N_det-1,-11) END_PROVIDER -BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] - implicit none - BEGIN_DOC - ! Build connection proxy between determinants - END_DOC - integer :: i,j - integer :: degree - integer :: j_int, j_k, j_l - integer, allocatable :: idx(:) - integer :: thread_num - integer :: omp_get_thread_num - - PROVIDE progress_bar - call start_progress(N_det,'Det connections',0.d0) - - select case(N_int) - - case(1) - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(2) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(3) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & - popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & - popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case default - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - !DIR$ FORCEINLINE - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if (degree < 3) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - end select - call stop_progress - -END_PROVIDER +! BEGIN_PROVIDER [ integer*8, det_connectionsqsd, (N_con_int,N_det) ] +! implicit none +! BEGIN_DOC +! ! Build connection proxy between determinants +! END_DOC +! integer :: i,j +! integer :: degree +! integer :: j_int, j_k, j_l +! integer, allocatable :: idx(:) +! integer :: thread_num +! integer :: omp_get_thread_num +! +! PROVIDE progress_bar +! +! print *,"totolabanane" +! +! call start_progress(N_det,'Det connections',0.d0) +! +! select case(N_int) +! +! case(1) +! +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case(2) +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & +! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & +! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case(3) +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & +! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & +! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & +! popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & +! popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case default +! +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! !DIR$ FORCEINLINE +! call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) +! if (degree < 3) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! end select +! call stop_progress +! +! END_PROVIDER