From 3eec2f5683cd320d69f15fd492ebce20dd6cbc71 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sat, 31 Oct 2015 18:42:46 +0100 Subject: [PATCH 1/5] det_connections removed - some useless functions commented out --- plugins/MRCC_Utils/davidson.irp.f | 153 ++++---- src/Determinants/davidson.irp.f | 2 +- src/Determinants/filter_connected.irp.f | 391 ++++++++++---------- src/Determinants/s2.irp.f | 103 +++++- src/Determinants/slater_rules.irp.f | 463 ++++++++++++------------ 5 files changed, 594 insertions(+), 518 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index abf1b63a..33aa8ee5 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -108,7 +108,7 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin integer(bit_kind) :: dets_in_sorted(Nint,2,sze) integer :: idx(sze), shortcut(0:sze+1),sh,ii,tmp - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) @@ -430,7 +430,6 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) do ii=shortcut(sh),shortcut(sh+1)-1 idx(0) = ii - !call filter_connected_davidson_mwen(keys_tmp,shortcut,keys_tmp(1,1,ii),Nint,ii-1,idx) call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) i = sort_idx(ii) @@ -470,79 +469,79 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) end - -subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint,istate - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy delta_ij - integer, parameter :: block_size = 157 - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & - !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) - !$OMP DO SCHEDULE(static) - do i=1,n - v_0(i) = H_jj(i) * u_0(i) - enddo - !$OMP END DO - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - !$OMP DO SCHEDULE(guided) - do i=1,n - idx(0) = i - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) - do jj=1,idx(0) - j = idx(jj) -! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - hij = hij - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) -! endif - enddo - enddo - !$OMP END DO - - !$OMP DO SCHEDULE(guided) - do ii=1,n_det_ref - i = idx_ref(ii) - do jj = 1, n_det_non_ref - j = idx_non_ref(jj) - vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) - vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) - enddo - enddo - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL -end +! +! subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Computes v_0 = H|u_0> +! ! +! ! n : number of determinants +! ! +! ! H_jj : array of +! END_DOC +! integer, intent(in) :: n,Nint,istate +! double precision, intent(out) :: v_0(n) +! double precision, intent(in) :: u_0(n) +! double precision, intent(in) :: H_jj(n) +! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) +! integer, allocatable :: idx(:) +! double precision :: hij +! double precision, allocatable :: vt(:) +! integer :: i,j,k,l, jj,ii +! integer :: i0, j0 +! +! +! +! +! +! ASSERT (Nint > 0) +! ASSERT (Nint == N_int) +! ASSERT (n>0) +! PROVIDE ref_bitmask_energy delta_ij +! integer, parameter :: block_size = 157 +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & +! !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) +! !$OMP DO SCHEDULE(static) +! do i=1,n +! v_0(i) = H_jj(i) * u_0(i) +! enddo +! !$OMP END DO +! allocate(idx(0:n), vt(n)) +! Vt = 0.d0 +! !$OMP DO SCHEDULE(guided) +! do i=1,n +! idx(0) = i +! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) +! do jj=1,idx(0) +! j = idx(jj) +! ! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then +! call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) +! hij = hij +! vt (i) = vt (i) + hij*u_0(j) +! vt (j) = vt (j) + hij*u_0(i) +! ! endif +! enddo +! enddo +! !$OMP END DO +! +! !$OMP DO SCHEDULE(guided) +! do ii=1,n_det_ref +! i = idx_ref(ii) +! do jj = 1, n_det_non_ref +! j = idx_non_ref(jj) +! vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) +! vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) +! enddo +! enddo +! !$OMP END DO +! !$OMP CRITICAL +! do i=1,n +! v_0(i) = v_0(i) + vt(i) +! enddo +! !$OMP END CRITICAL +! deallocate(idx,vt) +! !$OMP END PARALLEL +! end diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 03a699c7..b017570a 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -288,7 +288,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun integer(bit_kind) :: dets_in_sorted(Nint, 2, sze) integer :: idx(sze), shortcut(0:sze+1) - PROVIDE det_connections + !PROVIDE det_connections call write_time(iunit) call wall_time(wall) diff --git a/src/Determinants/filter_connected.irp.f b/src/Determinants/filter_connected.irp.f index ed4f092e..3028e32a 100644 --- a/src/Determinants/filter_connected.irp.f +++ b/src/Determinants/filter_connected.irp.f @@ -147,6 +147,7 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end do idx(l) = i_beta l = l + 1 + exit beta_loop end do beta_loop else do i_beta=warp(1,i_alpha),endloop @@ -190,201 +191,201 @@ subroutine filter_connected_davidson_warp(key1,warp,key2,Nint,sze,idx) end -subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer,intent(in) :: shortcut(0:sze+1) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: i_alpha, i_beta, exc_a, exc_b, endloop - integer(bit_kind) :: tmp1, tmp2 - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - i_alpha = 0 - - if (Nint==1) then - do while(shortcut(i_alpha+1) < sze) - i_alpha = i_alpha + 1 - exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) - if(exc_a > 4) then - cycle - end if - endloop = min(shortcut(i_alpha+1)-1, sze) - if(exc_a == 4) then - do i_beta = shortcut(i_alpha), endloop - if(key1(1,2,i_beta) == key2(1,2)) then - idx(l) = i_beta - l = l + 1 - exit - end if - end do - else - do i_beta = shortcut(i_alpha), endloop - exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) - if(exc_b + exc_a <= 4) then - idx(l) = i_beta - l = l + 1 - end if - end do - end if - end do - else - print *, "TBD : filter_connected_davidson_shortcut Nint>1" - stop - end if - - idx(0) = l-1 -end - -subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) - use bitmasks - implicit none - BEGIN_DOC - ! Filters out the determinants that are not connected by H - ! returns the array idx which contains the index of the - ! determinants in the array key1 that interact - ! via the H operator with key2. - ! - ! idx(0) is the number of determinants that interact with key1 - ! key1 should come from psi_det_sorted_ab. - END_DOC - integer, intent(in) :: Nint, sze - integer(bit_kind), intent(in) :: key1(Nint,2,sze) - integer(bit_kind), intent(in) :: key2(Nint,2) - integer, intent(out) :: idx(0:sze) - - integer :: i,j,k,l - integer :: degree_x2 - integer :: j_int, j_start - integer*8 :: itmp - - PROVIDE N_con_int det_connections - - - ASSERT (Nint > 0) - ASSERT (sze >= 0) - - l=1 - - if (Nint==1) then - - i = idx(0) ! lecture dans un intent(out) ? - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==2) then - - - i = idx(0) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else if (Nint==3) then - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & - popcnt(xor( key1(1,2,j), key2(1,2))) + & - popcnt(xor( key1(2,1,j), key2(2,1))) + & - popcnt(xor( key1(2,2,j), key2(2,2))) + & - popcnt(xor( key1(3,1,j), key2(3,1))) + & - popcnt(xor( key1(3,2,j), key2(3,2))) - if (degree_x2 > 4) then - cycle - else - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - else - - i = idx(0) - !DIR$ LOOP COUNT (1000) - do j_int=1,N_con_int - itmp = det_connections(j_int,i) - do while (itmp /= 0_8) - j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) - do j = j_start+1, min(j_start+32,i-1) - degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) - do k=1,Nint - degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& - popcnt(xor( key1(k,2,j), key2(k,2))) - if (degree_x2 > 4) then - exit - endif - enddo - if (degree_x2 <= 5) then - idx(l) = j - l = l+1 - endif - enddo - itmp = iand(itmp-1_8,itmp) - enddo - enddo - - endif - idx(0) = l-1 -end +! subroutine filter_connected_davidson_shortcut(key1,shortcut,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(out) :: idx(0:sze) +! +! integer,intent(in) :: shortcut(0:sze+1) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: i_alpha, i_beta, exc_a, exc_b, endloop +! integer(bit_kind) :: tmp1, tmp2 +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! i_alpha = 0 +! +! if (Nint==1) then +! do while(shortcut(i_alpha+1) < sze) +! i_alpha = i_alpha + 1 +! exc_a = popcnt(xor(key1(1,1,shortcut(i_alpha)), key2(1,1))) +! if(exc_a > 4) then +! cycle +! end if +! endloop = min(shortcut(i_alpha+1)-1, sze) +! if(exc_a == 4) then +! do i_beta = shortcut(i_alpha), endloop +! if(key1(1,2,i_beta) == key2(1,2)) then +! idx(l) = i_beta +! l = l + 1 +! exit +! end if +! end do +! else +! do i_beta = shortcut(i_alpha), endloop +! exc_b = popcnt(xor(key1(1,2,i_beta), key2(1,2))) +! if(exc_b + exc_a <= 4) then +! idx(l) = i_beta +! l = l + 1 +! end if +! end do +! end if +! end do +! else +! print *, "TBD : filter_connected_davidson_shortcut Nint>1" +! stop +! end if +! +! idx(0) = l-1 +! end +! +! subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Filters out the determinants that are not connected by H +! ! returns the array idx which contains the index of the +! ! determinants in the array key1 that interact +! ! via the H operator with key2. +! ! +! ! idx(0) is the number of determinants that interact with key1 +! ! key1 should come from psi_det_sorted_ab. +! END_DOC +! integer, intent(in) :: Nint, sze +! integer(bit_kind), intent(in) :: key1(Nint,2,sze) +! integer(bit_kind), intent(in) :: key2(Nint,2) +! integer, intent(inout) :: idx(0:sze) +! +! integer :: i,j,k,l +! integer :: degree_x2 +! integer :: j_int, j_start +! integer*8 :: itmp +! +! PROVIDE N_con_int det_connections +! +! +! ASSERT (Nint > 0) +! ASSERT (sze >= 0) +! +! l=1 +! +! if (Nint==1) then +! +! i = idx(0) ! lecture dans un intent(out) ? +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==2) then +! +! +! i = idx(0) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else if (Nint==3) then +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & +! popcnt(xor( key1(1,2,j), key2(1,2))) + & +! popcnt(xor( key1(2,1,j), key2(2,1))) + & +! popcnt(xor( key1(2,2,j), key2(2,2))) + & +! popcnt(xor( key1(3,1,j), key2(3,1))) + & +! popcnt(xor( key1(3,2,j), key2(3,2))) +! if (degree_x2 > 4) then +! cycle +! else +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! else +! +! i = idx(0) +! !DIR$ LOOP COUNT (1000) +! do j_int=1,N_con_int +! itmp = det_connections(j_int,i) +! do while (itmp /= 0_8) +! j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) +! do j = j_start+1, min(j_start+32,i-1) +! degree_x2 = 0 +! !DEC$ LOOP COUNT MIN(4) +! do k=1,Nint +! degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& +! popcnt(xor( key1(k,2,j), key2(k,2))) +! if (degree_x2 > 4) then +! exit +! endif +! enddo +! if (degree_x2 <= 5) then +! idx(l) = j +! l = l+1 +! endif +! enddo +! itmp = iand(itmp-1_8,itmp) +! enddo +! enddo +! +! endif +! idx(0) = l-1 +! end subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 72c1b9aa..33b3c45d 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -106,6 +106,7 @@ subroutine get_s2_u0_old(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) s2 += S_z2_Sz end + subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) implicit none use bitmasks @@ -114,35 +115,107 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) double precision, intent(in) :: psi_coefs_tmp(nmax) double precision, intent(out) :: s2 double precision :: s2_tmp - integer :: i,j,l,jj + integer :: i,j,l,jj,ii integer, allocatable :: idx(:) + + integer(bit_kind) :: psi_keys_srt(N_int,2,n) + integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp + + + print *, "totolacitrouille" + call write_time(6) + psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:) + call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int) + print *, "totolacitrouille 2" s2 = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,s2_tmp,idx) & - !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& + !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp) & + !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)& !$OMP REDUCTION(+:s2) allocate(idx(0:n)) !$OMP DO SCHEDULE(dynamic) - do i=1,n - idx(0) = i - call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) - do jj=1,idx(0) - j = idx(jj) - if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp - endif - enddo + + do sh=1,shortcut(0) + warp(1,0) = 0 + do ii=1,sh!shortcut(0) + tmp = 0 + do ni=1,N_int + tmp = popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh)))) + end do + if(tmp <= 4) then + tmp = warp(1,0) + 1 + warp(1,0) = tmp + warp(1,tmp) = shortcut(ii) + warp(2,tmp) = shortcut(ii+1)-1 + end if + end do + + do ii=shortcut(sh),shortcut(sh+1)-1 + !do ii=1,n + idx(0) = ii + call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx) + i = sort_idx(ii) + do jj=1,idx(0) + j = sort_idx(idx(jj)) + if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & + > davidson_threshold ) then + call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp + endif + enddo + end do enddo !$OMP END DO deallocate(idx) !$OMP END PARALLEL s2 = s2+s2 do i=1,n - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) + call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int) s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp enddo s2 = s2 + S_z2_Sz + print *, "totolacitrouille 3" + call write_time(6) end +! +! subroutine get_s2_u0_org(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) +! implicit none +! use bitmasks +! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) +! integer, intent(in) :: n,nmax +! double precision, intent(in) :: psi_coefs_tmp(nmax) +! double precision, intent(out) :: s2 +! double precision :: s2_tmp +! integer :: i,j,l,jj +! integer, allocatable :: idx(:) +! s2 = 0.d0 +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,j,s2_tmp,idx) & +! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& +! !$OMP REDUCTION(+:s2) +! allocate(idx(0:n)) +! !$OMP DO SCHEDULE(dynamic) +! do i=1,n +! idx(0) = i +! call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) +! do jj=1,idx(0) +! j = idx(jj) +! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & +! > davidson_threshold ) then +! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp +! endif +! enddo +! enddo +! !$OMP END DO +! deallocate(idx) +! !$OMP END PARALLEL +! s2 = s2+s2 +! do i=1,n +! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp +! enddo +! s2 = s2 + S_z2_Sz +! end +! diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 06dcb9b7..1c8573dd 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1299,74 +1299,74 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) enddo end - -subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii,sh - integer :: i0, j0 - - - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - v_0 = 0.d0 - !$OMP DO SCHEDULE(guided) - - - - - - do ii=1,n - idx(0) = ii - i = ii - call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) - - do jj=1,idx(0) - j = idx(jj) - if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) - endif - enddo - enddo - - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL - do i=1,n - v_0(i) += H_jj(i) * u_0(i) - enddo -end - +! +! subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Computes v_0 = H|u_0> +! ! +! ! n : number of determinants +! ! +! ! H_jj : array of +! END_DOC +! integer, intent(in) :: n,Nint +! double precision, intent(out) :: v_0(n) +! double precision, intent(in) :: u_0(n) +! double precision, intent(in) :: H_jj(n) +! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) +! integer, allocatable :: idx(:) +! double precision :: hij +! double precision, allocatable :: vt(:) +! integer :: i,j,k,l, jj,ii,sh +! integer :: i0, j0 +! +! +! +! ASSERT (Nint > 0) +! ASSERT (Nint == N_int) +! ASSERT (n>0) +! PROVIDE ref_bitmask_energy +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & +! !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) +! allocate(idx(0:n), vt(n)) +! Vt = 0.d0 +! v_0 = 0.d0 +! !$OMP DO SCHEDULE(guided) +! +! +! +! +! +! do ii=1,n +! idx(0) = ii +! i = ii +! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) +! +! do jj=1,idx(0) +! j = idx(jj) +! if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then +! call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) +! vt (i) = vt (i) + hij*u_0(j) +! vt (j) = vt (j) + hij*u_0(i) +! endif +! enddo +! enddo +! +! !$OMP END DO +! !$OMP CRITICAL +! do i=1,n +! v_0(i) = v_0(i) + vt(i) +! enddo +! !$OMP END CRITICAL +! deallocate(idx,vt) +! !$OMP END PARALLEL +! do i=1,n +! v_0(i) += H_jj(i) * u_0(i) +! enddo +! end +! BEGIN_PROVIDER [ integer, N_con_int ] @@ -1377,166 +1377,169 @@ BEGIN_PROVIDER [ integer, N_con_int ] N_con_int = 1 + ishft(N_det-1,-11) END_PROVIDER -BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] - implicit none - BEGIN_DOC - ! Build connection proxy between determinants - END_DOC - integer :: i,j - integer :: degree - integer :: j_int, j_k, j_l - integer, allocatable :: idx(:) - integer :: thread_num - integer :: omp_get_thread_num - - PROVIDE progress_bar - call start_progress(N_det,'Det connections',0.d0) - - select case(N_int) - - case(1) - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(2) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case(3) - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & - popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & - popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & - popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & - popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & - popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) - if (degree < 5) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - case default - - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& - !$OMP progress_bar,progress_value)& - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) - !$ thread_num = omp_get_thread_num() - allocate (idx(0:N_det)) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - if (thread_num == 0) then - progress_bar(1) = i - progress_value = dble(i) - endif - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,11) - do j_l = j_k,min(j_k+2047,N_det), 32 - do j = j_l+1,min(j_l+32,i) - !DIR$ FORCEINLINE - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if (degree < 3) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - deallocate(idx) - !$OMP END PARALLEL - - end select - call stop_progress - -END_PROVIDER +! BEGIN_PROVIDER [ integer*8, det_connectionsqsd, (N_con_int,N_det) ] +! implicit none +! BEGIN_DOC +! ! Build connection proxy between determinants +! END_DOC +! integer :: i,j +! integer :: degree +! integer :: j_int, j_k, j_l +! integer, allocatable :: idx(:) +! integer :: thread_num +! integer :: omp_get_thread_num +! +! PROVIDE progress_bar +! +! print *,"totolabanane" +! +! call start_progress(N_det,'Det connections',0.d0) +! +! select case(N_int) +! +! case(1) +! +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case(2) +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & +! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & +! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case(3) +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & +! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & +! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & +! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & +! popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & +! popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) +! if (degree < 5) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! case default +! +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& +! !$OMP progress_bar,progress_value)& +! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) +! !$ thread_num = omp_get_thread_num() +! allocate (idx(0:N_det)) +! !$OMP DO SCHEDULE(guided) +! do i=1,N_det +! if (thread_num == 0) then +! progress_bar(1) = i +! progress_value = dble(i) +! endif +! do j_int=1,N_con_int +! det_connections(j_int,i) = 0_8 +! j_k = ishft(j_int-1,11) +! do j_l = j_k,min(j_k+2047,N_det), 32 +! do j = j_l+1,min(j_l+32,i) +! !DIR$ FORCEINLINE +! call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) +! if (degree < 3) then +! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) +! exit +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP ENDDO +! deallocate(idx) +! !$OMP END PARALLEL +! +! end select +! call stop_progress +! +! END_PROVIDER From 67836776e1646f008bbab9da48f3f121e011e525 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Sun, 1 Nov 2015 08:57:01 +0100 Subject: [PATCH 2/5] optimized H_u_0_mrcc and get_s2_u0 - some unused functions commented out --- plugins/MRCC_Utils/davidson.irp.f | 132 +++++++++++++++++- src/Determinants/davidson.irp.f | 24 ++-- src/Determinants/s2.irp.f | 224 ++++++++++++++++++++++-------- 3 files changed, 307 insertions(+), 73 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 33aa8ee5..1757305c 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -215,8 +215,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin ! ---------------------- do k=1,N_st - call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) + !call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in_sorted,shortcut,idx,Nint,istate) !call H_u_0_mrcc_org(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) + call H_u_0_mrcc(W(1,k,iter),U(1,k,iter),H_jj,sze,dets_in,Nint,istate) enddo ! Compute h_kl = = @@ -368,7 +369,9 @@ subroutine davidson_diag_hjj_mrcc(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nin abort_here = abort_all end -subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) + + +subroutine H_u_0_mrcc_myold(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) use bitmasks implicit none BEGIN_DOC @@ -449,6 +452,131 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) + !$OMP DO SCHEDULE(guided) + do ii=1,n_det_ref + i = idx_ref(ii) + do jj = 1, n_det_non_ref + j = idx_non_ref(jj) + vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) + vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) + enddo + enddo + !$OMP END DO + !$OMP CRITICAL + do i=1,n + v_0(i) = v_0(i) + vt(i) + enddo + !$OMP END CRITICAL + deallocate(idx,vt) + !$OMP END PARALLEL +end + + +subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) + use bitmasks + implicit none + BEGIN_DOC + ! Computes v_0 = H|u_0> + ! + ! n : number of determinants + ! + ! H_jj : array of + END_DOC + integer, intent(in) :: n,Nint,istate + double precision, intent(out) :: v_0(n) + double precision, intent(in) :: u_0(n) + double precision, intent(in) :: H_jj(n) + integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) + integer, allocatable :: idx(:) + double precision :: hij + double precision, allocatable :: vt(:) + integer :: i,j,k,l, jj,ii + integer :: i0, j0 + + integer :: shortcut(0:n+1), sort_idx(n) + integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass +! + + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + ASSERT (n>0) + PROVIDE ref_bitmask_energy delta_ij + integer, parameter :: block_size = 157 + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) & + !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,sorted,shortcut,sort_idx,version) + + + + !$OMP DO SCHEDULE(static) + do i=1,n + v_0(i) = H_jj(i) * u_0(i) + enddo + !$OMP END DO + + allocate(idx(0:n), vt(n)) + Vt = 0.d0 + + do pass=1,2 + if(pass == 1) then + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + else + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + end if + + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + + if(pass == 2) then + endi = sh + else + endi = 1 + end if + + do sh2=endi,sh + exa = 0 + do ni=1,Nint + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if + + do j=shortcut(sh2),endi + ext = exa + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + if ( (dabs(u_0(org_j)) > 1.d-7).or.((dabs(u_0(org_i)) > 1.d-7)) ) then + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + endif + end if + end do + end do + end do + enddo + !$OMP END DO + end do + + + + !$OMP DO SCHEDULE(guided) do ii=1,n_det_ref i = idx_ref(ii) diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index b017570a..ebc30e99 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -121,35 +121,35 @@ subroutine tamiser(key, idx, no, n, Nint, N_key) end subroutine -subroutine sort_dets_ba_v(key, idx, shortcut, version, N_key, Nint) +subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none - integer(bit_kind),intent(inout) :: key(Nint,2,N_key) + integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) + integer(bit_kind) :: key(Nint,2,N_key) + integer(bit_kind),intent(out) :: key_out(Nint,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1) integer, intent(in) :: Nint, N_key integer(bit_kind) :: tmp(Nint, 2,N_key) - tmp(:,1,:N_key) = key(:,2,:N_key) - tmp(:,2,:N_key) = key(:,1,:N_key) + key(:,1,:N_key) = key_in(:,2,:N_key) + key(:,2,:N_key) = key_in(:,1,:N_key) - call sort_dets_ab_v(tmp, idx, shortcut, version, N_key, Nint) - - - key(:,1,:N_key) = tmp(:,2,:N_key) - key(:,2,:N_key) = tmp(:,1,:N_key) + call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint) end subroutine -subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) +subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none - integer(bit_kind),intent(inout) :: key(Nint,2,N_key) + integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) + integer(bit_kind) :: key(Nint,2,N_key) + integer(bit_kind),intent(out) :: key_out(Nint,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1) @@ -157,6 +157,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) integer(bit_kind) :: tmp(Nint, 2) integer :: tmpidx,i,ni + key(:,:,:) = key_in(:,:,:) do i=1,N_key idx(i) = i end do @@ -188,6 +189,7 @@ subroutine sort_dets_ab_v(key, idx, shortcut, version, N_key, Nint) end do end do shortcut(shortcut(0)+1) = N_key+1 + key_out(:,:) = key(:,2,:) end subroutine c diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 33b3c45d..89d0a72b 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -106,78 +106,182 @@ subroutine get_s2_u0_old(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) s2 += S_z2_Sz end - subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) - implicit none - use bitmasks - integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) - integer, intent(in) :: n,nmax - double precision, intent(in) :: psi_coefs_tmp(nmax) - double precision, intent(out) :: s2 - double precision :: s2_tmp - integer :: i,j,l,jj,ii - integer, allocatable :: idx(:) + implicit none + use bitmasks + integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) + integer, intent(in) :: n,nmax + double precision, intent(in) :: psi_coefs_tmp(nmax) + double precision, intent(out) :: s2 + double precision :: s2_tmp + integer :: i,j,l,jj,ii + integer, allocatable :: idx(:) - integer(bit_kind) :: psi_keys_srt(N_int,2,n) - integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp - - - print *, "totolacitrouille" - call write_time(6) - psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:) - call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int) - print *, "totolacitrouille 2" - s2 = 0.d0 - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp) & - !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)& - !$OMP REDUCTION(+:s2) - allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) - - do sh=1,shortcut(0) - warp(1,0) = 0 - do ii=1,sh!shortcut(0) - tmp = 0 - do ni=1,N_int - tmp = popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh)))) - end do - if(tmp <= 4) then - tmp = warp(1,0) + 1 - warp(1,0) = tmp - warp(1,tmp) = shortcut(ii) - warp(2,tmp) = shortcut(ii+1)-1 - end if - end do + integer :: shortcut(0:n+1), sort_idx(n) + integer(bit_kind) :: sorted(N_int,n), version(N_int,n) + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass + double precision :: davidson_threshold_bis + + !PROVIDE davidson_threshold + + s2 = 0.d0 + davidson_threshold_bis = davidson_threshold + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) & + !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)& + !$OMP REDUCTION(+:s2) + allocate(idx(0:n)) + + do pass=1,2 + if(pass == 1) then + call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + else + call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + end if - do ii=shortcut(sh),shortcut(sh+1)-1 - !do ii=1,n - idx(0) = ii - call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx) - i = sort_idx(ii) - do jj=1,idx(0) - j = sort_idx(idx(jj)) - if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & - > davidson_threshold ) then - call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp - endif - enddo - end do - enddo - !$OMP END DO + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + + if(pass == 2) then + endi = sh + else + endi = 1 + end if + + do sh2=endi,sh + exa = 0 + do ni=1,N_int + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle + end if + + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if + + do j=shortcut(sh2),endi + ext = exa + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + + end if + end do + end do + end do + enddo + !$OMP END DO + end do deallocate(idx) !$OMP END PARALLEL s2 = s2+s2 do i=1,n - call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int) + call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp enddo s2 = s2 + S_z2_Sz - print *, "totolacitrouille 3" - call write_time(6) end + + + +! subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) +! implicit none +! use bitmasks +! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) +! integer, intent(in) :: n,nmax +! double precision, intent(in) :: psi_coefs_tmp(nmax) +! double precision, intent(out) :: s2 +! double precision :: s2_tmp +! integer :: i,j,l,jj,ii +! integer, allocatable :: idx(:) +! +! integer(bit_kind) :: psi_keys_srt(N_int,2,n) +! integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp +! integer :: mon, bie, egz +! +! +! psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:) +! call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int) +! +! s2 = 0.d0 +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp,mon,bie,egz) & +! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)& +! !$OMP REDUCTION(+:s2) +! allocate(idx(0:n)) +! !$OMP DO SCHEDULE(dynamic) +! +! do sh=1,shortcut(0) +! mon = 0 +! bie = 0 +! +! warp(1,0) = 0 +! do ii=1,sh!shortcut(0) +! tmp = 0 +! do ni=1,N_int +! tmp += popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh)))) +! end do +! egz = tmp +! if(tmp <= 4) then +! tmp = warp(1,0) + 1 +! warp(1,0) = tmp +! warp(1,tmp) = shortcut(ii) +! warp(2,tmp) = shortcut(ii+1)-1 +! if(egz == 4) then +! bie = bie + shortcut(ii+1) - shortcut(ii) +! else +! mon = mon + shortcut(ii+1) - shortcut(ii) +! end if +! end if +! end do +! +! if(shortcut(sh+1) - shortcut(sh) /= 1) then +! print *, shortcut(sh+1) - shortcut(sh), shortcut(sh+1), mon, bie +! end if +! +! do ii=shortcut(sh),shortcut(sh+1)-1 +! !do ii=1,n +! idx(0) = ii +! call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx) +! i = sort_idx(ii) +! do jj=1,idx(0) +! j = sort_idx(idx(jj)) +! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & +! > davidson_threshold ) then +! call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp +! endif +! enddo +! end do +! enddo +! !$OMP END DO +! deallocate(idx) +! !$OMP END PARALLEL +! s2 = s2+s2 +! do i=1,n +! call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int) +! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp +! enddo +! s2 = s2 + S_z2_Sz +! end + ! ! subroutine get_s2_u0_org(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) ! implicit none From 4eed037ecf8a1d2828b19845525fe33655c7f645 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Mon, 2 Nov 2015 20:56:02 +0100 Subject: [PATCH 3/5] corrected parallelism bug --- plugins/MRCC_Utils/davidson.irp.f | 103 +++++++++++++++------------ src/Determinants/davidson.irp.f | 1 + src/Determinants/s2.irp.f | 112 +++++++++++++++++------------- 3 files changed, 123 insertions(+), 93 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 1757305c..9216877c 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -495,6 +495,8 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) integer :: shortcut(0:n+1), sort_idx(n) integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + + integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass ! @@ -520,61 +522,72 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) allocate(idx(0:n), vt(n)) Vt = 0.d0 - do pass=1,2 - if(pass == 1) then - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - else - call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - end if - + + !$OMP SINGLE + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0) - - if(pass == 2) then - endi = sh - else - endi = 1 + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do sh2=1,sh + exa = 0 + do ni=1,Nint + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - do sh2=endi,sh - exa = 0 - do ni=1,Nint - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 end if - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 - end if - - do j=shortcut(sh2),endi - ext = exa - do ni=1,Nint - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - if ( (dabs(u_0(org_j)) > 1.d-7).or.((dabs(u_0(org_i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - vt (org_i) = vt (org_i) + hij*u_0(org_j) - vt (org_j) = vt (org_j) + hij*u_0(org_i) - endif - end if + do j=shortcut(sh2),endi + ext = exa + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if end do end do - enddo - !$OMP END DO end do - + enddo + !$OMP END DO + !$OMP SINGLE + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,Nint + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + end if + end do + end do + enddo + !$OMP END DO !$OMP DO SCHEDULE(guided) diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index ebc30e99..5e931f14 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -178,6 +178,7 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) shortcut(0) = 1 shortcut(1) = 1 + version(:,1) = key(:,1,1) do i=2,N_key do ni=1,nint if(key(ni,1,i) /= key(ni,1,i-1)) then diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 89d0a72b..c11b2161 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -132,62 +132,78 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) !$OMP REDUCTION(+:s2) allocate(idx(0:n)) - do pass=1,2 - if(pass == 1) then - call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) - else - call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + + !$OMP SINGLE + call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0) - - if(pass == 2) then - endi = sh - else - endi = 1 - end if - - do sh2=endi,sh - exa = 0 - do ni=1,N_int - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 end if - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 - end if - - do j=shortcut(sh2),endi - ext = exa - do ni=1,N_int - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - - - if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp - endif - - end if + do j=shortcut(sh2),endi + ext = exa + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if end do end do - enddo - !$OMP END DO end do + enddo + !$OMP END DO + + !$OMP SINGLE + call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + !$OMP END SINGLE + + !$OMP DO SCHEDULE(dynamic) + do sh=1,shortcut(0) + do i=shortcut(sh),shortcut(sh+1)-1 + do j=shortcut(sh),i-1 + ext = 0 + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if + end do + end do + enddo + !$OMP END DO + deallocate(idx) !$OMP END PARALLEL s2 = s2+s2 From c3bbbd60ded73cd3f217adf389380768aa696479 Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Nov 2015 11:37:13 +0100 Subject: [PATCH 4/5] corrected bug in H_u_0_mrcc and get_s2_u0 --- plugins/MRCC_Utils/davidson.irp.f | 2 +- src/Determinants/s2.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index 9216877c..f1cff31a 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -577,7 +577,7 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) do ni=1,Nint ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do - if(ext <= 4) then + if(ext == 4) then org_i = sort_idx(i) org_j = sort_idx(j) call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index c11b2161..252810f3 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -189,7 +189,7 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) do ni=1,N_int ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) end do - if(ext <= 4) then + if(ext == 4) then org_i = sort_idx(i) org_j = sort_idx(j) From 238a5d6dd6877431a05f3b63002a183174f93a8a Mon Sep 17 00:00:00 2001 From: Yann Garniron Date: Wed, 4 Nov 2015 16:21:21 +0100 Subject: [PATCH 5/5] removed some unused functions --- plugins/MRCC_Utils/davidson.irp.f | 178 -------------------- src/Determinants/davidson.irp.f | 1 - src/Determinants/s2.irp.f | 124 -------------- src/Determinants/slater_rules.irp.f | 243 ---------------------------- 4 files changed, 546 deletions(-) diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f index f1cff31a..db2fe26e 100644 --- a/plugins/MRCC_Utils/davidson.irp.f +++ b/plugins/MRCC_Utils/davidson.irp.f @@ -371,107 +371,6 @@ end -subroutine H_u_0_mrcc_myold(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint,istate) - use bitmasks - implicit none - BEGIN_DOC - ! Computes v_0 = H|u_0> - ! - ! n : number of determinants - ! - ! H_jj : array of - END_DOC - integer, intent(in) :: n,Nint,istate - double precision, intent(out) :: v_0(n) - double precision, intent(in) :: u_0(n) - double precision, intent(in) :: H_jj(n) - integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - integer, allocatable :: idx(:) - double precision :: hij - double precision, allocatable :: vt(:) - integer :: i,j,k,l, jj,ii - integer :: i0, j0 - - integer,intent(in) :: shortcut(0:n+1), sort_idx(n) - integer :: tmp, warp(2,0:n+1), sh, ni -! - - ASSERT (Nint > 0) - ASSERT (Nint == N_int) - ASSERT (n>0) - PROVIDE ref_bitmask_energy delta_ij - integer, parameter :: block_size = 157 - - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,warp,tmp,sh) & - !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij,shortcut,sort_idx) - - !$OMP DO SCHEDULE(static) - do i=1,n - v_0(i) = H_jj(i) * u_0(i) - enddo - !$OMP END DO - - allocate(idx(0:n), vt(n)) - Vt = 0.d0 - !$OMP DO SCHEDULE(dynamic) - do sh=1,shortcut(0) - warp(1,0) = 0 - do ii=1,sh!shortcut(0) - tmp = 0 - do ni=1,Nint - tmp = popcnt(xor(keys_tmp(ni,1, shortcut(ii)), keys_tmp(ni,1,shortcut(sh)))) - end do - if(tmp <= 4) then - tmp = warp(1,0) + 1 - warp(1,0) = tmp - warp(1,tmp) = shortcut(ii) - warp(2,tmp) = shortcut(ii+1)-1 - end if - end do - - do ii=shortcut(sh),shortcut(sh+1)-1 - idx(0) = ii - !call filter_connected_davidson_mwen(keys_tmp,shortcut,keys_tmp(1,1,ii),Nint,ii-1,idx) - call filter_connected_davidson_warp(keys_tmp,warp,keys_tmp(1,1,ii),Nint,ii-1,idx) - i = sort_idx(ii) - - do jj=1,idx(0) - j = sort_idx(idx(jj)) - !j = idx(jj) - if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then - call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) - endif - enddo - enddo - enddo - !$OMP END DO - - - - !$OMP DO SCHEDULE(guided) - do ii=1,n_det_ref - i = idx_ref(ii) - do jj = 1, n_det_non_ref - j = idx_non_ref(jj) - vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) - vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) - enddo - enddo - !$OMP END DO - !$OMP CRITICAL - do i=1,n - v_0(i) = v_0(i) + vt(i) - enddo - !$OMP END CRITICAL - deallocate(idx,vt) - !$OMP END PARALLEL -end - - subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) use bitmasks implicit none @@ -609,80 +508,3 @@ subroutine H_u_0_mrcc(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) !$OMP END PARALLEL end - -! -! subroutine H_u_0_mrcc_org(v_0,u_0,H_jj,n,keys_tmp,Nint,istate) -! use bitmasks -! implicit none -! BEGIN_DOC -! ! Computes v_0 = H|u_0> -! ! -! ! n : number of determinants -! ! -! ! H_jj : array of -! END_DOC -! integer, intent(in) :: n,Nint,istate -! double precision, intent(out) :: v_0(n) -! double precision, intent(in) :: u_0(n) -! double precision, intent(in) :: H_jj(n) -! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) -! integer, allocatable :: idx(:) -! double precision :: hij -! double precision, allocatable :: vt(:) -! integer :: i,j,k,l, jj,ii -! integer :: i0, j0 -! -! -! -! -! -! ASSERT (Nint > 0) -! ASSERT (Nint == N_int) -! ASSERT (n>0) -! PROVIDE ref_bitmask_energy delta_ij -! integer, parameter :: block_size = 157 -! !$OMP PARALLEL DEFAULT(NONE) & -! !$OMP PRIVATE(i,hij,j,k,idx,jj,ii,vt) & -! !$OMP SHARED(n_det_ref,n_det_non_ref,idx_ref,idx_non_ref,n,H_jj,u_0,keys_tmp,Nint,v_0,istate,delta_ij) -! !$OMP DO SCHEDULE(static) -! do i=1,n -! v_0(i) = H_jj(i) * u_0(i) -! enddo -! !$OMP END DO -! allocate(idx(0:n), vt(n)) -! Vt = 0.d0 -! !$OMP DO SCHEDULE(guided) -! do i=1,n -! idx(0) = i -! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) -! do jj=1,idx(0) -! j = idx(jj) -! ! if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then -! call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) -! hij = hij -! vt (i) = vt (i) + hij*u_0(j) -! vt (j) = vt (j) + hij*u_0(i) -! ! endif -! enddo -! enddo -! !$OMP END DO -! -! !$OMP DO SCHEDULE(guided) -! do ii=1,n_det_ref -! i = idx_ref(ii) -! do jj = 1, n_det_non_ref -! j = idx_non_ref(jj) -! vt (i) = vt (i) + delta_ij(ii,jj,istate)*u_0(j) -! vt (j) = vt (j) + delta_ij(ii,jj,istate)*u_0(i) -! enddo -! enddo -! !$OMP END DO -! !$OMP CRITICAL -! do i=1,n -! v_0(i) = v_0(i) + vt(i) -! enddo -! !$OMP END CRITICAL -! deallocate(idx,vt) -! !$OMP END PARALLEL -! end - diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 5e931f14..87605964 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -122,7 +122,6 @@ end subroutine subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint) - use bitmasks implicit none integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index 252810f3..e836d25d 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -215,127 +215,3 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) end - - -! subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) -! implicit none -! use bitmasks -! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) -! integer, intent(in) :: n,nmax -! double precision, intent(in) :: psi_coefs_tmp(nmax) -! double precision, intent(out) :: s2 -! double precision :: s2_tmp -! integer :: i,j,l,jj,ii -! integer, allocatable :: idx(:) -! -! integer(bit_kind) :: psi_keys_srt(N_int,2,n) -! integer :: shortcut(0:n+1), sort_idx(n), warp(2,0:n+1), ni, sh, tmp -! integer :: mon, bie, egz -! -! -! psi_keys_srt(:,:,:) = psi_keys_tmp(:,:,:) -! call sort_dets_ab(psi_keys_srt, sort_idx, shortcut, n, N_int) -! -! s2 = 0.d0 -! !$OMP PARALLEL DEFAULT(NONE) & -! !$OMP PRIVATE(i,j,s2_tmp,idx,warp,tmp,mon,bie,egz) & -! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,psi_keys_srt,sort_idx)& -! !$OMP REDUCTION(+:s2) -! allocate(idx(0:n)) -! !$OMP DO SCHEDULE(dynamic) -! -! do sh=1,shortcut(0) -! mon = 0 -! bie = 0 -! -! warp(1,0) = 0 -! do ii=1,sh!shortcut(0) -! tmp = 0 -! do ni=1,N_int -! tmp += popcnt(xor(psi_keys_tmp(ni,1, shortcut(ii)), psi_keys_tmp(ni,1,shortcut(sh)))) -! end do -! egz = tmp -! if(tmp <= 4) then -! tmp = warp(1,0) + 1 -! warp(1,0) = tmp -! warp(1,tmp) = shortcut(ii) -! warp(2,tmp) = shortcut(ii+1)-1 -! if(egz == 4) then -! bie = bie + shortcut(ii+1) - shortcut(ii) -! else -! mon = mon + shortcut(ii+1) - shortcut(ii) -! end if -! end if -! end do -! -! if(shortcut(sh+1) - shortcut(sh) /= 1) then -! print *, shortcut(sh+1) - shortcut(sh), shortcut(sh+1), mon, bie -! end if -! -! do ii=shortcut(sh),shortcut(sh+1)-1 -! !do ii=1,n -! idx(0) = ii -! call filter_connected_davidson_warp(psi_keys_srt,warp,psi_keys_srt(1,1,ii),N_int,ii-1,idx) -! i = sort_idx(ii) -! do jj=1,idx(0) -! j = sort_idx(idx(jj)) -! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & -! > davidson_threshold ) then -! call get_s2(psi_keys_srt(1,1,ii),psi_keys_srt(1,1,idx(jj)),s2_tmp,N_int) -! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp -! endif -! enddo -! end do -! enddo -! !$OMP END DO -! deallocate(idx) -! !$OMP END PARALLEL -! s2 = s2+s2 -! do i=1,n -! call get_s2(psi_keys_srt(1,1,sort_idx(i)),psi_keys_srt(1,1,sort_idx(i)),s2_tmp,N_int) -! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp -! enddo -! s2 = s2 + S_z2_Sz -! end - -! -! subroutine get_s2_u0_org(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) -! implicit none -! use bitmasks -! integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) -! integer, intent(in) :: n,nmax -! double precision, intent(in) :: psi_coefs_tmp(nmax) -! double precision, intent(out) :: s2 -! double precision :: s2_tmp -! integer :: i,j,l,jj -! integer, allocatable :: idx(:) -! s2 = 0.d0 -! !$OMP PARALLEL DEFAULT(NONE) & -! !$OMP PRIVATE(i,j,s2_tmp,idx) & -! !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold)& -! !$OMP REDUCTION(+:s2) -! allocate(idx(0:n)) -! !$OMP DO SCHEDULE(dynamic) -! do i=1,n -! idx(0) = i -! call filter_connected_davidson(psi_keys_tmp,psi_keys_tmp(1,1,i),N_int,i-1,idx) -! do jj=1,idx(0) -! j = idx(jj) -! if ( dabs(psi_coefs_tmp(j)) + dabs(psi_coefs_tmp(i)) & -! > davidson_threshold ) then -! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,j),s2_tmp,N_int) -! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(j)*s2_tmp -! endif -! enddo -! enddo -! !$OMP END DO -! deallocate(idx) -! !$OMP END PARALLEL -! s2 = s2+s2 -! do i=1,n -! call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) -! s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp -! enddo -! s2 = s2 + S_z2_Sz -! end -! diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 1c8573dd..acffeb3d 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1299,247 +1299,4 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,shortcut,sort_idx,Nint) enddo end -! -! subroutine H_u_0_org(v_0,u_0,H_jj,n,keys_tmp,Nint) -! use bitmasks -! implicit none -! BEGIN_DOC -! ! Computes v_0 = H|u_0> -! ! -! ! n : number of determinants -! ! -! ! H_jj : array of -! END_DOC -! integer, intent(in) :: n,Nint -! double precision, intent(out) :: v_0(n) -! double precision, intent(in) :: u_0(n) -! double precision, intent(in) :: H_jj(n) -! integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) -! integer, allocatable :: idx(:) -! double precision :: hij -! double precision, allocatable :: vt(:) -! integer :: i,j,k,l, jj,ii,sh -! integer :: i0, j0 -! -! -! -! ASSERT (Nint > 0) -! ASSERT (Nint == N_int) -! ASSERT (n>0) -! PROVIDE ref_bitmask_energy -! !$OMP PARALLEL DEFAULT(NONE) & -! !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii) & -! !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold) -! allocate(idx(0:n), vt(n)) -! Vt = 0.d0 -! v_0 = 0.d0 -! !$OMP DO SCHEDULE(guided) -! -! -! -! -! -! do ii=1,n -! idx(0) = ii -! i = ii -! call filter_connected_davidson(keys_tmp,keys_tmp(1,1,ii),Nint,ii-1,idx) -! -! do jj=1,idx(0) -! j = idx(jj) -! if ( dabs(u_0(j)) + dabs(u_0(i)) > davidson_threshold ) then -! call i_H_j(keys_tmp(1,1,idx(jj)),keys_tmp(1,1,ii),Nint,hij) -! vt (i) = vt (i) + hij*u_0(j) -! vt (j) = vt (j) + hij*u_0(i) -! endif -! enddo -! enddo -! -! !$OMP END DO -! !$OMP CRITICAL -! do i=1,n -! v_0(i) = v_0(i) + vt(i) -! enddo -! !$OMP END CRITICAL -! deallocate(idx,vt) -! !$OMP END PARALLEL -! do i=1,n -! v_0(i) += H_jj(i) * u_0(i) -! enddo -! end -! - - -BEGIN_PROVIDER [ integer, N_con_int ] - implicit none - BEGIN_DOC - ! Number of integers to represent the connections between determinants - END_DOC - N_con_int = 1 + ishft(N_det-1,-11) -END_PROVIDER - -! BEGIN_PROVIDER [ integer*8, det_connectionsqsd, (N_con_int,N_det) ] -! implicit none -! BEGIN_DOC -! ! Build connection proxy between determinants -! END_DOC -! integer :: i,j -! integer :: degree -! integer :: j_int, j_k, j_l -! integer, allocatable :: idx(:) -! integer :: thread_num -! integer :: omp_get_thread_num -! -! PROVIDE progress_bar -! -! print *,"totolabanane" -! -! call start_progress(N_det,'Det connections',0.d0) -! -! select case(N_int) -! -! case(1) -! -! -! !$OMP PARALLEL DEFAULT (NONE) & -! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections, & -! !$OMP progress_bar,progress_value)& -! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) -! -! !$ thread_num = omp_get_thread_num() -! allocate (idx(0:N_det)) -! !$OMP DO SCHEDULE(guided) -! do i=1,N_det -! if (thread_num == 0) then -! progress_bar(1) = i -! progress_value = dble(i) -! endif -! do j_int=1,N_con_int -! det_connections(j_int,i) = 0_8 -! j_k = ishft(j_int-1,11) -! do j_l = j_k,min(j_k+2047,N_det), 32 -! do j = j_l+1,min(j_l+32,i) -! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & -! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) -! if (degree < 5) then -! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) -! exit -! endif -! enddo -! enddo -! enddo -! enddo -! !$OMP ENDDO -! deallocate(idx) -! !$OMP END PARALLEL -! -! case(2) -! -! !$OMP PARALLEL DEFAULT (NONE) & -! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& -! !$OMP progress_bar,progress_value)& -! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) -! !$ thread_num = omp_get_thread_num() -! allocate (idx(0:N_det)) -! !$OMP DO SCHEDULE(guided) -! do i=1,N_det -! if (thread_num == 0) then -! progress_bar(1) = i -! progress_value = dble(i) -! endif -! do j_int=1,N_con_int -! det_connections(j_int,i) = 0_8 -! j_k = ishft(j_int-1,11) -! do j_l = j_k,min(j_k+2047,N_det), 32 -! do j = j_l+1,min(j_l+32,i) -! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & -! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & -! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & -! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) -! if (degree < 5) then -! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) -! exit -! endif -! enddo -! enddo -! enddo -! enddo -! !$OMP ENDDO -! deallocate(idx) -! !$OMP END PARALLEL -! -! case(3) -! -! !$OMP PARALLEL DEFAULT (NONE) & -! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& -! !$OMP progress_bar,progress_value)& -! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) -! !$ thread_num = omp_get_thread_num() -! allocate (idx(0:N_det)) -! !$OMP DO SCHEDULE(guided) -! do i=1,N_det -! if (thread_num == 0) then -! progress_bar(1) = i -! progress_value = dble(i) -! endif -! do j_int=1,N_con_int -! det_connections(j_int,i) = 0_8 -! j_k = ishft(j_int-1,11) -! do j_l = j_k,min(j_k+2047,N_det), 32 -! do j = j_l+1,min(j_l+32,i) -! degree = popcnt(xor( psi_det(1,1,i),psi_det(1,1,j))) + & -! popcnt(xor( psi_det(1,2,i),psi_det(1,2,j))) + & -! popcnt(xor( psi_det(2,1,i),psi_det(2,1,j))) + & -! popcnt(xor( psi_det(2,2,i),psi_det(2,2,j))) + & -! popcnt(xor( psi_det(3,1,i),psi_det(3,1,j))) + & -! popcnt(xor( psi_det(3,2,i),psi_det(3,2,j))) -! if (degree < 5) then -! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) -! exit -! endif -! enddo -! enddo -! enddo -! enddo -! !$OMP ENDDO -! deallocate(idx) -! !$OMP END PARALLEL -! -! case default -! -! -! !$OMP PARALLEL DEFAULT (NONE) & -! !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections,& -! !$OMP progress_bar,progress_value)& -! !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx,thread_num) -! !$ thread_num = omp_get_thread_num() -! allocate (idx(0:N_det)) -! !$OMP DO SCHEDULE(guided) -! do i=1,N_det -! if (thread_num == 0) then -! progress_bar(1) = i -! progress_value = dble(i) -! endif -! do j_int=1,N_con_int -! det_connections(j_int,i) = 0_8 -! j_k = ishft(j_int-1,11) -! do j_l = j_k,min(j_k+2047,N_det), 32 -! do j = j_l+1,min(j_l+32,i) -! !DIR$ FORCEINLINE -! call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) -! if (degree < 3) then -! det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) -! exit -! endif -! enddo -! enddo -! enddo -! enddo -! !$OMP ENDDO -! deallocate(idx) -! !$OMP END PARALLEL -! -! end select -! call stop_progress -! -! END_PROVIDER