From 7cefd341bb742efdf7203a57c94c21195b6ec148 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 30 May 2014 17:12:10 +0200 Subject: [PATCH 1/4] Accelerated Davidson with connection map --- src/CISD/cisd.irp.f | 3 + src/Dets/README.rst | 79 ++++++++++++++++----- src/Dets/connections.irp.f | 41 +++++++++++ src/Dets/davidson.irp.f | 2 + src/Dets/filter_connected.irp.f | 117 ++++++++++++++++++++++++++++++++ src/Dets/slater_rules.irp.f | 3 +- src/NEEDED_MODULES | 2 +- src/TestFock/README.rst | 31 +++++++++ 8 files changed, 259 insertions(+), 19 deletions(-) create mode 100644 src/Dets/connections.irp.f create mode 100644 src/TestFock/README.rst diff --git a/src/CISD/cisd.irp.f b/src/CISD/cisd.irp.f index e46b8bf8..67e6f2e2 100644 --- a/src/CISD/cisd.irp.f +++ b/src/CISD/cisd.irp.f @@ -5,6 +5,9 @@ program cisd print *, 'HF = ', HF_energy print *, 'N_states = ', N_states call H_apply_cisd +! do i=1,N_det +! print '(100(X,O32))', det_connections(:,i) +! enddo print *, 'N_det = ', N_det do i = 1,N_states print *, 'energy = ',CI_energy(i) diff --git a/src/Dets/README.rst b/src/Dets/README.rst index 7c7ffade..5c731ff9 100644 --- a/src/Dets/README.rst +++ b/src/Dets/README.rst @@ -51,9 +51,10 @@ Documentation .. NEEDED_MODULES file. `copy_h_apply_buffer_to_wf `_ - Undocumented + Copies the H_apply buffer to psi_coef. You need to touch psi_det, psi_coef and N_det + after calling this function. -`fill_h_apply_buffer_no_selection `_ +`fill_h_apply_buffer_no_selection `_ Fill the H_apply buffer with determiants for CISD `h_apply_buffer_allocated `_ @@ -69,18 +70,18 @@ Documentation `connected_to_ref `_ Undocumented -`det_is_not_or_may_be_in_ref `_ +`det_is_not_or_may_be_in_ref `_ If true, det is not in ref If false, det may be in ref -`key_pattern_not_in_ref `_ +`key_pattern_not_in_ref `_ Min and max values of the integers of the keys of the reference -`davidson_converged `_ +`davidson_converged `_ True if the Davidson algorithm is converged -`davidson_criterion `_ - Can be : [ energy | residual | both ] +`davidson_criterion `_ + Can be : [ energy | residual | both | wall_time | cpu_time | iterations ] `davidson_diag `_ Davidson diagonalization. @@ -126,24 +127,41 @@ Documentation `davidson_sze_max `_ Max number of Davidson sizes -`davidson_threshold `_ - Can be : [ energy | residual | both ] +`davidson_threshold `_ + Can be : [ energy | residual | both | wall_time | cpu_time | iterations ] -`n_det `_ +`n_det `_ Number of determinants in the wave function +`n_det_reference `_ + Number of determinants in the reference wave function + `n_states `_ Number of states to consider -`psi_coef `_ - The wave function. Initialized with Hartree-Fock +`psi_average_norm_contrib `_ + Contribution of determinants to the state-averaged density -`psi_det `_ - The wave function. Initialized with Hartree-Fock +`psi_average_norm_contrib_sorted `_ + Wave function sorted by determinants (state-averaged) -`psi_det_size `_ +`psi_coef `_ + The wave function. Initialized with Hartree-Fock if the EZFIO file + is empty + +`psi_coef_sorted `_ + Wave function sorted by determinants (state-averaged) + +`psi_det `_ + The wave function. Initialized with Hartree-Fock if the EZFIO file + is empty + +`psi_det_size `_ Size of the psi_det/psi_coef arrays +`psi_det_sorted `_ + Wave function sorted by determinants (state-averaged) + `double_exc_bitmask `_ double_exc_bitmask(:,1,i) is the bitmask for holes of excitation 1 double_exc_bitmask(:,2,i) is the bitmask for particles of excitation 1 @@ -162,6 +180,22 @@ Documentation single_exc_bitmask(:,2,i) is the bitmask for particles for a given couple of hole/particle excitations i. +`ci_eigenvectors `_ + Eigenvectors/values of the CI matrix + +`ci_electronic_energy `_ + Eigenvectors/values of the CI matrix + +`ci_energy `_ + N_states lowest eigenvalues of the CI matrix + +`diag_algorithm `_ + Diagonalization algorithm (Davidson or Lapack) + +`diagonalize_ci `_ + Replace the coefficients of the CI states by the coefficients of the + eigenstates of the CI matrix + `filter_connected `_ Filters out the determinants that are not connected by H .br @@ -173,7 +207,9 @@ Documentation .br idx(0) is the number of determinants that interact with key1 -`filter_connected_i_h_psi0 `_ +`filter_connected_davidson `_ + Filters out the determinants that are not connected by H + .br returns the array idx which contains the index of the .br determinants in the array key1 that interact @@ -182,7 +218,16 @@ Documentation .br idx(0) is the number of determinants that interact with key1 -`filter_connected_i_h_psi0_sc2 `_ +`filter_connected_i_h_psi0 `_ + returns the array idx which contains the index of the + .br + determinants in the array key1 that interact + .br + via the H operator with key2. + .br + idx(0) is the number of determinants that interact with key1 + +`filter_connected_i_h_psi0_sc2 `_ standard filter_connected_i_H_psi but returns in addition .br the array of the index of the non connected determinants to key1 diff --git a/src/Dets/connections.irp.f b/src/Dets/connections.irp.f new file mode 100644 index 00000000..90fbcaa8 --- /dev/null +++ b/src/Dets/connections.irp.f @@ -0,0 +1,41 @@ +use bitmasks +BEGIN_PROVIDER [ integer, N_con_int ] + implicit none + BEGIN_DOC + ! Number of integers to represent the connections between determinants + END_DOC + N_con_int = 1 + ishft(N_det-1,-13) +END_PROVIDER + +BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] + implicit none + BEGIN_DOC + ! + END_DOC + integer :: i,j + integer :: degree + integer :: j_int, j_k, j_l + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections) & + !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree) + !$OMP DO SCHEDULE(guided) + do i=1,N_det + do j_int=1,N_con_int + det_connections(j_int,i) = 0_8 + j_k = ishft(j_int-1,13) + do j_l = j_k,min(j_k+8191,N_det), 128 + do j = j_l+1,min(j_l+128,i) + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if (degree < 3) then + det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-7)) ) + exit + endif + enddo + enddo + enddo + enddo + !$OMP ENDDO + !$OMP ENDPARALLEL + +END_PROVIDER + diff --git a/src/Dets/davidson.irp.f b/src/Dets/davidson.irp.f index 2bc500d2..3e874e17 100644 --- a/src/Dets/davidson.irp.f +++ b/src/Dets/davidson.irp.f @@ -114,6 +114,8 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun double precision :: to_print(2,N_st) double precision :: cpu, wall + PROVIDE N_con_int det_connections + call write_time(iunit) call wall_time(wall) call cpu_time(cpu) diff --git a/src/Dets/filter_connected.irp.f b/src/Dets/filter_connected.irp.f index c9814009..c859f20c 100644 --- a/src/Dets/filter_connected.irp.f +++ b/src/Dets/filter_connected.irp.f @@ -91,6 +91,123 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) idx(0) = l-1 end + +subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) + use bitmasks + implicit none + BEGIN_DOC + ! Filters out the determinants that are not connected by H + ! + ! returns the array idx which contains the index of the + ! + ! determinants in the array key1 that interact + ! + ! via the H operator with key2. + ! + ! idx(0) is the number of determinants that interact with key1 + END_DOC + integer, intent(in) :: Nint, sze + integer(bit_kind), intent(in) :: key1(Nint,2,sze) + integer(bit_kind), intent(in) :: key2(Nint,2) + integer, intent(out) :: idx(0:sze) + + integer :: i,j,l + integer :: degree_x2 + integer :: j_int, j_start + integer*8 :: itmp + + PROVIDE N_con_int det_connections + ASSERT (Nint > 0) + ASSERT (sze >= 0) + + l=1 + + if (Nint==1) then + + i = idx(0) + do j_int=1,N_con_int + itmp = det_connections(j_int,i) + do while (itmp /= 0_8) + j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) + do j = j_start+1, min(j_start+128,i-1) + degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & + popcnt(xor( key1(1,2,j), key2(1,2))) + if (degree_x2 < 5) then + idx(l) = j + l = l+1 + endif + enddo + itmp = iand(itmp-1_8,itmp) + enddo + enddo + + ! l=1 + ! !DIR$ LOOP COUNT (1000) + ! do i=1,sze + ! degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & + ! popcnt(xor( key1(2,1,i), key2(2,1))) + ! if (degree_x2 < 5) then + ! if (idx(l) /= i) then + ! print *, l, idx(l), i + ! endif + ! idx(l) = i + ! l = l+1 + ! endif + ! enddo + + else if (Nint==2) then + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & + popcnt(xor( key1(2,1,i), key2(2,1))) + & + popcnt(xor( key1(1,2,i), key2(1,2))) + & + popcnt(xor( key1(2,2,i), key2(2,2))) + if (degree_x2 < 5) then + idx(l) = i + l = l+1 + endif + enddo + + else if (Nint==3) then + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & + popcnt(xor( key1(1,2,i), key2(1,2))) + & + popcnt(xor( key1(2,1,i), key2(2,1))) + & + popcnt(xor( key1(2,2,i), key2(2,2))) + & + popcnt(xor( key1(3,1,i), key2(3,1))) + & + popcnt(xor( key1(3,2,i), key2(3,2))) + if (degree_x2 < 5) then + idx(l) = i + l = l+1 + endif + enddo + + else + + !DIR$ LOOP COUNT (1000) + do i=1,sze + degree_x2 = 0 + !DEC$ LOOP COUNT MIN(4) + do j=1,Nint + degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& + popcnt(xor( key1(j,2,i), key2(j,2))) + if (degree_x2 > 4) then + exit + endif + enddo + if (degree_x2 <= 5) then + idx(l) = i + l = l+1 + endif + enddo + + endif + idx(0) = l-1 +end + subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) use bitmasks BEGIN_DOC diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index 702ee15a..7c786f8b 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -864,7 +864,8 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) Vt = 0.d0 !$OMP DO SCHEDULE(guided) do i=1,n - call filter_connected(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) + idx(0) = i + call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) do jj=1,idx(0) j = idx(jj) call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) diff --git a/src/NEEDED_MODULES b/src/NEEDED_MODULES index 6151539c..5e9ea4ea 100644 --- a/src/NEEDED_MODULES +++ b/src/NEEDED_MODULES @@ -1 +1 @@ -AOs Bitmask Electrons Ezfio_files MOs Nuclei Output Utils Hartree_Fock BiInts MonoInts MOGuess Dets DensityMatrix CISD Perturbation SingleRefMethod CISD_selected Selectors_full MP2 Generators_full Full_CI +AOs Bitmask Electrons Ezfio_files MOs Nuclei Output Utils Hartree_Fock BiInts MonoInts MOGuess Dets DensityMatrix CISD Perturbation SingleRefMethod CISD_selected Selectors_full MP2 Generators_full Full_CI TestFock diff --git a/src/TestFock/README.rst b/src/TestFock/README.rst new file mode 100644 index 00000000..8405e299 --- /dev/null +++ b/src/TestFock/README.rst @@ -0,0 +1,31 @@ +=============== +TestFock Module +=============== + +* This is a test +Documentation +============= + +.. Do not edit this section. It was auto-generated from the +.. NEEDED_MODULES file. + + + +Needed Modules +============== + +.. Do not edit this section. It was auto-generated from the +.. NEEDED_MODULES file. + +* `AOs `_ +* `BiInts `_ +* `Bitmask `_ +* `Electrons `_ +* `Ezfio_files `_ +* `Hartree_Fock `_ +* `MonoInts `_ +* `MOs `_ +* `Nuclei `_ +* `Output `_ +* `Utils `_ + From 4feffb008fbc350284d65596ab0eb10a3207355f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 30 May 2014 22:48:09 +0200 Subject: [PATCH 2/4] Corrected filter_connected_davidson --- src/Dets/README.rst | 16 ++--- src/Dets/filter_connected.irp.f | 110 +++++++++++++++++--------------- src/Dets/slater_rules.irp.f | 8 ++- 3 files changed, 72 insertions(+), 62 deletions(-) diff --git a/src/Dets/README.rst b/src/Dets/README.rst index 5c731ff9..cc3e6597 100644 --- a/src/Dets/README.rst +++ b/src/Dets/README.rst @@ -77,6 +77,12 @@ Documentation `key_pattern_not_in_ref `_ Min and max values of the integers of the keys of the reference +`det_connections `_ + .br + +`n_con_int `_ + Number of integers to represent the connections between determinants + `davidson_converged `_ True if the Davidson algorithm is converged @@ -228,15 +234,7 @@ Documentation idx(0) is the number of determinants that interact with key1 `filter_connected_i_h_psi0_sc2 `_ - standard filter_connected_i_H_psi but returns in addition - .br - the array of the index of the non connected determinants to key1 - .br - in order to know what double excitation can be repeated on key1 - .br - idx_repeat(0) is the number of determinants that can be used - .br - to repeat the excitations + Undocumented `get_s2 `_ Returns diff --git a/src/Dets/filter_connected.irp.f b/src/Dets/filter_connected.irp.f index c859f20c..058c8689 100644 --- a/src/Dets/filter_connected.irp.f +++ b/src/Dets/filter_connected.irp.f @@ -111,7 +111,7 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) integer(bit_kind), intent(in) :: key2(Nint,2) integer, intent(out) :: idx(0:sze) - integer :: i,j,l + integer :: i,j,k,l integer :: degree_x2 integer :: j_int, j_start integer*8 :: itmp @@ -140,70 +140,80 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) itmp = iand(itmp-1_8,itmp) enddo enddo - - ! l=1 - ! !DIR$ LOOP COUNT (1000) - ! do i=1,sze - ! degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & - ! popcnt(xor( key1(2,1,i), key2(2,1))) - ! if (degree_x2 < 5) then - ! if (idx(l) /= i) then - ! print *, l, idx(l), i - ! endif - ! idx(l) = i - ! l = l+1 - ! endif - ! enddo else if (Nint==2) then - !DIR$ LOOP COUNT (1000) - do i=1,sze - degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & - popcnt(xor( key1(2,1,i), key2(2,1))) + & - popcnt(xor( key1(1,2,i), key2(1,2))) + & - popcnt(xor( key1(2,2,i), key2(2,2))) - if (degree_x2 < 5) then - idx(l) = i - l = l+1 - endif + + i = idx(0) + do j_int=1,N_con_int + itmp = det_connections(j_int,i) + do while (itmp /= 0_8) + j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) + do j = j_start+1, min(j_start+128,i-1) + degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & + popcnt(xor( key1(2,1,j), key2(2,1))) + & + popcnt(xor( key1(1,2,j), key2(1,2))) + & + popcnt(xor( key1(2,2,j), key2(2,2))) + if (degree_x2 < 5) then + idx(l) = j + l = l+1 + endif + enddo + itmp = iand(itmp-1_8,itmp) + enddo enddo else if (Nint==3) then !DIR$ LOOP COUNT (1000) - do i=1,sze - degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & - popcnt(xor( key1(1,2,i), key2(1,2))) + & - popcnt(xor( key1(2,1,i), key2(2,1))) + & - popcnt(xor( key1(2,2,i), key2(2,2))) + & - popcnt(xor( key1(3,1,i), key2(3,1))) + & - popcnt(xor( key1(3,2,i), key2(3,2))) - if (degree_x2 < 5) then - idx(l) = i - l = l+1 - endif + i = idx(0) + do j_int=1,N_con_int + itmp = det_connections(j_int,i) + do while (itmp /= 0_8) + j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) + do j = j_start+1, min(j_start+128,i-1) + degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & + popcnt(xor( key1(1,2,j), key2(1,2))) + & + popcnt(xor( key1(2,1,j), key2(2,1))) + & + popcnt(xor( key1(2,2,j), key2(2,2))) + & + popcnt(xor( key1(3,1,j), key2(3,1))) + & + popcnt(xor( key1(3,2,j), key2(3,2))) + if (degree_x2 < 5) then + idx(l) = j + l = l+1 + endif + enddo + itmp = iand(itmp-1_8,itmp) + enddo enddo else !DIR$ LOOP COUNT (1000) - do i=1,sze - degree_x2 = 0 - !DEC$ LOOP COUNT MIN(4) - do j=1,Nint - degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +& - popcnt(xor( key1(j,2,i), key2(j,2))) - if (degree_x2 > 4) then - exit - endif + i = idx(0) + do j_int=1,N_con_int + itmp = det_connections(j_int,i) + do while (itmp /= 0_8) + j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) + do j = j_start+1, min(j_start+128,i-1) + degree_x2 = 0 + !DEC$ LOOP COUNT MIN(4) + do k=1,Nint + degree_x2 = degree_x2+ popcnt(xor( key1(k,1,j), key2(k,1))) +& + popcnt(xor( key1(k,2,j), key2(k,2))) + if (degree_x2 > 4) then + exit + endif + enddo + if (degree_x2 <= 5) then + idx(l) = j + l = l+1 + endif + enddo + itmp = iand(itmp-1_8,itmp) enddo - if (degree_x2 <= 5) then - idx(l) = i - l = l+1 - endif enddo - + endif idx(0) = l-1 end diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index 7c786f8b..0b0ceb08 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -868,9 +868,11 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) call filter_connected_davidson(keys_tmp,keys_tmp(1,1,i),Nint,i-1,idx) do jj=1,idx(0) j = idx(jj) - call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) - vt (i) = vt (i) + hij*u_0(j) - vt (j) = vt (j) + hij*u_0(i) + if ( (dabs(u_0(j)) > 1.d-7).or.((dabs(u_0(i)) > 1.d-7)) ) then + call i_H_j(keys_tmp(1,1,j),keys_tmp(1,1,i),Nint,hij) + vt (i) = vt (i) + hij*u_0(j) + vt (j) = vt (j) + hij*u_0(i) + endif enddo enddo !$OMP END DO From fbe4531125b750c766396eec27c4120b7284080e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 30 May 2014 23:01:56 +0200 Subject: [PATCH 3/4] Acceleration of davidson --- src/Dets/connections.irp.f | 41 ------------------ src/Dets/filter_connected.irp.f | 76 +++++++++++++++++++-------------- src/Dets/slater_rules.irp.f | 44 +++++++++++++++++++ 3 files changed, 88 insertions(+), 73 deletions(-) delete mode 100644 src/Dets/connections.irp.f diff --git a/src/Dets/connections.irp.f b/src/Dets/connections.irp.f deleted file mode 100644 index 90fbcaa8..00000000 --- a/src/Dets/connections.irp.f +++ /dev/null @@ -1,41 +0,0 @@ -use bitmasks -BEGIN_PROVIDER [ integer, N_con_int ] - implicit none - BEGIN_DOC - ! Number of integers to represent the connections between determinants - END_DOC - N_con_int = 1 + ishft(N_det-1,-13) -END_PROVIDER - -BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] - implicit none - BEGIN_DOC - ! - END_DOC - integer :: i,j - integer :: degree - integer :: j_int, j_k, j_l - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections) & - !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree) - !$OMP DO SCHEDULE(guided) - do i=1,N_det - do j_int=1,N_con_int - det_connections(j_int,i) = 0_8 - j_k = ishft(j_int-1,13) - do j_l = j_k,min(j_k+8191,N_det), 128 - do j = j_l+1,min(j_l+128,i) - call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) - if (degree < 3) then - det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-7)) ) - exit - endif - enddo - enddo - enddo - enddo - !$OMP ENDDO - !$OMP ENDPARALLEL - -END_PROVIDER - diff --git a/src/Dets/filter_connected.irp.f b/src/Dets/filter_connected.irp.f index 058c8689..ef8358cd 100644 --- a/src/Dets/filter_connected.irp.f +++ b/src/Dets/filter_connected.irp.f @@ -32,7 +32,9 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) do i=1,sze degree_x2 = popcnt( xor( key1(1,1,i), key2(1,1))) & + popcnt( xor( key1(1,2,i), key2(1,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = i l = l+1 endif @@ -46,7 +48,9 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) popcnt(xor( key1(2,1,i), key2(2,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) + & popcnt(xor( key1(2,2,i), key2(2,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = i l = l+1 endif @@ -62,7 +66,9 @@ subroutine filter_connected(key1,key2,Nint,sze,idx) popcnt(xor( key1(2,2,i), key2(2,2))) + & popcnt(xor( key1(3,1,i), key2(3,1))) + & popcnt(xor( key1(3,2,i), key2(3,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = i l = l+1 endif @@ -128,11 +134,13 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) do j_int=1,N_con_int itmp = det_connections(j_int,i) do while (itmp /= 0_8) - j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) - do j = j_start+1, min(j_start+128,i-1) + j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) + do j = j_start+1, min(j_start+32,i-1) degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & popcnt(xor( key1(1,2,j), key2(1,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = j l = l+1 endif @@ -148,13 +156,15 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) do j_int=1,N_con_int itmp = det_connections(j_int,i) do while (itmp /= 0_8) - j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) - do j = j_start+1, min(j_start+128,i-1) + j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) + do j = j_start+1, min(j_start+32,i-1) degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & popcnt(xor( key1(2,1,j), key2(2,1))) + & popcnt(xor( key1(1,2,j), key2(1,2))) + & popcnt(xor( key1(2,2,j), key2(2,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = j l = l+1 endif @@ -170,15 +180,17 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) do j_int=1,N_con_int itmp = det_connections(j_int,i) do while (itmp /= 0_8) - j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) - do j = j_start+1, min(j_start+128,i-1) + j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) + do j = j_start+1, min(j_start+32,i-1) degree_x2 = popcnt(xor( key1(1,1,j), key2(1,1))) + & popcnt(xor( key1(1,2,j), key2(1,2))) + & popcnt(xor( key1(2,1,j), key2(2,1))) + & popcnt(xor( key1(2,2,j), key2(2,2))) + & popcnt(xor( key1(3,1,j), key2(3,1))) + & popcnt(xor( key1(3,2,j), key2(3,2))) - if (degree_x2 < 5) then + if (degree_x2 > 4) then + cycle + else idx(l) = j l = l+1 endif @@ -194,8 +206,8 @@ subroutine filter_connected_davidson(key1,key2,Nint,sze,idx) do j_int=1,N_con_int itmp = det_connections(j_int,i) do while (itmp /= 0_8) - j_start = ishft(j_int-1,13) + ishft(trailz(itmp),7) - do j = j_start+1, min(j_start+128,i-1) + j_start = ishft(j_int-1,11) + ishft(trailz(itmp),5) + do j = j_start+1, min(j_start+32,i-1) degree_x2 = 0 !DEC$ LOOP COUNT MIN(4) do k=1,Nint @@ -250,11 +262,11 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) do i=1,sze degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) - if (degree_x2 < 5) then - if(degree_x2 .ne. 0)then - idx(l) = i - l = l+1 - endif + if (degree_x2 > 4) then + cycle + else if(degree_x2 .ne. 0)then + idx(l) = i + l = l+1 endif enddo @@ -266,11 +278,11 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) popcnt(xor( key1(2,1,i), key2(2,1))) + & popcnt(xor( key1(1,2,i), key2(1,2))) + & popcnt(xor( key1(2,2,i), key2(2,2))) - if (degree_x2 < 5) then - if(degree_x2 .ne. 0)then - idx(l) = i - l = l+1 - endif + if (degree_x2 > 4) then + cycle + else if(degree_x2 .ne. 0)then + idx(l) = i + l = l+1 endif enddo @@ -284,11 +296,11 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) popcnt(xor( key1(2,2,i), key2(2,2))) + & popcnt(xor( key1(3,1,i), key2(3,1))) + & popcnt(xor( key1(3,2,i), key2(3,2))) - if (degree_x2 < 5) then - if(degree_x2 .ne. 0)then - idx(l) = i - l = l+1 - endif + if (degree_x2 > 4) then + cycle + else if(degree_x2 .ne. 0)then + idx(l) = i + l = l+1 endif enddo @@ -305,11 +317,11 @@ subroutine filter_connected_i_H_psi0(key1,key2,Nint,sze,idx) exit endif enddo - if (degree_x2 <= 5) then - if(degree_x2 .ne. 0)then + if (degree_x2 > 4) then + cycle + else if(degree_x2 .ne. 0)then idx(l) = i l = l+1 - endif endif enddo diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index 0b0ceb08..b84c47f2 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -887,3 +887,47 @@ end +BEGIN_PROVIDER [ integer, N_con_int ] + implicit none + BEGIN_DOC + ! Number of integers to represent the connections between determinants + END_DOC + N_con_int = 1 + ishft(N_det-1,-11) +END_PROVIDER + +BEGIN_PROVIDER [ integer*8, det_connections, (N_con_int,N_det) ] + implicit none + BEGIN_DOC + ! + END_DOC + integer :: i,j + integer :: degree + integer :: j_int, j_k, j_l + integer, allocatable :: idx(:) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP SHARED(N_det, N_con_int, psi_det,N_int, det_connections) & + !$OMP PRIVATE(i,j_int,j_k,j_l,j,degree,idx) + allocate (idx(0:N_det)) + !$OMP DO SCHEDULE(guided) + do i=1,N_det + do j_int=1,N_con_int + det_connections(j_int,i) = 0_8 + j_k = ishft(j_int-1,11) + do j_l = j_k,min(j_k+2047,N_det), 32 + do j = j_l+1,min(j_l+32,i) + !DIR$ FORCEINLINE + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if (degree < 3) then + det_connections(j_int,i) = ibset( det_connections(j_int,i), iand(63,ishft(j_l,-5)) ) + exit + endif + enddo + enddo + enddo + enddo + !$OMP ENDDO + deallocate(idx) + !$OMP ENDPARALLEL + +END_PROVIDER + From 60faffaae549e911bba590d2bf240b52dc9be024 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 31 May 2014 01:18:02 +0200 Subject: [PATCH 4/4] Prints in selection --- scripts/generate_h_apply.py | 28 +++++++++++++++++++ src/Dets/H_apply_template.f | 9 +++++- src/Dets/slater_rules.irp.f | 2 +- .../{pert_sc2.irp.f => pert_sc2.irp.f.BROKEN} | 0 4 files changed, 37 insertions(+), 2 deletions(-) rename src/Perturbation/{pert_sc2.irp.f => pert_sc2.irp.f.BROKEN} (100%) diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py index a6143b23..8b947259 100755 --- a/scripts/generate_h_apply.py +++ b/scripts/generate_h_apply.py @@ -17,6 +17,8 @@ copy_buffer finalization generate_psi_guess init_thread +printout_now +printout_always deinit_thread """.split() @@ -84,6 +86,8 @@ class H_apply(object): s[k] = "" s["size_max"] = str(1024*128) s["copy_buffer"] = "call copy_h_apply_buffer_to_wf" + s["printout_now"] = """write(output_Dets,*) & + 100.*float(i_generator)/float(N_det_generators), '% in ', wall_2-wall_1, 's'""" self.data = s def __setitem__(self,key,value): @@ -154,14 +158,38 @@ class H_apply(object): double precision, intent(inout):: pt2(N_st) double precision, intent(inout):: norm_pert(N_st) double precision, intent(inout):: H_pert_diag(N_st) + double precision :: delta_pt2(N_st), norm_psi(N_st), pt2_old(N_st) PROVIDE CI_electronic_energy N_det_generators key_pattern_not_in_ref do k=1,N_st pt2(k) = 0.d0 norm_pert(k) = 0.d0 H_pert_diag(k) = 0.d0 + norm_psi(k) = 0.d0 + delta_pt2(k) = 0.d0 + pt2_old(k) = 0.d0 enddo + write(output_Dets,'(A12, X, A8, 3(2X, A9), 2X, A8, 2X, A8, 2X, A8)') & + 'N_generators', 'Norm', 'Delta PT2', 'PT2', 'Est. PT2', 'secs' + write(output_Dets,'(A12, X, A8, 3(2X, A9), 2X, A8, 2X, A8, 2X, A8)') & + '============', '========', '=========', '=========', '=========', & + '=========' """ + self.data["printout_always"] = """ + do k=1,N_st + norm_psi(k) = norm_psi(k) + psi_coef(i_generator,k)*psi_coef(i_generator,k) + delta_pt2(k) = pt2(k) - pt2_old(k) + enddo + """ + self.data["printout_now"] = """ + do k=1,N_st + write(output_Dets,'(I10, 4(2X, F9.6), 2X, F8.1)') & + i_generator, norm_psi(k), delta_pt2(k), pt2(k), & + pt2(k)/norm_psi(k), & + wall_2-wall_1 + pt2_old(k) = pt2(k) + enddo + """ if self.openmp: self.data["omp_parallel"] += """& !$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) & diff --git a/src/Dets/H_apply_template.f b/src/Dets/H_apply_template.f index 3b6f1e35..d36efb9b 100644 --- a/src/Dets/H_apply_template.f +++ b/src/Dets/H_apply_template.f @@ -214,7 +214,6 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene occ_hole_tmp) $omp_end_parallel $finalization - abort_here = abort_all end subroutine $subroutine_monoexc(key_in, hole_1,particl_1,i_generator $parameters ) @@ -344,7 +343,9 @@ subroutine $subroutine($params_main) PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map N_det_reference psi_generators integer :: i_generator, k + double precision :: wall_0, wall_1, wall_2 + call wall_time(wall_1) do i_generator=1,N_det_generators call $subroutine_diexc(psi_generators(1,1,i_generator), & generators_bitmask(1,1,d_hole1,i_bitmask_gen), & @@ -359,6 +360,12 @@ subroutine $subroutine($params_main) if (abort_here) then exit endif + call wall_time(wall_2) + $printout_always + if (wall_2 - wall_0 > 2.d0) then + wall_0 = wall_2 + $printout_now + endif enddo $copy_buffer diff --git a/src/Dets/slater_rules.irp.f b/src/Dets/slater_rules.irp.f index b84c47f2..62206b59 100644 --- a/src/Dets/slater_rules.irp.f +++ b/src/Dets/slater_rules.irp.f @@ -503,7 +503,7 @@ subroutine i_H_psi(key,keys,coef,Nint,Ndet,Ndet_max,Nstate,i_H_psi_array) double precision :: hij integer :: idx(0:Ndet) BEGIN_DOC - ! for the various Nstate + ! for the various Nstates END_DOC ASSERT (Nint > 0) diff --git a/src/Perturbation/pert_sc2.irp.f b/src/Perturbation/pert_sc2.irp.f.BROKEN similarity index 100% rename from src/Perturbation/pert_sc2.irp.f rename to src/Perturbation/pert_sc2.irp.f.BROKEN