From 2e27f8bd3724d4b64bcbff862744bcf29c0a2e8b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 11 Nov 2015 12:27:33 +0100 Subject: [PATCH 1/8] openmp in qmcchem --- plugins/QmcChem/target_pt2_qmc.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/QmcChem/target_pt2_qmc.irp.f b/plugins/QmcChem/target_pt2_qmc.irp.f index e5142a73..6e6d2a60 100644 --- a/plugins/QmcChem/target_pt2_qmc.irp.f +++ b/plugins/QmcChem/target_pt2_qmc.irp.f @@ -93,7 +93,7 @@ subroutine compute_energy(psi_bilinear_matrix_values_save, E, m, norm) m = 0 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,l,det_i,det_j,ci,cj,hij) REDUCTION(+:norm,m,num) allocate( det_i(N_int,2), det_j(N_int,2)) - !$OMP DO + !$OMP DO schedule(guided) do k=1,n_det if (psi_bilinear_matrix_values_save(k) == 0.d0) then cycle From 1e158234946cdd0500de9f3d90076e7e9a4b6378 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 12 Nov 2015 01:08:04 +0100 Subject: [PATCH 2/8] Added guess_lowest_state --- src/Determinants/H_apply.irp.f | 12 +- src/Determinants/determinants.irp.f | 1 + src/Determinants/guess_lowest_state.irp.f | 170 ++++++++++++++++++ .../program_initial_determinants.irp.f | 138 -------------- src/Determinants/spindeterminants.irp.f | 10 +- src/Determinants/truncate_wf.irp.f | 6 +- 6 files changed, 190 insertions(+), 147 deletions(-) create mode 100644 src/Determinants/guess_lowest_state.irp.f delete mode 100644 src/Determinants/program_initial_determinants.irp.f diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 6e2b3a5a..4814000c 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -41,6 +41,15 @@ type(H_apply_buffer_type), pointer :: H_apply_buffer(:) call omp_init_lock(H_apply_buffer_lock(1,iproc)) !$OMP END PARALLEL endif + do iproc=2,nproc-1 + if (.not.allocated(H_apply_buffer(iproc)%det)) then + print *, ' ===================== Error =================== ' + print *, 'H_apply_buffer_allocated should be provided outside' + print *, 'of an OpenMP section' + print *, ' =============================================== ' + stop + endif + enddo END_PROVIDER @@ -111,7 +120,6 @@ subroutine copy_H_apply_buffer_to_wf double precision, allocatable :: buffer_coef(:,:) integer :: i,j,k integer :: N_det_old - integer :: iproc PROVIDE H_apply_buffer_allocated @@ -158,7 +166,7 @@ subroutine copy_H_apply_buffer_to_wf enddo !$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(j,k,i) FIRSTPRIVATE(N_det_old) & - !$OMP SHARED(N_int,H_apply_buffer,psi_det,psi_coef,N_states) + !$OMP SHARED(N_int,H_apply_buffer,psi_det,psi_coef,N_states,psi_det_size) j=0 !$ j=omp_get_thread_num() do k=0,j-1 diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f index d1c36163..5fe18c49 100644 --- a/src/Determinants/determinants.irp.f +++ b/src/Determinants/determinants.irp.f @@ -8,6 +8,7 @@ BEGIN_PROVIDER [ integer, N_det ] logical :: exists character*64 :: label PROVIDE ezfio_filename + PROVIDE nproc if (read_wf) then call ezfio_has_determinants_n_det(exists) if (exists) then diff --git a/src/Determinants/guess_lowest_state.irp.f b/src/Determinants/guess_lowest_state.irp.f new file mode 100644 index 00000000..feea2d66 --- /dev/null +++ b/src/Determinants/guess_lowest_state.irp.f @@ -0,0 +1,170 @@ +program first_guess + use bitmasks + implicit none + BEGIN_DOC + ! Select all the determinants with the lowest energy as a starting point. + END_DOC + integer :: i,j + double precision, allocatable :: orb_energy(:) + double precision :: E + integer, allocatable :: kept(:) + integer :: nelec_kept(2) + + PROVIDE H_apply_buffer_allocated + allocate (orb_energy(mo_tot_num), kept(0:mo_tot_num)) + nelec_kept(1:2) = 0 + kept(0) = 0 + + do i=1,mo_tot_num + orb_energy(i) = mo_mono_elec_integral(i,i) + do j=1,elec_beta_num + if (i==j) cycle + orb_energy(i) += mo_bielec_integral_jj_anti(i,j) + enddo + do j=1,elec_alpha_num + orb_energy(i) += mo_bielec_integral_jj(i,j) + enddo + if ( (orb_energy(i) > -1.d0).and.(orb_energy(i) < .5d0) ) then + kept(0) += 1 + kept( kept(0) ) = i + if (i <= elec_beta_num) then + nelec_kept(2) += 1 + endif + if (i <= elec_alpha_num) then + nelec_kept(1) += 1 + endif + endif + enddo + + integer, allocatable :: list (:,:) + integer(bit_kind), allocatable :: string(:,:) + allocate ( list(N_int*bit_kind_size,2), string(N_int,2) ) + + string = ref_bitmask + call bitstring_to_list( string(1,1), list(1,1), elec_alpha_num, N_int) + call bitstring_to_list( string(1,2), list(1,2), elec_beta_num , N_int) + + psi_det_alpha_unique(:,1) = string(:,1) + psi_det_beta_unique (:,1) = string(:,2) + N_det_alpha_unique = 1 + N_det_beta_unique = 1 + + integer :: i1,i2,i3,i4,i5,i6,i7,i8,i9 +! select case (nelec_kept(1)) +! +! case(0) +! continue +! +! case(1) +! do i1=kept(0),1,-1 +! list(elec_alpha_num-0,1) = kept(i1) +! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) +! N_det_alpha_unique += 1 +! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) +! enddo +! +! case(2) +! do i1=kept(0),1,-1 +! list(elec_alpha_num-0,1) = kept(i1) +! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) +! N_det_alpha_unique += 1 +! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) +! do i2=i1-1,1,-1 +! list(elec_alpha_num-1,1) = kept(i2) +! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) +! N_det_alpha_unique += 1 +! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) +! enddo +! enddo + +psi_det_size = kept(0)**(nelec_kept(1)+nelec_kept(2)) +TOUCH psi_det_size + +BEGIN_SHELL [ /usr/bin/python ] + +template_alpha_ext = """ +do %(i2)s = %(i1)s-1,1,-1 + list(elec_alpha_num-%(i)d,1) = kept(%(i2)s) + call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) +""" + +template_alpha = """ +do %(i2)s = %(i1)s-1,1,-1 + list(elec_alpha_num-%(i)d,1) = kept(%(i2)s) + call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) + N_det_alpha_unique += 1 + psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) +""" + +template_beta_ext = """ +do %(i2)s = %(i1)s-1,1,-1 + list(elec_beta_num-%(i)d,2) = kept(%(i2)s) + call list_to_bitstring( string(1,2), list(1,2), elec_beta_num, N_int) +""" +template_beta = """ +do %(i2)s = %(i1)s-1,1,-1 + list(elec_beta_num-%(i)d,2) = kept(%(i2)s) + call list_to_bitstring( string(1,2), list(1,2), elec_beta_num, N_int) + N_det_beta_unique += 1 + psi_det_beta_unique(:,N_det_beta_unique) = string(:,2) +""" + +def write(template_ext,template,imax): + print "case(%d)"%(imax) + def aux(i2,i1,i,j): + if (i==imax-1): + print template%locals() + else: + print template_ext%locals() + i += 1 + j -= 1 + if (i != imax): + i1 = "i%d"%(i) + i2 = "i%d"%(i+1) + aux(i2,i1,i,j) + print "enddo" + + i2 = "i1" + i1 = "kept(0)+1" + i = 0 + aux(i2,i1,i,imax) + +def main(): + print """ + select case (nelec_kept(1)) + case(0) + continue + """ + for imax in range(1,10): + write(template_alpha_ext,template_alpha,imax) + + print """ + end select + + select case (nelec_kept(2)) + case(0) + continue + """ + for imax in range(1,10): + write(template_beta_ext,template_beta,imax) + print "end select" + +main() + +END_SHELL + + TOUCH N_det_alpha_unique N_det_beta_unique psi_det_alpha_unique psi_det_beta_unique + call create_wf_of_psi_bilinear_matrix(.False.) + call diagonalize_ci + j= N_det + do i=1,N_det + if (psi_average_norm_contrib_sorted(i) < 1.d-6) then + j = i-1 + exit + endif +! call debug_det(psi_det_sorted(1,1,i),N_int) + enddo + call save_wavefunction_general(j,N_states,psi_det_sorted,size(psi_coef_sorted,1),psi_coef_sorted) + + deallocate(orb_energy, kept, list, string) +end diff --git a/src/Determinants/program_initial_determinants.irp.f b/src/Determinants/program_initial_determinants.irp.f deleted file mode 100644 index 6375af22..00000000 --- a/src/Determinants/program_initial_determinants.irp.f +++ /dev/null @@ -1,138 +0,0 @@ -program pouet - implicit none - print*,'HF energy = ',ref_bitmask_energy + nuclear_repulsion - call routine - -end -subroutine routine - use bitmasks - implicit none - integer :: i,j,k,l - double precision :: hij,get_mo_bielec_integral - double precision :: hmono,h_bi_ispin,h_bi_other_spin - integer(bit_kind),allocatable :: key_tmp(:,:) - integer, allocatable :: occ(:,:) - integer :: n_occ_alpha, n_occ_beta - ! First checks - print*,'N_int = ',N_int - print*,'mo_tot_num = ',mo_tot_num - print*,'mo_tot_num / 64+1= ',mo_tot_num/64+1 - ! We print the HF determinant - do i = 1, N_int - print*,'ref_bitmask(i,1) = ',ref_bitmask(i,1) - print*,'ref_bitmask(i,2) = ',ref_bitmask(i,2) - enddo - print*,'' - print*,'Hartree Fock determinant ...' - call debug_det(ref_bitmask,N_int) - allocate(key_tmp(N_int,2)) - ! We initialize key_tmp to the Hartree Fock one - key_tmp = ref_bitmask - integer :: i_hole,i_particle,ispin,i_ok,other_spin - ! We do a mono excitation on the top of the HF determinant - write(*,*)'Enter the (hole, particle) couple for the mono excitation ...' - read(5,*)i_hole,i_particle -!!i_hole = 4 -!!i_particle = 20 - write(*,*)'Enter the ispin variable ...' - write(*,*)'ispin = 1 ==> alpha ' - write(*,*)'ispin = 2 ==> beta ' - read(5,*)ispin - if(ispin == 1)then - other_spin = 2 - else if(ispin == 2)then - other_spin = 1 - else - print*,'PB !! ' - print*,'ispin must be 1 or 2 !' - stop - endif -!!ispin = 1 - call do_mono_excitation(key_tmp,i_hole,i_particle,ispin,i_ok) - ! We check if it the excitation was possible with "i_ok" - if(i_ok == -1)then - print*,'i_ok = ',i_ok - print*,'You can not do this excitation because of Pauli principle ...' - print*,'check your hole particle couple, there must be something wrong ...' - stop - - endif - print*,'New det = ' - call debug_det(key_tmp,N_int) - call i_H_j(key_tmp,ref_bitmask,N_int,hij) - ! We calculate the H matrix element between the new determinant and HF - print*,' = ',hij - print*,'' - print*,'' - print*,'Recalculating it old school style ....' - print*,'' - print*,'' - ! We recalculate this old school style !!! - ! Mono electronic part - hmono = mo_mono_elec_integral(i_hole,i_particle) - print*,'' - print*,'Mono electronic part ' - print*,'' - print*,' = ',hmono - h_bi_ispin = 0.d0 - h_bi_other_spin = 0.d0 - print*,'' - print*,'Getting all the info for the calculation of the bi electronic part ...' - print*,'' - allocate (occ(N_int*bit_kind_size,2)) - ! We get the occupation of the alpha electrons in occ(:,1) - call bitstring_to_list(key_tmp(1,1), occ(1,1), n_occ_alpha, N_int) - print*,'n_occ_alpha = ',n_occ_alpha - print*,'elec_alpha_num = ',elec_alpha_num - ! We get the occupation of the beta electrons in occ(:,2) - call bitstring_to_list(key_tmp(1,2), occ(1,2), n_occ_beta, N_int) - print*,'n_occ_beta = ',n_occ_beta - print*,'elec_beta_num = ',elec_beta_num - ! We print the occupation of the alpha electrons - print*,'Alpha electrons !' - do i = 1, n_occ_alpha - print*,'i = ',i - print*,'occ(i,1) = ',occ(i,1) - enddo - ! We print the occupation of the beta electrons - print*,'Alpha electrons !' - do i = 1, n_occ_beta - print*,'i = ',i - print*,'occ(i,2) = ',occ(i,2) - enddo - integer :: exc(0:2,2,2),degree,h1,p1,h2,p2,s1,s2 - double precision :: phase - - call get_excitation_degree(key_tmp,ref_bitmask,degree,N_int) - print*,'degree = ',degree - call get_mono_excitation(ref_bitmask,key_tmp,exc,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - print*,'h1 = ',h1 - print*,'p1 = ',p1 - print*,'s1 = ',s1 - print*,'phase = ',phase - do i = 1, elec_num_tab(ispin) - integer :: orb_occupied - orb_occupied = occ(i,ispin) - h_bi_ispin += get_mo_bielec_integral(i_hole,orb_occupied,i_particle,orb_occupied,mo_integrals_map) & - -get_mo_bielec_integral(i_hole,i_particle,orb_occupied,orb_occupied,mo_integrals_map) - enddo - print*,'h_bi_ispin = ',h_bi_ispin - - do i = 1, elec_num_tab(other_spin) - orb_occupied = occ(i,other_spin) - h_bi_other_spin += get_mo_bielec_integral(i_hole,orb_occupied,i_particle,orb_occupied,mo_integrals_map) - enddo - print*,'h_bi_other_spin = ',h_bi_other_spin - print*,'h_bi_ispin + h_bi_other_spin = ',h_bi_ispin + h_bi_other_spin - - print*,'Total matrix element = ',phase*(h_bi_ispin + h_bi_other_spin + hmono) -!i = 1 -!j = 1 -!k = 1 -!l = 1 -!hij = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) -!print*,' = ',hij - - -end diff --git a/src/Determinants/spindeterminants.irp.f b/src/Determinants/spindeterminants.irp.f index b8f1b68c..0ca6301a 100644 --- a/src/Determinants/spindeterminants.irp.f +++ b/src/Determinants/spindeterminants.irp.f @@ -442,13 +442,14 @@ BEGIN_PROVIDER [ double precision, psi_bilinear_matrix, (N_det_alpha_unique,N_de enddo END_PROVIDER -subroutine create_wf_of_psi_bilinear_matrix +subroutine create_wf_of_psi_bilinear_matrix(truncate) use bitmasks implicit none BEGIN_DOC ! Generate a wave function containing all possible products ! of alpha and beta determinants END_DOC + logical, intent(in) :: truncate integer :: i,j,k integer(bit_kind) :: tmp_det(N_int,2) integer :: idx @@ -488,8 +489,10 @@ subroutine create_wf_of_psi_bilinear_matrix norm(1) = 0.d0 do i=1,N_det norm(1) += psi_average_norm_contrib_sorted(i) - if (norm(1) >= 0.999999d0) then - exit + if (truncate) then + if (norm(1) >= 0.999999d0) then + exit + endif endif enddo N_det = min(i,N_det) @@ -532,7 +535,6 @@ subroutine generate_all_alpha_beta_det_products !$OMP END DO NOWAIT deallocate(tmp_det) !$OMP END PARALLEL - deallocate (tmp_det) call copy_H_apply_buffer_to_wf SOFT_TOUCH psi_det psi_coef N_det end diff --git a/src/Determinants/truncate_wf.irp.f b/src/Determinants/truncate_wf.irp.f index f867ad7e..42340c71 100644 --- a/src/Determinants/truncate_wf.irp.f +++ b/src/Determinants/truncate_wf.irp.f @@ -8,10 +8,10 @@ program cisd N_det=10000 do i=1,N_det do k=1,N_int - psi_det(k,1,i) = psi_det_sorted(k,1,i) - psi_det(k,2,i) = psi_det_sorted(k,2,i) + psi_det(k,1,i) = psi_det_sorted(k,1,i) + psi_det(k,2,i) = psi_det_sorted(k,2,i) enddo - psi_coef(k,:) = psi_coef_sorted(k,:) + psi_coef(i,:) = psi_coef_sorted(i,:) enddo TOUCH psi_det psi_coef psi_det_sorted psi_coef_sorted psi_average_norm_contrib_sorted N_det call save_wavefunction From bc0d6624acda0c83f87c594657d423617f07bdb7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 12 Nov 2015 01:12:24 +0100 Subject: [PATCH 3/8] If associated --- src/Determinants/H_apply.irp.f | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Determinants/H_apply.irp.f b/src/Determinants/H_apply.irp.f index 4814000c..7e9861fe 100644 --- a/src/Determinants/H_apply.irp.f +++ b/src/Determinants/H_apply.irp.f @@ -2,11 +2,11 @@ use bitmasks use omp_lib type H_apply_buffer_type -integer :: N_det -integer :: sze -integer(bit_kind), pointer :: det(:,:,:) -double precision , pointer :: coef(:,:) -double precision , pointer :: e2(:,:) + integer :: N_det + integer :: sze + integer(bit_kind), pointer :: det(:,:,:) + double precision , pointer :: coef(:,:) + double precision , pointer :: e2(:,:) end type H_apply_buffer_type type(H_apply_buffer_type), pointer :: H_apply_buffer(:) @@ -42,7 +42,7 @@ type(H_apply_buffer_type), pointer :: H_apply_buffer(:) !$OMP END PARALLEL endif do iproc=2,nproc-1 - if (.not.allocated(H_apply_buffer(iproc)%det)) then + if (.not.associated(H_apply_buffer(iproc)%det)) then print *, ' ===================== Error =================== ' print *, 'H_apply_buffer_allocated should be provided outside' print *, 'of an OpenMP section' From 27eb074c26278bb1d280775fef962fff87df5256 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Nov 2015 09:36:04 +0100 Subject: [PATCH 4/8] Removed git --local in is_master_repository.py --- scripts/utility/is_master_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/utility/is_master_repository.py b/scripts/utility/is_master_repository.py index 9ead14a2..da5fb56f 100755 --- a/scripts/utility/is_master_repository.py +++ b/scripts/utility/is_master_repository.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import subprocess -pipe = subprocess.Popen("git config --local --get remote.origin.url", \ +pipe = subprocess.Popen("git config --get remote.origin.url", \ shell=True, stdout=subprocess.PIPE) result = pipe.stdout.read() is_master_repository = "LCPQ/quantum_package" in result From e033596481c927b15cd19052a92bac3b116f09df Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Nov 2015 10:51:03 +0100 Subject: [PATCH 5/8] Corrected Hartree-Fock MOs when not converged --- plugins/Hartree_Fock/SCF.irp.f | 5 +-- plugins/Hartree_Fock/damping_SCF.irp.f | 2 +- src/Determinants/davidson.irp.f | 14 +++--- src/Determinants/guess_lowest_state.irp.f | 52 ++++++++++------------- 4 files changed, 32 insertions(+), 41 deletions(-) diff --git a/plugins/Hartree_Fock/SCF.irp.f b/plugins/Hartree_Fock/SCF.irp.f index 864e9f3f..f21c7399 100644 --- a/plugins/Hartree_Fock/SCF.irp.f +++ b/plugins/Hartree_Fock/SCF.irp.f @@ -48,10 +48,7 @@ subroutine run E0 = HF_energy - thresh_SCF = 1.d-10 - call damping_SCF mo_label = "Canonical" - TOUCH mo_label mo_coef - call save_mos + call damping_SCF end diff --git a/plugins/Hartree_Fock/damping_SCF.irp.f b/plugins/Hartree_Fock/damping_SCF.irp.f index d7d9c2bf..c7c005c9 100644 --- a/plugins/Hartree_Fock/damping_SCF.irp.f +++ b/plugins/Hartree_Fock/damping_SCF.irp.f @@ -86,7 +86,7 @@ subroutine damping_SCF if ((E_half > E).and.(E_new < E)) then lambda = 1.d0 exit - else if ((E_half > E).and.(lambda > 5.d-2)) then + else if ((E_half > E).and.(lambda > 5.d-4)) then lambda = 0.5d0 * lambda E_new = E_half else diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index bceb145a..6ba39424 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -102,9 +102,11 @@ subroutine tamiser(key, idx, no, n, Nint, N_key) k = no j = 2*k do while(j <= n) - if(j < n .and. det_inf(key(:,:,j), key(:,:,j+1), Nint)) then - j = j+1 - end if + if(j < n) then + if (det_inf(key(:,:,j), key(:,:,j+1), Nint)) then + j = j+1 + endif + endif if(det_inf(key(:,:,k), key(:,:,j), Nint)) then tmp(:,:) = key(:,:,k) key(:,:,k) = key(:,:,j) @@ -113,11 +115,11 @@ subroutine tamiser(key, idx, no, n, Nint, N_key) idx(k) = idx(j) idx(j) = tmpidx k = j - j = 2*k + j = k+k else return - end if - end do + endif + enddo end subroutine diff --git a/src/Determinants/guess_lowest_state.irp.f b/src/Determinants/guess_lowest_state.irp.f index feea2d66..f6d0a004 100644 --- a/src/Determinants/guess_lowest_state.irp.f +++ b/src/Determinants/guess_lowest_state.irp.f @@ -9,13 +9,19 @@ program first_guess double precision :: E integer, allocatable :: kept(:) integer :: nelec_kept(2) + character :: occ_char, keep_char - PROVIDE H_apply_buffer_allocated + PROVIDE H_apply_buffer_allocated psi_det allocate (orb_energy(mo_tot_num), kept(0:mo_tot_num)) nelec_kept(1:2) = 0 kept(0) = 0 + print *, 'Orbital energies' + print *, '================' + print *, '' do i=1,mo_tot_num + keep_char = ' ' + occ_char = '-' orb_energy(i) = mo_mono_elec_integral(i,i) do j=1,elec_beta_num if (i==j) cycle @@ -24,8 +30,9 @@ program first_guess do j=1,elec_alpha_num orb_energy(i) += mo_bielec_integral_jj(i,j) enddo - if ( (orb_energy(i) > -1.d0).and.(orb_energy(i) < .5d0) ) then + if ( (orb_energy(i) > -.5d0).and.(orb_energy(i) < .1d0) ) then kept(0) += 1 + keep_char = 'X' kept( kept(0) ) = i if (i <= elec_beta_num) then nelec_kept(2) += 1 @@ -34,7 +41,16 @@ program first_guess nelec_kept(1) += 1 endif endif + if (i <= elec_alpha_num) then + if (i <= elec_beta_num) then + occ_char = '#' + else + occ_char = '+' + endif + endif + print '(I4, 3X, A, 3X, F10.6, 3X, A)', i, occ_char, orb_energy(i), keep_char enddo + integer, allocatable :: list (:,:) integer(bit_kind), allocatable :: string(:,:) @@ -50,35 +66,11 @@ program first_guess N_det_beta_unique = 1 integer :: i1,i2,i3,i4,i5,i6,i7,i8,i9 -! select case (nelec_kept(1)) -! -! case(0) -! continue -! -! case(1) -! do i1=kept(0),1,-1 -! list(elec_alpha_num-0,1) = kept(i1) -! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) -! N_det_alpha_unique += 1 -! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) -! enddo -! -! case(2) -! do i1=kept(0),1,-1 -! list(elec_alpha_num-0,1) = kept(i1) -! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) -! N_det_alpha_unique += 1 -! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) -! do i2=i1-1,1,-1 -! list(elec_alpha_num-1,1) = kept(i2) -! call list_to_bitstring( string(1,1), list(1,1), elec_alpha_num, N_int) -! N_det_alpha_unique += 1 -! psi_det_alpha_unique(:,N_det_alpha_unique) = string(:,1) -! enddo -! enddo -psi_det_size = kept(0)**(nelec_kept(1)+nelec_kept(2)) -TOUCH psi_det_size + psi_det_size = kept(0)**(nelec_kept(1)+nelec_kept(2)) + print *, kept(0), nelec_kept(:) + call write_int(6,psi_det_size,'psi_det_size') + TOUCH psi_det_size BEGIN_SHELL [ /usr/bin/python ] From 66e1ef316d8f82da45b215b94c63c1f426285d6a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Nov 2015 13:22:25 +0100 Subject: [PATCH 6/8] Barrier is not implicit after OMP_SINGLE --- src/Determinants/slater_rules.irp.f | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index cf3b8c1e..16fe2adc 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1248,17 +1248,18 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) ASSERT (Nint > 0) ASSERT (Nint == N_int) ASSERT (n>0) - PROVIDE ref_bitmask_energy + PROVIDE ref_bitmask_energy davidson_criterion + v_0 = 0.d0 !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & - !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version,davidson_criterion_is_built) + !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version) allocate(idx(0:n), vt(n)) Vt = 0.d0 - v_0 = 0.d0 !$OMP SINGLE call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) !$OMP END SINGLE + !$OMP BARRIER !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) @@ -1298,9 +1299,11 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) enddo !$OMP END DO + integer :: omp_get_num_threads , omp_get_thread_num !$OMP SINGLE call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) !$OMP END SINGLE + !$OMP BARRIER !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) From b7e20b34932c20bcdb9e00c1595015cdb06e90f9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Nov 2015 13:38:42 +0100 Subject: [PATCH 7/8] Corrected mysterious openmp bug --- src/Determinants/slater_rules.irp.f | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index 16fe2adc..c566b51f 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1250,17 +1250,15 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) ASSERT (n>0) PROVIDE ref_bitmask_energy davidson_criterion v_0 = 0.d0 + + call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version) allocate(idx(0:n), vt(n)) Vt = 0.d0 - !$OMP SINGLE - call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - !$OMP END SINGLE - !$OMP BARRIER - !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) do sh2=1,sh @@ -1299,12 +1297,23 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) enddo !$OMP END DO - integer :: omp_get_num_threads , omp_get_thread_num - !$OMP SINGLE + !$OMP CRITICAL + do i=1,n + v_0(i) = v_0(i) + vt(i) + enddo + !$OMP END CRITICAL + + deallocate(idx,vt) + !$OMP END PARALLEL + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - !$OMP END SINGLE - !$OMP BARRIER + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & + !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version) + allocate(idx(0:n), vt(n)) + Vt = 0.d0 + !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) do i=shortcut(sh),shortcut(sh+1)-1 @@ -1334,6 +1343,7 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) !$OMP END CRITICAL deallocate(idx,vt) !$OMP END PARALLEL + do i=1,n v_0(i) += H_jj(i) * u_0(i) enddo From 6a640567e89ca28b522b7774aa4054fa7aae30f4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Nov 2015 17:36:29 +0100 Subject: [PATCH 8/8] Davidson optimization --- src/Determinants/davidson.irp.f | 99 +++++++++++++++++------- src/Determinants/s2.irp.f | 116 ++++++++++++++-------------- src/Determinants/slater_rules.irp.f | 104 +++++++++++++------------ 3 files changed, 184 insertions(+), 135 deletions(-) diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index 6ba39424..fd2efcf9 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -90,27 +90,36 @@ end function subroutine tamiser(key, idx, no, n, Nint, N_key) use bitmasks - implicit none - integer(bit_kind),intent(inout) :: key(Nint, 2, N_key) + + BEGIN_DOC +! Uncodumented : TODO + END_DOC integer,intent(in) :: no, n, Nint, N_key + integer(bit_kind),intent(inout) :: key(Nint, 2, N_key) integer,intent(inout) :: idx(N_key) integer :: k,j,tmpidx integer(bit_kind) :: tmp(Nint, 2) logical :: det_inf + integer :: ni k = no j = 2*k do while(j <= n) if(j < n) then - if (det_inf(key(:,:,j), key(:,:,j+1), Nint)) then + if (det_inf(key(1,1,j), key(1,1,j+1), Nint)) then j = j+1 endif endif - if(det_inf(key(:,:,k), key(:,:,j), Nint)) then - tmp(:,:) = key(:,:,k) - key(:,:,k) = key(:,:,j) - key(:,:,j) = tmp(:,:) + if(det_inf(key(1,1,k), key(1,1,j), Nint)) then + do ni=1,Nint + tmp(ni,1) = key(ni,1,k) + tmp(ni,2) = key(ni,2,k) + key(ni,1,k) = key(ni,1,j) + key(ni,2,k) = key(ni,2,j) + key(ni,1,j) = tmp(ni,1) + key(ni,2,j) = tmp(ni,2) + enddo tmpidx = idx(k) idx(k) = idx(j) idx(j) = tmpidx @@ -126,17 +135,25 @@ end subroutine subroutine sort_dets_ba_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none - integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) - integer(bit_kind) :: key(Nint,2,N_key) - integer(bit_kind),intent(out) :: key_out(Nint,N_key) - integer,intent(out) :: idx(N_key) - integer,intent(out) :: shortcut(0:N_key+1) - integer(bit_kind),intent(out) :: version(Nint,N_key+1) - integer, intent(in) :: Nint, N_key - integer(bit_kind) :: tmp(Nint, 2,N_key) + integer, intent(in) :: Nint, N_key + integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) + integer(bit_kind) :: key(Nint,2,N_key) + integer(bit_kind),intent(out) :: key_out(Nint,N_key) + integer,intent(out) :: idx(N_key) + integer,intent(out) :: shortcut(0:N_key+1) + integer(bit_kind),intent(out) :: version(Nint,N_key+1) + integer(bit_kind) :: tmp(Nint, 2,N_key) + integer :: i,ni - key(:,1,:N_key) = key_in(:,2,:N_key) - key(:,2,:N_key) = key_in(:,1,:N_key) + BEGIN_DOC +! Uncodumented : TODO + END_DOC + do i=1,N_key + do ni=1,Nint + key(ni,1,i) = key_in(ni,2,i) + key(ni,2,i) = key_in(ni,1,i) + enddo + enddo call sort_dets_ab_v(key, key_out, idx, shortcut, version, N_key, Nint) @@ -148,18 +165,24 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) use bitmasks implicit none + BEGIN_DOC +! Uncodumented : TODO + END_DOC + integer, intent(in) :: Nint, N_key integer(bit_kind),intent(in) :: key_in(Nint,2,N_key) integer(bit_kind) :: key(Nint,2,N_key) integer(bit_kind),intent(out) :: key_out(Nint,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) integer(bit_kind),intent(out) :: version(Nint,N_key+1) - integer, intent(in) :: Nint, N_key integer(bit_kind) :: tmp(Nint, 2) integer :: tmpidx,i,ni - key(:,:,:) = key_in(:,:,:) do i=1,N_key + do ni=1,Nint + key(ni,1,i) = key_in(ni,1,i) + key(ni,2,i) = key_in(ni,2,i) + enddo idx(i) = i end do @@ -168,9 +191,14 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) end do do i=N_key,2,-1 - tmp(:,:) = key(:,:,i) - key(:,:,i) = key(:,:,1) - key(:,:,1) = tmp(:,:) + do ni=1,Nint + tmp(ni,1) = key(ni,1,i) + tmp(ni,2) = key(ni,2,i) + key(ni,1,i) = key(ni,1,1) + key(ni,2,i) = key(ni,2,1) + key(ni,1,1) = tmp(ni,1) + key(ni,2,1) = tmp(ni,2) + enddo tmpidx = idx(i) idx(i) = idx(1) idx(1) = tmpidx @@ -179,7 +207,9 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) shortcut(0) = 1 shortcut(1) = 1 - version(:,1) = key(:,1,1) + do ni=1,Nint + version(ni,1) = key(ni,1,1) + enddo do i=2,N_key do ni=1,nint if(key(ni,1,i) /= key(ni,1,i-1)) then @@ -191,15 +221,22 @@ subroutine sort_dets_ab_v(key_in, key_out, idx, shortcut, version, N_key, Nint) end do end do shortcut(shortcut(0)+1) = N_key+1 - key_out(:,:) = key(:,2,:) + do i=1,N_key + do ni=1,Nint + key_out(ni,i) = key(ni,2,i) + enddo + enddo end subroutine -c subroutine sort_dets_ab(key, idx, shortcut, N_key, Nint) use bitmasks implicit none + + BEGIN_DOC +! Uncodumented : TODO + END_DOC integer(bit_kind),intent(inout) :: key(Nint,2,N_key) integer,intent(out) :: idx(N_key) integer,intent(out) :: shortcut(0:N_key+1) @@ -216,9 +253,15 @@ subroutine sort_dets_ab(key, idx, shortcut, N_key, Nint) end do do i=N_key,2,-1 - tmp(:,:) = key(:,:,i) - key(:,:,i) = key(:,:,1) - key(:,:,1) = tmp(:,:) + do ni=1,Nint + tmp(ni,1) = key(ni,1,i) + tmp(ni,2) = key(ni,2,i) + key(ni,1,i) = key(ni,1,1) + key(ni,2,i) = key(ni,2,1) + key(ni,1,1) = tmp(ni,1) + key(ni,2,1) = tmp(ni,2) + enddo + tmpidx = idx(i) idx(i) = idx(1) idx(1) = tmpidx diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f index e836d25d..573f094f 100644 --- a/src/Determinants/s2.irp.f +++ b/src/Determinants/s2.irp.f @@ -109,81 +109,82 @@ end subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) implicit none use bitmasks - integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) - integer, intent(in) :: n,nmax - double precision, intent(in) :: psi_coefs_tmp(nmax) - double precision, intent(out) :: s2 - double precision :: s2_tmp - integer :: i,j,l,jj,ii + integer(bit_kind), intent(in) :: psi_keys_tmp(N_int,2,nmax) + integer, intent(in) :: n,nmax + double precision, intent(in) :: psi_coefs_tmp(nmax) + double precision, intent(out) :: s2 + double precision :: s2_tmp + integer :: i,j,l,jj,ii integer, allocatable :: idx(:) - + integer :: shortcut(0:n+1), sort_idx(n) integer(bit_kind) :: sorted(N_int,n), version(N_int,n) integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, pass double precision :: davidson_threshold_bis - + !PROVIDE davidson_threshold s2 = 0.d0 davidson_threshold_bis = davidson_threshold - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,s2_tmp,idx,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass) & + call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,s2_tmp,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass)& !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)& !$OMP REDUCTION(+:s2) - allocate(idx(0:n)) - - - !$OMP SINGLE - call sort_dets_ab_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) - !$OMP END SINGLE !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) - - do sh2=1,sh - exa = 0 - do ni=1,N_int - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle - end if - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 + do sh2=1,sh + exa = 0 + do ni=1,N_int + exa += popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - do j=shortcut(sh2),endi - ext = exa - do ni=1,N_int - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - - if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & - > davidson_threshold ) then - call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp - endif + do i=shortcut(sh),shortcut(sh+1)-1 + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 end if + + do j=shortcut(sh2),endi + ext = exa + do ni=1,N_int + ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext <= 4) then + org_i = sort_idx(i) + org_j = sort_idx(j) + + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i))& + > davidson_threshold ) then + call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp + endif + end if + end do end do end do - end do enddo !$OMP END DO - !$OMP SINGLE + !$OMP END PARALLEL + call sort_dets_ba_v(psi_keys_tmp, sorted, sort_idx, shortcut, version, n, N_int) - !$OMP END SINGLE + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,s2_tmp,sh, sh2, ni, exa, ext, org_i, org_j, endi, pass)& + !$OMP SHARED(n,psi_coefs_tmp,psi_keys_tmp,N_int,davidson_threshold,shortcut,sorted,sort_idx,version)& + !$OMP REDUCTION(+:s2) !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) - do i=shortcut(sh),shortcut(sh+1)-1 + do i=shortcut(sh),shortcut(sh+1)-1 do j=shortcut(sh),i-1 ext = 0 do ni=1,N_int @@ -193,25 +194,24 @@ subroutine get_s2_u0(psi_keys_tmp,psi_coefs_tmp,n,nmax,s2) org_i = sort_idx(i) org_j = sort_idx(j) - if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i)) & + if ( dabs(psi_coefs_tmp(org_j)) + dabs(psi_coefs_tmp(org_i))& > davidson_threshold ) then call get_s2(psi_keys_tmp(1,1,org_i),psi_keys_tmp(1,1,org_j),s2_tmp,N_int) s2 = s2 + psi_coefs_tmp(org_i)*psi_coefs_tmp(org_j)*s2_tmp endif end if end do - end do + end do enddo !$OMP END DO - deallocate(idx) - !$OMP END PARALLEL - s2 = s2+s2 - do i=1,n - call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) - s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp - enddo - s2 = s2 + S_z2_Sz + !$OMP END PARALLEL + s2 = s2+s2 + do i=1,n + call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),s2_tmp,N_int) + s2 = s2 + psi_coefs_tmp(i)*psi_coefs_tmp(i)*s2_tmp + enddo + s2 = s2 + S_z2_Sz end diff --git a/src/Determinants/slater_rules.irp.f b/src/Determinants/slater_rules.irp.f index c566b51f..4cb1afff 100644 --- a/src/Determinants/slater_rules.irp.f +++ b/src/Determinants/slater_rules.irp.f @@ -1068,14 +1068,14 @@ double precision function diag_H_mat_elem(det_in,Nint) nexc(1) = 0 nexc(2) = 0 do i=1,Nint - hole(i,1) = xor(det_in(i,1),ref_bitmask(i,1)) - hole(i,2) = xor(det_in(i,2),ref_bitmask(i,2)) + hole(i,1) = xor(det_in(i,1),ref_bitmask(i,1)) + hole(i,2) = xor(det_in(i,2),ref_bitmask(i,2)) particle(i,1) = iand(hole(i,1),det_in(i,1)) particle(i,2) = iand(hole(i,2),det_in(i,2)) hole(i,1) = iand(hole(i,1),ref_bitmask(i,1)) hole(i,2) = iand(hole(i,2),ref_bitmask(i,2)) - nexc(1) += popcnt(hole(i,1)) - nexc(2) += popcnt(hole(i,2)) + nexc(1) = nexc(1) + popcnt(hole(i,1)) + nexc(2) = nexc(2) + popcnt(hole(i,2)) enddo diag_H_mat_elem = ref_bitmask_energy @@ -1239,10 +1239,11 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) integer :: shortcut(0:n+1), sort_idx(n) integer(bit_kind) :: sorted(Nint,n), version(Nint,n) + integer(bit_kind) :: sorted_i(Nint) integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi -! + double precision :: local_threshold ASSERT (Nint > 0) @@ -1254,47 +1255,51 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & + !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold,sorted_i)& !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version) - allocate(idx(0:n), vt(n)) + allocate(vt(n)) Vt = 0.d0 !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) - do sh2=1,sh - exa = 0 - do ni=1,Nint - exa += popcnt(xor(version(ni,sh), version(ni,sh2))) - end do - if(exa > 2) then - cycle - end if - - do i=shortcut(sh),shortcut(sh+1)-1 - if(sh==sh2) then - endi = i-1 - else - endi = shortcut(sh2+1)-1 + do sh2=1,sh + exa = 0 + do ni=1,Nint + exa = exa + popcnt(xor(version(ni,sh), version(ni,sh2))) + end do + if(exa > 2) then + cycle end if - do j=shortcut(sh2),endi - ext = exa + do i=shortcut(sh),shortcut(sh+1)-1 + org_i = sort_idx(i) + local_threshold = davidson_threshold - dabs(u_0(org_i)) + if(sh==sh2) then + endi = i-1 + else + endi = shortcut(sh2+1)-1 + end if do ni=1,Nint - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext <= 4) then - org_i = sort_idx(i) + sorted_i(ni) = sorted(ni,i) + enddo + + do j=shortcut(sh2),endi org_j = sort_idx(j) - if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then - call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) - vt (org_i) = vt (org_i) + hij*u_0(org_j) - vt (org_j) = vt (org_j) + hij*u_0(org_i) + if ( dabs(u_0(org_j)) > local_threshold ) then + ext = exa + do ni=1,Nint + ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j))) + end do + if(ext <= 4) then + call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) + vt (org_i) = vt (org_i) + hij*u_0(org_j) + vt (org_j) = vt (org_j) + hij*u_0(org_i) + endif endif - endif + enddo enddo enddo enddo - enddo !$OMP END DO !$OMP CRITICAL @@ -1302,30 +1307,31 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) v_0(i) = v_0(i) + vt(i) enddo !$OMP END CRITICAL - - deallocate(idx,vt) + + deallocate(vt) !$OMP END PARALLEL - + call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) - + !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,hij,j,k,idx,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi) & + !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold)& !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,davidson_threshold,sorted,shortcut,sort_idx,version) - allocate(idx(0:n), vt(n)) + allocate(vt(n)) Vt = 0.d0 !$OMP DO SCHEDULE(dynamic) do sh=1,shortcut(0) do i=shortcut(sh),shortcut(sh+1)-1 + local_threshold = davidson_threshold - dabs(u_0(org_i)) + org_i = sort_idx(i) do j=shortcut(sh),i-1 - ext = 0 - do ni=1,Nint - ext += popcnt(xor(sorted(ni,i), sorted(ni,j))) - end do - if(ext == 4) then - org_i = sort_idx(i) - org_j = sort_idx(j) - if ( dabs(u_0(org_j)) + dabs(u_0(org_i)) > davidson_threshold ) then + org_j = sort_idx(j) + if ( dabs(u_0(org_j)) > local_threshold ) then + ext = 0 + do ni=1,Nint + ext = ext + popcnt(xor(sorted(ni,i), sorted(ni,j))) + end do + if(ext == 4) then call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) vt (org_i) = vt (org_i) + hij*u_0(org_j) vt (org_j) = vt (org_j) + hij*u_0(org_i) @@ -1341,9 +1347,9 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint) v_0(i) = v_0(i) + vt(i) enddo !$OMP END CRITICAL - deallocate(idx,vt) + deallocate(vt) !$OMP END PARALLEL - + do i=1,n v_0(i) += H_jj(i) * u_0(i) enddo