From 103a3d92f4c8001e04637eac58c615ce01da3060 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 6 Jun 2014 16:19:14 +0200 Subject: [PATCH] Acceleration for single ref methods --- scripts/generate_h_apply.py | 18 +++++++------ src/BiInts/map_integrals.irp.f | 42 +++++++++++++++++++++++++++++ src/CISD/H_apply.irp.f | 2 +- src/CISD_SC2_selected/H_apply.irp.f | 2 +- src/CISD_selected/H_apply.irp.f | 2 +- src/Dets/H_apply_template.f | 24 ++++++++++++----- src/Full_CI/H_apply.irp.f | 4 +-- src/MP2/H_apply.irp.f | 2 +- src/Utils/map_module.f90 | 39 ++++++++++++++++++++++++++- 9 files changed, 113 insertions(+), 22 deletions(-) diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py index feab4c12..a9d1c966 100755 --- a/scripts/generate_h_apply.py +++ b/scripts/generate_h_apply.py @@ -22,17 +22,17 @@ printout_always deinit_thread skip init_main +filter_integrals """.split() class H_apply(object): - def __init__(self,sub,openmp=True): + def __init__(self,sub,SingleRef=False): s = {} for k in keywords: s[k] = "" s["subroutine"] = "H_apply_%s"%(sub) s["params_post"] = "" - self.openmp = openmp self.selection_pt2 = None self.perturbation = None @@ -41,7 +41,7 @@ class H_apply(object): s["omp_parallel"] = """!$OMP PARALLEL DEFAULT(SHARED) & !$OMP PRIVATE(i,j,k,l,keys_out,hole,particle, & !$OMP occ_particle,occ_hole,j_a,k_a,other_spin, & - !$OMP hole_save,ispin,jj,l_a,ib_jb_pairs, & + !$OMP hole_save,ispin,jj,l_a,ib_jb_pairs,array_pairs, & !$OMP accu,i_a,hole_tmp,particle_tmp,occ_particle_tmp, & !$OMP occ_hole_tmp,key_idx,i_b,j_b,key,N_elec_in_key_part_1,& !$OMP N_elec_in_key_hole_1,N_elec_in_key_part_2, & @@ -58,6 +58,12 @@ class H_apply(object): s["keys_work"] += "call fill_H_apply_buffer_no_selection(key_idx,keys_out,N_int,iproc)" + s["filter_integrals"] = "array_pairs = .True." + if SingleRef: + s["filter_integrals"] = """ + call get_mo_bielec_integrals_existing_ik(i_a,j_a,mo_tot_num,array_pairs,mo_integrals_map) + """ + s["generate_psi_guess"] = """ ! Sort H_jj to find the N_states lowest states integer :: i @@ -83,9 +89,6 @@ class H_apply(object): deallocate(H_jj,iorder) """ - if not openmp: - for k in s: - s[k] = "" s["size_max"] = str(1024*128) s["copy_buffer"] = "call copy_h_apply_buffer_to_wf" s["printout_now"] = """write(output_Dets,*) & @@ -192,8 +195,7 @@ class H_apply(object): pt2_old(k) = pt2(k) enddo """ - if self.openmp: - self.data["omp_parallel"] += """& + self.data["omp_parallel"] += """& !$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) & !$OMP PRIVATE(sum_e_2_pert, sum_norm_pert, sum_H_pert_diag)""" diff --git a/src/BiInts/map_integrals.irp.f b/src/BiInts/map_integrals.irp.f index e4ae14f5..d6a74c1b 100644 --- a/src/BiInts/map_integrals.irp.f +++ b/src/BiInts/map_integrals.irp.f @@ -232,6 +232,48 @@ subroutine get_mo_bielec_integrals(j,k,l,sze,out_val,map) call map_get_many(map, hash, out_val, sze) end +subroutine get_mo_bielec_integrals_existing_ik(j,l,sze,out_array,map) + use map_module + implicit none + BEGIN_DOC + ! Returns multiple integrals in the MO basis, all + ! i for j,k,l fixed. + END_DOC + integer, intent(in) :: j,l, sze + logical, intent(out) :: out_array(sze,sze) + type(map_type), intent(inout) :: map + integer :: i,k,kk,ll,m + integer*8,allocatable :: hash(:) + integer ,allocatable :: pairs(:,:), iorder(:) + PROVIDE mo_bielec_integrals_in_map + allocate (hash(sze*sze), pairs(2,sze*sze),iorder(sze*sze)) + + kk=0 + do k=1,sze + do i=1,sze + kk += 1 + !DIR$ FORCEINLINE + call bielec_integrals_index(i,j,k,l,hash(kk)) + pairs(1,kk) = i + pairs(2,kk) = k + iorder(kk) = kk + enddo + enddo + + logical :: integral_is_in_map + call i8radix_sort(hash,iorder,kk,-1) + call map_exists_many(mo_integrals_map, hash, kk) + + do ll=1,kk + m = iorder(ll) + i=pairs(1,m) + k=pairs(2,m) + out_array(i,k) = (hash(ll) /= 0_8) + enddo + + deallocate(pairs,hash,iorder) +end + integer*8 function get_mo_map_size() implicit none BEGIN_DOC diff --git a/src/CISD/H_apply.irp.f b/src/CISD/H_apply.irp.f index a7dd568f..0df1da38 100644 --- a/src/CISD/H_apply.irp.f +++ b/src/CISD/H_apply.irp.f @@ -3,7 +3,7 @@ BEGIN_SHELL [ /usr/bin/env python ] from generate_h_apply import H_apply -H = H_apply("cisd",openmp=True) +H = H_apply("cisd") print H END_SHELL diff --git a/src/CISD_SC2_selected/H_apply.irp.f b/src/CISD_SC2_selected/H_apply.irp.f index 29248bc5..79668af7 100644 --- a/src/CISD_SC2_selected/H_apply.irp.f +++ b/src/CISD_SC2_selected/H_apply.irp.f @@ -3,7 +3,7 @@ BEGIN_SHELL [ /usr/bin/env python ] from generate_h_apply import * from perturbation import perturbations -s = H_apply("PT2",openmp=True) +s = H_apply("PT2",SingleRef=True) s.set_perturbation("epstein_nesbet_sc2_projected") print s END_SHELL diff --git a/src/CISD_selected/H_apply.irp.f b/src/CISD_selected/H_apply.irp.f index 1a01b53e..91dfb9fc 100644 --- a/src/CISD_selected/H_apply.irp.f +++ b/src/CISD_selected/H_apply.irp.f @@ -4,7 +4,7 @@ from generate_h_apply import * from perturbation import perturbations for perturbation in perturbations: - s = H_apply("cisd_selection_"+perturbation,openmp=True) + s = H_apply("cisd_selection_"+perturbation) s.set_selection_pt2(perturbation) print s END_SHELL diff --git a/src/Dets/H_apply_template.f b/src/Dets/H_apply_template.f index 5eb25ed5..39f85033 100644 --- a/src/Dets/H_apply_template.f +++ b/src/Dets/H_apply_template.f @@ -88,6 +88,8 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene integer :: i_a,j_a,i_b,j_b,k_a,l_a,k_b,l_b integer(bit_kind) :: test(N_int,2) double precision :: accu + logical, allocatable :: array_pairs(:,:) + allocate(array_pairs(mo_tot_num,mo_tot_num)) accu = 0.d0 do ispin=1,2 other_spin = iand(ispin,1)+1 @@ -128,6 +130,9 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene ! hole = a^(+)_j_a(ispin) a_i_a(ispin)|key_in> : mono exc :: orb(i_a,ispin) --> orb(j_a,ispin) hole_save = hole + + ! Build array of the non-zero integrals of second excitation + $filter_integrals if (ispin == 1) then integer :: jjj @@ -140,9 +145,11 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene j_b = occ_particle_tmp(jjj,other_spin) ASSERT (j_b > 0) ASSERT (j_b <= mo_tot_num) - i+= 1 - ib_jb_pairs(1,i) = i_b - ib_jb_pairs(2,i) = j_b + if (array_pairs(i_b,j_b)) then + i+= 1 + ib_jb_pairs(1,i) = i_b + ib_jb_pairs(2,i) = j_b + endif enddo enddo ib_jb_pairs(1,0) = i @@ -186,9 +193,11 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene ASSERT (j_b > 0) ASSERT (j_b <= mo_tot_num) if (j_b <= j_a) cycle - i+= 1 - ib_jb_pairs(1,i) = i_b - ib_jb_pairs(2,i) = j_b + if (array_pairs(i_b,j_b)) then + i+= 1 + ib_jb_pairs(1,i) = i_b + ib_jb_pairs(2,i) = j_b + endif enddo enddo ib_jb_pairs(1,0) = i @@ -229,7 +238,7 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene key,hole, particle, hole_tmp,& particle_tmp, occ_particle, & occ_hole, occ_particle_tmp,& - occ_hole_tmp) + occ_hole_tmp,array_pairs) $omp_end_parallel $finalization end @@ -262,6 +271,7 @@ subroutine $subroutine_monoexc(key_in, hole_1,particl_1,i_generator $parameters integer :: N_elec_in_key_hole_2(2),N_elec_in_key_part_2(2) integer, allocatable :: ia_ja_pairs(:,:,:) + logical, allocatable :: array_pairs(:,:) double precision :: diag_H_mat_elem integer :: iproc integer(omp_lock_kind), save :: lck, ifirst=0 diff --git a/src/Full_CI/H_apply.irp.f b/src/Full_CI/H_apply.irp.f index 63ea8252..76c046ae 100644 --- a/src/Full_CI/H_apply.irp.f +++ b/src/Full_CI/H_apply.irp.f @@ -2,11 +2,11 @@ use bitmasks BEGIN_SHELL [ /usr/bin/env python ] from generate_h_apply import * -s = H_apply("FCI",openmp=True) +s = H_apply("FCI") s.set_selection_pt2("epstein_nesbet_2x2") print s -s = H_apply("FCI_PT2",openmp=True) +s = H_apply("FCI_PT2") s.set_perturbation("epstein_nesbet_2x2") print s diff --git a/src/MP2/H_apply.irp.f b/src/MP2/H_apply.irp.f index e3e5e602..2f15391f 100644 --- a/src/MP2/H_apply.irp.f +++ b/src/MP2/H_apply.irp.f @@ -3,7 +3,7 @@ BEGIN_SHELL [ /usr/bin/env python ] from generate_h_apply import * from perturbation import perturbations -s = H_apply("mp2",openmp=True) +s = H_apply("mp2") s.set_perturbation("Moller_plesset") print s END_SHELL diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90 index cd739660..a92bd868 100644 --- a/src/Utils/map_module.f90 +++ b/src/Utils/map_module.f90 @@ -495,7 +495,7 @@ subroutine map_get_many(map, key, value, sze) integer(key_kind), intent(in) :: key(sze) real(integral_kind), intent(out) :: value(sze) integer :: i - integer(map_size_kind) :: idx_cache, idx_cache_prev + integer(map_size_kind) :: idx_cache integer(cache_map_size_kind) :: ibegin, iend integer(cache_map_size_kind), allocatable :: idx(:) !DIR$ ATTRIBUTES ALIGN : 64 :: idx @@ -518,6 +518,43 @@ subroutine map_get_many(map, key, value, sze) deallocate(idx) end +subroutine map_exists_many(map, key, sze) + use map_module + implicit none + type (map_type), intent(inout) :: map + integer, intent(in) :: sze + integer(key_kind), intent(inout):: key(sze) + integer :: i + integer(map_size_kind) :: idx_cache, idx_cache_prev + integer(cache_map_size_kind) :: ibegin, iend + integer(cache_map_size_kind), allocatable :: idx(:) + !DIR$ ATTRIBUTES ALIGN : 64 :: idx + + idx_cache_prev = -1_map_size_kind + allocate(idx(sze)) + do i=1,sze + idx_cache = ishft(key(i),map_shift) + iend = map%map(idx_cache)%n_elements + if (idx_cache == idx_cache_prev) then + if ((idx(i-1) > 0_cache_map_size_kind).and.(idx(i-1) < iend)) then + if ((key(i) == key(i-1)+1).and.(map%map(idx_cache)%key(idx(i-1))+1) == key(i)) then + idx(i) = idx(i-1)+1 + cycle + endif + endif + endif + !DIR$ FORCEINLINE + call search_key_big_interval(key(i),map%map(idx_cache)%key, iend, idx(i), 1, iend) + idx_cache_prev = idx_cache + enddo + do i=1,sze + idx_cache = ishft(key(i),map_shift) + if (idx(i) <= 0) then + key(i) = 0_key_kind + endif + enddo + deallocate(idx) +end subroutine search_key_big(key,X,sze,idx) use map_module