diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py index 6d3a207d..d39273b2 100755 --- a/scripts/generate_h_apply.py +++ b/scripts/generate_h_apply.py @@ -118,7 +118,6 @@ class H_apply(object): double precision :: sum_H_pert_diag(N_st) double precision, allocatable :: e_2_pert_buffer(:,:) double precision, allocatable :: coef_pert_buffer(:,:) - !$ call omp_init_lock(lck) ASSERT (Nint == N_int) """ self.data["init_thread"] = """ @@ -149,7 +148,6 @@ class H_apply(object): sum_norm_pert,sum_H_pert_diag,N_st,N_int) """%(pert,) self.data["finalization"] = """ - !$ call omp_destroy_lock(lck) """ self.data["copy_buffer"] = "" self.data["generate_psi_guess"] = "" diff --git a/src/Dets/H_apply_template.f b/src/Dets/H_apply_template.f index 78c95f5f..48b117cc 100644 --- a/src/Dets/H_apply_template.f +++ b/src/Dets/H_apply_template.f @@ -28,7 +28,11 @@ subroutine $subroutine_diexc(key_in, hole_1,particl_1, hole_2, particl_2, i_gene integer, allocatable :: ia_ja_pairs(:,:,:) double precision :: diag_H_mat_elem integer :: iproc - integer(omp_lock_kind) :: lck + integer(omp_lock_kind), save :: lck, ifirst=0 + if (ifirst == 0) then + ifirst=1 +!$ call omp_init_lock(lck) + endif PROVIDE H_apply_threshold $initialization @@ -246,7 +250,11 @@ subroutine $subroutine_monoexc(key_in, hole_1,particl_1,i_generator $parameters integer, allocatable :: ia_ja_pairs(:,:,:) double precision :: diag_H_mat_elem integer :: iproc - integer(omp_lock_kind) :: lck + integer(omp_lock_kind), save :: lck, ifirst=0 + if (ifirst == 0) then + ifirst=1 +!$ call omp_init_lock(lck) + endif PROVIDE H_apply_threshold $initialization @@ -335,6 +343,7 @@ end subroutine $subroutine($params_main) implicit none + use omp_lib use bitmasks BEGIN_DOC ! Calls H_apply on the HF determinant and selects all connected single and double @@ -343,12 +352,23 @@ subroutine $subroutine($params_main) $decls_main + integer :: i_generator, k, nmax + double precision :: wall_0, wall_1, wall_2, d + integer(omp_lock_kind) :: lck + PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map N_det_selectors psi_generators - integer :: i_generator, k - double precision :: wall_0, wall_1, wall_2 + PROVIDE psi_det_sorted_bit coef_hf_selector + nmax = ( N_det_generators/nproc ) *nproc call wall_time(wall_1) - do i_generator=1,N_det_generators + !$ call omp_init_lock(lck) + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(i_generator,wall_2) + !$OMP DO SCHEDULE(guided) + do i_generator=1,nmax + if (abort_here) then + cycle + endif call $subroutine_diexc(psi_generators(1,1,i_generator), & generators_bitmask(1,1,d_hole1,i_bitmask_gen), & generators_bitmask(1,1,d_part1,i_bitmask_gen), & @@ -359,9 +379,33 @@ subroutine $subroutine($params_main) generators_bitmask(1,1,s_hole ,i_bitmask_gen), & generators_bitmask(1,1,s_part ,i_bitmask_gen), & i_generator $params_post) + !$ call omp_set_lock(lck) + call wall_time(wall_2) + $printout_always + if (wall_2 - wall_0 > 2.d0) then + wall_0 = wall_2 + $printout_now + endif + !$ call omp_unset_lock(lck) + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + !$ call omp_destroy_lock(lck) + + do i_generator=nmax+1,N_det_generators if (abort_here) then exit endif + call $subroutine_diexc(psi_generators(1,1,i_generator), & + generators_bitmask(1,1,d_hole1,i_bitmask_gen), & + generators_bitmask(1,1,d_part1,i_bitmask_gen), & + generators_bitmask(1,1,d_hole2,i_bitmask_gen), & + generators_bitmask(1,1,d_part2,i_bitmask_gen), & + i_generator $params_post) + call $subroutine_monoexc(psi_generators(1,1,i_generator), & + generators_bitmask(1,1,s_hole ,i_bitmask_gen), & + generators_bitmask(1,1,s_part ,i_bitmask_gen), & + i_generator $params_post) call wall_time(wall_2) $printout_always if (wall_2 - wall_0 > 2.d0) then