From 2472e63bd514b354fd85d856cde815c53f58e007 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Dec 2017 10:48:09 +0100 Subject: [PATCH] Removed parallelism in old selection --- plugins/MP2/NEEDED_CHILDREN_MODULES | 2 +- plugins/MP2/mp2.irp.f | 2 +- scripts/generate_h_apply.py | 79 +++++++---------------- src/Determinants/H_apply.template.f | 34 ---------- src/Determinants/H_apply_nozmq.template.f | 69 +------------------- src/Determinants/H_apply_zmq.template.f | 10 +-- 6 files changed, 32 insertions(+), 164 deletions(-) diff --git a/plugins/MP2/NEEDED_CHILDREN_MODULES b/plugins/MP2/NEEDED_CHILDREN_MODULES index d26e4dee..bdf4b736 100644 --- a/plugins/MP2/NEEDED_CHILDREN_MODULES +++ b/plugins/MP2/NEEDED_CHILDREN_MODULES @@ -1 +1 @@ -Perturbation Selectors_full SingleRefMethod +Perturbation Selectors_full SingleRefMethod ZMQ diff --git a/plugins/MP2/mp2.irp.f b/plugins/MP2/mp2.irp.f index d4721c71..2ea5fd14 100644 --- a/plugins/MP2/mp2.irp.f +++ b/plugins/MP2/mp2.irp.f @@ -9,7 +9,7 @@ subroutine run double precision, allocatable :: pt2(:), norm_pert(:) double precision :: H_pert_diag, E_old integer :: N_st, iter - PROVIDE Fock_matrix_diag_mo + PROVIDE Fock_matrix_diag_mo H_apply_buffer_allocated N_st = N_states allocate (pt2(N_st), norm_pert(N_st)) E_old = HF_energy diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py index a7e95e06..da1aac5a 100755 --- a/scripts/generate_h_apply.py +++ b/scripts/generate_h_apply.py @@ -8,7 +8,6 @@ copy_buffer declarations decls_main deinit_thread -skip init_main filter_integrals filter2p @@ -56,7 +55,6 @@ parameters params_main printout_always printout_now -skip subroutine """.split() @@ -82,25 +80,24 @@ class H_apply(object): self.energy = "CI_electronic_energy" self.perturbation = None self.do_double_exc = do_double_exc - #s["omp_parallel"] = """!$OMP PARALLEL DEFAULT(NONE) & - s["omp_parallel"] = """ PROVIDE elec_num_tab - !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(i,j,k,l,keys_out,hole,particle, & - !$OMP occ_particle,occ_hole,j_a,k_a,other_spin, & - !$OMP hole_save,ispin,jj,l_a,ib_jb_pairs,array_pairs, & - !$OMP accu,i_a,hole_tmp,particle_tmp,occ_particle_tmp, & - !$OMP occ_hole_tmp,key_idx,i_b,j_b,key,N_elec_in_key_part_1,& - !$OMP N_elec_in_key_hole_1,N_elec_in_key_part_2, & - !$OMP N_elec_in_key_hole_2,ia_ja_pairs,key_union_hole_part) & - !$OMP SHARED(key_in,N_int,elec_num_tab,mo_tot_num, & - !$OMP hole_1, particl_1, hole_2, particl_2, & - !$OMP elec_alpha_num,i_generator) FIRSTPRIVATE(iproc)""" - s["omp_end_parallel"] = "!$OMP END PARALLEL" - s["omp_master"] = "!$OMP MASTER" - s["omp_end_master"] = "!$OMP END MASTER" - s["omp_barrier"] = "!$OMP BARRIER" - s["omp_do"] = "!$OMP DO SCHEDULE (static,1)" - s["omp_enddo"] = "!$OMP ENDDO NOWAIT" +# s["omp_parallel"] = """ PROVIDE elec_num_tab +# !$OMP PARALLEL DEFAULT(SHARED) & +# !$OMP PRIVATE(i,j,k,l,keys_out,hole,particle, & +# !$OMP occ_particle,occ_hole,j_a,k_a,other_spin, & +# !$OMP hole_save,ispin,jj,l_a,ib_jb_pairs,array_pairs, & +# !$OMP accu,i_a,hole_tmp,particle_tmp,occ_particle_tmp, & +# !$OMP occ_hole_tmp,key_idx,i_b,j_b,key,N_elec_in_key_part_1,& +# !$OMP N_elec_in_key_hole_1,N_elec_in_key_part_2, & +# !$OMP N_elec_in_key_hole_2,ia_ja_pairs,key_union_hole_part) & +# !$OMP SHARED(key_in,N_int,elec_num_tab,mo_tot_num, & +# !$OMP hole_1, particl_1, hole_2, particl_2, & +# !$OMP elec_alpha_num,i_generator) FIRSTPRIVATE(iproc)""" +# s["omp_end_parallel"] = "!$OMP END PARALLEL" +# s["omp_master"] = "!$OMP MASTER" +# s["omp_end_master"] = "!$OMP END MASTER" +# s["omp_barrier"] = "!$OMP BARRIER" +# s["omp_do"] = "!$OMP DO SCHEDULE (static,1)" +# s["omp_enddo"] = "!$OMP ENDDO" d = { True : '.True.', False : '.False.'} s["do_mono_excitations"] = d[do_mono_exc] @@ -289,11 +286,6 @@ class H_apply(object): """ - def unset_skip(self): - self["skip"] = """ - """ - - def set_filter_2h_2p(self): self["filter2h2p_double"] = """ if (is_a_two_holes_two_particles(key)) cycle @@ -400,9 +392,9 @@ class H_apply(object): pt2_old(k) = pt2(k) enddo """ - self.data["omp_parallel"] += """& - !$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) & - !$OMP PRIVATE(sum_e_2_pert, sum_norm_pert, sum_H_pert_diag)""" +# self.data["omp_parallel"] += """& +# !$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) & +# !$OMP PRIVATE(sum_e_2_pert, sum_norm_pert, sum_H_pert_diag)""" def set_selection_pt2(self,pert): if self.selection_pt2 is not None: @@ -434,22 +426,8 @@ class H_apply(object): call fill_H_apply_buffer_selection(key_idx,keys_out,e_2_pert_buffer, & coef_pert_buffer,N_st,N_int,iproc,select_max_out) """ - self.data["omp_parallel"] += """& - !$OMP REDUCTION (max:select_max_out)""" - self.data["skip"] = """ - if (i_generator < size_select_max) then - if (select_max(i_generator) < selection_criterion_min*selection_criterion_factor) then - ! OMP CRITICAL - do k=1,N_st - norm_psi(k) = norm_psi(k) + psi_coef_generators(i_generator,k)*psi_coef_generators(i_generator,k) - pt2_old(k) = 0.d0 - enddo - ! OMP END CRITICAL - cycle - endif - select_max(i_generator) = 0.d0 - endif - """ +# self.data["omp_parallel"] += """& +# !$OMP REDUCTION (max:select_max_out)""" def unset_openmp(self): @@ -498,15 +476,4 @@ class H_apply_zmq(H_apply): def set_selection_pt2(self,pert): H_apply.set_selection_pt2(self,pert) - self.data["skip"] = """ - if (i_generator < size_select_max) then - if (select_max(i_generator) < selection_criterion_min*selection_criterion_factor) then - do k=1,N_st - pt2(k) = select_max(i_generator) - enddo - cycle - endif - select_max(i_generator) = 0.d0 - endif - """ diff --git a/src/Determinants/H_apply.template.f b/src/Determinants/H_apply.template.f index 5f59fe84..81d0d465 100644 --- a/src/Determinants/H_apply.template.f +++ b/src/Determinants/H_apply.template.f @@ -14,14 +14,6 @@ subroutine $subroutine_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl $declarations -! print *, "bbbbbbbbbbbbbbb" -! call debug_det(key_in, N_int) -! call debug_det(hole_1, N_int) -! call debug_det(hole_2, N_int) -! call debug_det(particl_1, N_int) -! call debug_det(particl_2, N_int) -! print *, "eeeeeeeeeeeeeeee" - highest = 0 do k=1,N_int*bit_kind_size status(k,1) = 0 @@ -43,32 +35,6 @@ subroutine $subroutine_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl end do end do -! ! GEL D'ELECTRONS -! ! nt = 0 -! do i=1,i_generator-1 -! if(key_in(1,1) == key_prev(1,1,i)) then -! tmp = xor(key_in(1,2), key_prev(1,2,i)) -! if(popcnt(tmp) == 2) then -! ns = 1+trailz(iand(tmp, key_in(1,2))) -! ! if(status(ns, 2) /= 0) then -! ! nt += 1 -! ! end if -! status(ns, 2) = 0 -! end if -! else if(key_in(1,2) == key_prev(1,2,i)) then -! tmp = xor(key_in(1,1), key_prev(1,1,i)) -! if(popcnt(tmp) == 2) then -! ns = 1+trailz(iand(tmp, key_in(1,1))) -! ! if(status(ns, 1) /= 0) then -! ! nt += 1 -! ! end if -! status(ns, 1) = 0 -! end if -! end if -! end do -! ! print *, "nt", nt, i_generator - - do sp=1,2 do p1=1,highest if(status(p1, sp) == 0) then diff --git a/src/Determinants/H_apply_nozmq.template.f b/src/Determinants/H_apply_nozmq.template.f index 0c319fe3..a8ea9b76 100644 --- a/src/Determinants/H_apply_nozmq.template.f +++ b/src/Determinants/H_apply_nozmq.template.f @@ -9,7 +9,7 @@ subroutine $subroutine($params_main) $decls_main - integer :: i_generator, nmax + integer :: i_generator double precision :: wall_0, wall_1 integer(bit_kind), allocatable :: mask(:,:,:) integer :: ispin, k @@ -20,15 +20,11 @@ subroutine $subroutine($params_main) PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators - nmax = mod( N_det_generators,nproc ) - call wall_time(wall_0) iproc = 0 allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_tot_num+1) ) - do i_generator=1,nmax - - $skip + do i_generator=1,N_det_generators ! Compute diagonal of the Fock matrix call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) @@ -78,67 +74,6 @@ subroutine $subroutine($params_main) deallocate( mask, fock_diag_tmp ) - !$OMP PARALLEL DEFAULT(SHARED) & - !$OMP PRIVATE(i_generator,wall_1,wall_0,ispin,k,mask,iproc,fock_diag_tmp) - call wall_time(wall_0) - !$ iproc = omp_get_thread_num() - allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_tot_num+1) ) - !$OMP DO SCHEDULE(dynamic,1) - do i_generator=nmax+1,N_det_generators - $skip - - ! Compute diagonal of the Fock matrix - call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) - - ! Create bit masks for holes and particles - do ispin=1,2 - do k=1,N_int - mask(k,ispin,s_hole) = & - iand(generators_bitmask(k,ispin,s_hole,i_bitmask_gen), & - psi_det_generators(k,ispin,i_generator) ) - mask(k,ispin,s_part) = & - iand(generators_bitmask(k,ispin,s_part,i_bitmask_gen), & - not(psi_det_generators(k,ispin,i_generator)) ) - mask(k,ispin,d_hole1) = & - iand(generators_bitmask(k,ispin,d_hole1,i_bitmask_gen), & - psi_det_generators(k,ispin,i_generator) ) - mask(k,ispin,d_part1) = & - iand(generators_bitmask(k,ispin,d_part1,i_bitmask_gen), & - not(psi_det_generators(k,ispin,i_generator)) ) - mask(k,ispin,d_hole2) = & - iand(generators_bitmask(k,ispin,d_hole2,i_bitmask_gen), & - psi_det_generators(k,ispin,i_generator) ) - mask(k,ispin,d_part2) = & - iand(generators_bitmask(k,ispin,d_part2,i_bitmask_gen), & - not (psi_det_generators(k,ispin,i_generator)) ) - enddo - enddo - - if($do_double_excitations)then - call $subroutine_diexc(psi_det_generators(1,1,i_generator), & - psi_det_generators(1,1,1), & - mask(1,1,d_hole1), mask(1,1,d_part1), & - mask(1,1,d_hole2), mask(1,1,d_part2), & - fock_diag_tmp, i_generator, iproc $params_post) - endif - if($do_mono_excitations)then - call $subroutine_monoexc(psi_det_generators(1,1,i_generator), & - mask(1,1,s_hole ), mask(1,1,s_part ), & - fock_diag_tmp, i_generator, iproc $params_post) - endif - !$OMP CRITICAL - call wall_time(wall_1) - $printout_always - if (wall_1 - wall_0 > 2.d0) then - $printout_now - wall_0 = wall_1 - endif - !$OMP END CRITICAL - enddo - !$OMP END DO - deallocate( mask, fock_diag_tmp ) - !$OMP END PARALLEL - $copy_buffer $generate_psi_guess diff --git a/src/Determinants/H_apply_zmq.template.f b/src/Determinants/H_apply_zmq.template.f index 7bc5795f..57499e2a 100644 --- a/src/Determinants/H_apply_zmq.template.f +++ b/src/Determinants/H_apply_zmq.template.f @@ -22,8 +22,8 @@ subroutine $subroutine($params_main) $initialization PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators - integer(ZMQ_PTR), external :: new_zmq_pair_socket, zmq_socket_pull - integer(ZMQ_PTR) :: zmq_socket_pair + integer(ZMQ_PTR), external :: new_zmq_pair_socket + integer(ZMQ_PTR) :: zmq_socket_pair, zmq_socket_pull integer(ZMQ_PTR) :: zmq_to_qp_run_socket double precision, allocatable :: pt2_generators(:,:), norm_pert_generators(:,:) @@ -41,10 +41,10 @@ subroutine $subroutine($params_main) if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then stop 'Unable to put psi on ZMQ server' endif - if (zmq_put_N_det_generators(zmq_to_qp_run_socket, worker_id) == -1) then + if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then stop 'Unable to put N_det_generators on ZMQ server' endif - if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, worker_id) == -1) then + if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then stop 'Unable to put N_det_selectors on ZMQ server' endif if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then @@ -67,7 +67,7 @@ subroutine $subroutine($params_main) PROVIDE nproc N_states !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(i) & - !$OMP SHARED(zmq_socket_pair,N_states, pt2_generators, norm_pert_generators, H_pert_diag_generators, n, task_id, i_generator) & + !$OMP SHARED(zmq_socket_pair,N_states, pt2_generators, norm_pert_generators, H_pert_diag_generators, n, task_id, i_generator,zmq_socket_pull) & !$OMP num_threads(nproc+1) i = omp_get_thread_num() if (i == 0) then