Removed parallelism in old selection

Anthony Scemama 2017-12-08 10:48:09 +01:00
parent 2b1d6bc9fc
commit 2472e63bd5
6 changed files with 32 additions and 164 deletions

View File

@@ -1 +1 @@
-Perturbation Selectors_full SingleRefMethod
+Perturbation Selectors_full SingleRefMethod ZMQ

View File

@@ -9,7 +9,7 @@ subroutine run
 double precision, allocatable :: pt2(:), norm_pert(:)
 double precision :: H_pert_diag, E_old
 integer :: N_st, iter
-PROVIDE Fock_matrix_diag_mo
+PROVIDE Fock_matrix_diag_mo H_apply_buffer_allocated
 N_st = N_states
 allocate (pt2(N_st), norm_pert(N_st))
 E_old = HF_energy
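`PROVIDE` is an IRPF90 directive: it forces the named providers to run before the following statements, so adding `H_apply_buffer_allocated` guarantees the buffer exists before anything writes to it. A minimal plain-Fortran sketch of the same idea, with hypothetical names standing in for the IRPF90 provider mechanism:

    ! Sketch: build shared state once, up front, before any parallel use.
    module buffer_mod
      implicit none
      double precision, allocatable :: buffer(:)
    contains
      subroutine ensure_buffer(n)
        integer, intent(in) :: n
        ! runs the "provider" once, like PROVIDE in IRPF90
        if (.not. allocated(buffer)) allocate(buffer(n), source=0.d0)
      end subroutine ensure_buffer
    end module buffer_mod

    program provide_sketch
      use buffer_mod
      implicit none
      integer :: i
      call ensure_buffer(100)   ! "PROVIDE" before the threaded section
      !$OMP PARALLEL DO
      do i = 1, 100
        buffer(i) = dble(i)     ! safe: allocation already happened serially
      end do
      !$OMP END PARALLEL DO
      print *, 'sum =', sum(buffer)
    end program provide_sketch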

View File

@@ -8,7 +8,6 @@ copy_buffer
 declarations
 decls_main
 deinit_thread
-skip
 init_main
 filter_integrals
 filter2p
@@ -56,7 +55,6 @@ parameters
 params_main
 printout_always
 printout_now
-skip
 subroutine
 """.split()
@@ -82,25 +80,24 @@ class H_apply(object):
 self.energy = "CI_electronic_energy"
 self.perturbation = None
 self.do_double_exc = do_double_exc
-#s["omp_parallel"] = """!$OMP PARALLEL DEFAULT(NONE) &
-s["omp_parallel"] = """ PROVIDE elec_num_tab
-!$OMP PARALLEL DEFAULT(SHARED) &
-!$OMP PRIVATE(i,j,k,l,keys_out,hole,particle, &
-!$OMP occ_particle,occ_hole,j_a,k_a,other_spin, &
-!$OMP hole_save,ispin,jj,l_a,ib_jb_pairs,array_pairs, &
-!$OMP accu,i_a,hole_tmp,particle_tmp,occ_particle_tmp, &
-!$OMP occ_hole_tmp,key_idx,i_b,j_b,key,N_elec_in_key_part_1,&
-!$OMP N_elec_in_key_hole_1,N_elec_in_key_part_2, &
-!$OMP N_elec_in_key_hole_2,ia_ja_pairs,key_union_hole_part) &
-!$OMP SHARED(key_in,N_int,elec_num_tab,mo_tot_num, &
-!$OMP hole_1, particl_1, hole_2, particl_2, &
-!$OMP elec_alpha_num,i_generator) FIRSTPRIVATE(iproc)"""
-s["omp_end_parallel"] = "!$OMP END PARALLEL"
-s["omp_master"] = "!$OMP MASTER"
-s["omp_end_master"] = "!$OMP END MASTER"
-s["omp_barrier"] = "!$OMP BARRIER"
-s["omp_do"] = "!$OMP DO SCHEDULE (static,1)"
-s["omp_enddo"] = "!$OMP ENDDO NOWAIT"
+# s["omp_parallel"] = """ PROVIDE elec_num_tab
+# !$OMP PARALLEL DEFAULT(SHARED) &
+# !$OMP PRIVATE(i,j,k,l,keys_out,hole,particle, &
+# !$OMP occ_particle,occ_hole,j_a,k_a,other_spin, &
+# !$OMP hole_save,ispin,jj,l_a,ib_jb_pairs,array_pairs, &
+# !$OMP accu,i_a,hole_tmp,particle_tmp,occ_particle_tmp, &
+# !$OMP occ_hole_tmp,key_idx,i_b,j_b,key,N_elec_in_key_part_1,&
+# !$OMP N_elec_in_key_hole_1,N_elec_in_key_part_2, &
+# !$OMP N_elec_in_key_hole_2,ia_ja_pairs,key_union_hole_part) &
+# !$OMP SHARED(key_in,N_int,elec_num_tab,mo_tot_num, &
+# !$OMP hole_1, particl_1, hole_2, particl_2, &
+# !$OMP elec_alpha_num,i_generator) FIRSTPRIVATE(iproc)"""
+# s["omp_end_parallel"] = "!$OMP END PARALLEL"
+# s["omp_master"] = "!$OMP MASTER"
+# s["omp_end_master"] = "!$OMP END MASTER"
+# s["omp_barrier"] = "!$OMP BARRIER"
+# s["omp_do"] = "!$OMP DO SCHEDULE (static,1)"
+# s["omp_enddo"] = "!$OMP ENDDO"
 d = { True : '.True.', False : '.False.'}
 s["do_mono_excitations"] = d[do_mono_exc]
@@ -289,11 +286,6 @@ class H_apply(object):
 """
-def unset_skip(self):
-self["skip"] = """
-"""
 def set_filter_2h_2p(self):
 self["filter2h2p_double"] = """
 if (is_a_two_holes_two_particles(key)) cycle
@@ -400,9 +392,9 @@ class H_apply(object):
 pt2_old(k) = pt2(k)
 enddo
 """
-self.data["omp_parallel"] += """&
-!$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) &
-!$OMP PRIVATE(sum_e_2_pert, sum_norm_pert, sum_H_pert_diag)"""
+# self.data["omp_parallel"] += """&
+# !$OMP SHARED(N_st) PRIVATE(e_2_pert_buffer,coef_pert_buffer) &
+# !$OMP PRIVATE(sum_e_2_pert, sum_norm_pert, sum_H_pert_diag)"""
 def set_selection_pt2(self,pert):
 if self.selection_pt2 is not None:
@@ -434,22 +426,8 @@ class H_apply(object):
 call fill_H_apply_buffer_selection(key_idx,keys_out,e_2_pert_buffer, &
 coef_pert_buffer,N_st,N_int,iproc,select_max_out)
 """
-self.data["omp_parallel"] += """&
-!$OMP REDUCTION (max:select_max_out)"""
-self.data["skip"] = """
-if (i_generator < size_select_max) then
-if (select_max(i_generator) < selection_criterion_min*selection_criterion_factor) then
-! OMP CRITICAL
-do k=1,N_st
-norm_psi(k) = norm_psi(k) + psi_coef_generators(i_generator,k)*psi_coef_generators(i_generator,k)
-pt2_old(k) = 0.d0
-enddo
-! OMP END CRITICAL
-cycle
-endif
-select_max(i_generator) = 0.d0
-endif
-"""
+# self.data["omp_parallel"] += """&
+# !$OMP REDUCTION (max:select_max_out)"""
 def unset_openmp(self):
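The `REDUCTION (max:select_max_out)` clause being commented out let each thread track its own running maximum and merged them at the end of the loop; with the parallel region gone, the clause goes too. A minimal sketch of an OpenMP max reduction on toy data:

    program max_reduction
      use omp_lib
      implicit none
      integer :: i
      double precision :: select_max_out, v
      select_max_out = -huge(1.d0)
      !$OMP PARALLEL DO PRIVATE(v) REDUCTION(max:select_max_out)
      do i = 1, 10000
        v = sin(dble(i))                      ! stand-in for a candidate weight
        select_max_out = max(select_max_out, v)
      end do
      !$OMP END PARALLEL DO
      print *, 'max =', select_max_out        ! close to 1.0
    end program max_reduction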
@@ -498,15 +476,4 @@ class H_apply_zmq(H_apply):
 def set_selection_pt2(self,pert):
 H_apply.set_selection_pt2(self,pert)
-self.data["skip"] = """
-if (i_generator < size_select_max) then
-if (select_max(i_generator) < selection_criterion_min*selection_criterion_factor) then
-do k=1,N_st
-pt2(k) = select_max(i_generator)
-enddo
-cycle
-endif
-select_max(i_generator) = 0.d0
-endif
-"""

View File

@@ -14,14 +14,6 @@ subroutine $subroutine_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl
 $declarations
-! print *, "bbbbbbbbbbbbbbb"
-! call debug_det(key_in, N_int)
-! call debug_det(hole_1, N_int)
-! call debug_det(hole_2, N_int)
-! call debug_det(particl_1, N_int)
-! call debug_det(particl_2, N_int)
-! print *, "eeeeeeeeeeeeeeee"
 highest = 0
 do k=1,N_int*bit_kind_size
 status(k,1) = 0
@@ -43,32 +35,6 @@ subroutine $subroutine_diexc(key_in, key_prev, hole_1,particl_1, hole_2, particl
 end do
 end do
-! ! GEL D'ELECTRONS
-! ! nt = 0
-! do i=1,i_generator-1
-! if(key_in(1,1) == key_prev(1,1,i)) then
-! tmp = xor(key_in(1,2), key_prev(1,2,i))
-! if(popcnt(tmp) == 2) then
-! ns = 1+trailz(iand(tmp, key_in(1,2)))
-! ! if(status(ns, 2) /= 0) then
-! ! nt += 1
-! ! end if
-! status(ns, 2) = 0
-! end if
-! else if(key_in(1,2) == key_prev(1,2,i)) then
-! tmp = xor(key_in(1,1), key_prev(1,1,i))
-! if(popcnt(tmp) == 2) then
-! ns = 1+trailz(iand(tmp, key_in(1,1)))
-! ! if(status(ns, 1) /= 0) then
-! ! nt += 1
-! ! end if
-! status(ns, 1) = 0
-! end if
-! end if
-! end do
-! ! print *, "nt", nt, i_generator
 do sp=1,2
 do p1=1,highest
 if(status(p1, sp) == 0) then
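The dead commented block being deleted located the orbital by which two determinants differ: `xor` isolates the differing bits, `popcnt` checks that exactly two bits differ (one single excitation), and `1+trailz` converts the lowest set bit into a one-based orbital index. A standalone sketch with the standard intrinsic `ieor` in place of the `xor` extension, on toy bit patterns:

    program det_bits
      implicit none
      integer, parameter :: bit_kind = 8        ! 64-bit words, as in the template
      integer(bit_kind) :: key_a, key_b, tmp
      integer :: ns
      key_a = 45_bit_kind    ! binary 101101: orbitals 1,3,4,6 occupied
      key_b = 43_bit_kind    ! binary 101011: orbitals 1,2,4,6 occupied
      tmp = ieor(key_a, key_b)                  ! bits set where the two differ
      if (popcnt(tmp) == 2) then                ! exactly one orbital moved
        ns = 1 + trailz(iand(tmp, key_a))       ! lowest bit set only in key_a
        print *, 'orbital occupied only in key_a:', ns   ! prints 3
      end if
    end program det_bits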

View File

@@ -9,7 +9,7 @@ subroutine $subroutine($params_main)
 $decls_main
-integer :: i_generator, nmax
+integer :: i_generator
 double precision :: wall_0, wall_1
 integer(bit_kind), allocatable :: mask(:,:,:)
 integer :: ispin, k
@@ -20,15 +20,11 @@ subroutine $subroutine($params_main)
 PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators
-nmax = mod( N_det_generators,nproc )
 call wall_time(wall_0)
 iproc = 0
 allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_tot_num+1) )
-do i_generator=1,nmax
-$skip
+do i_generator=1,N_det_generators
 ! Compute diagonal of the Fock matrix
 call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
@@ -78,67 +74,6 @@ subroutine $subroutine($params_main)
 deallocate( mask, fock_diag_tmp )
-!$OMP PARALLEL DEFAULT(SHARED) &
-!$OMP PRIVATE(i_generator,wall_1,wall_0,ispin,k,mask,iproc,fock_diag_tmp)
-call wall_time(wall_0)
-!$ iproc = omp_get_thread_num()
-allocate( mask(N_int,2,6), fock_diag_tmp(2,mo_tot_num+1) )
-!$OMP DO SCHEDULE(dynamic,1)
-do i_generator=nmax+1,N_det_generators
-$skip
-! Compute diagonal of the Fock matrix
-call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
-! Create bit masks for holes and particles
-do ispin=1,2
-do k=1,N_int
-mask(k,ispin,s_hole) = &
-iand(generators_bitmask(k,ispin,s_hole,i_bitmask_gen), &
-psi_det_generators(k,ispin,i_generator) )
-mask(k,ispin,s_part) = &
-iand(generators_bitmask(k,ispin,s_part,i_bitmask_gen), &
-not(psi_det_generators(k,ispin,i_generator)) )
-mask(k,ispin,d_hole1) = &
-iand(generators_bitmask(k,ispin,d_hole1,i_bitmask_gen), &
-psi_det_generators(k,ispin,i_generator) )
-mask(k,ispin,d_part1) = &
-iand(generators_bitmask(k,ispin,d_part1,i_bitmask_gen), &
-not(psi_det_generators(k,ispin,i_generator)) )
-mask(k,ispin,d_hole2) = &
-iand(generators_bitmask(k,ispin,d_hole2,i_bitmask_gen), &
-psi_det_generators(k,ispin,i_generator) )
-mask(k,ispin,d_part2) = &
-iand(generators_bitmask(k,ispin,d_part2,i_bitmask_gen), &
-not (psi_det_generators(k,ispin,i_generator)) )
-enddo
-enddo
-if($do_double_excitations)then
-call $subroutine_diexc(psi_det_generators(1,1,i_generator), &
-psi_det_generators(1,1,1), &
-mask(1,1,d_hole1), mask(1,1,d_part1), &
-mask(1,1,d_hole2), mask(1,1,d_part2), &
-fock_diag_tmp, i_generator, iproc $params_post)
-endif
-if($do_mono_excitations)then
-call $subroutine_monoexc(psi_det_generators(1,1,i_generator), &
-mask(1,1,s_hole ), mask(1,1,s_part ), &
-fock_diag_tmp, i_generator, iproc $params_post)
-endif
-!$OMP CRITICAL
-call wall_time(wall_1)
-$printout_always
-if (wall_1 - wall_0 > 2.d0) then
-$printout_now
-wall_0 = wall_1
-endif
-!$OMP END CRITICAL
-enddo
-!$OMP END DO
-deallocate( mask, fock_diag_tmp )
-!$OMP END PARALLEL
 $copy_buffer
 $generate_psi_guess
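The removed section implemented the two-phase split set up earlier by `nmax = mod( N_det_generators,nproc )`: the remainder is processed serially, then the bulk is shared among threads with `SCHEDULE(dynamic,1)` so slow generators do not stall the others. A compact sketch of that pattern with a hypothetical workload:

    program split_loop
      use omp_lib
      implicit none
      integer :: n_items, nproc, nmax, i, iproc
      n_items = 103
      nproc = omp_get_max_threads()
      nmax = mod(n_items, nproc)        ! remainder handled serially first
      do i = 1, nmax
        call process(i, 0)
      end do
      !$OMP PARALLEL PRIVATE(i, iproc)
      iproc = omp_get_thread_num()
      !$OMP DO SCHEDULE(dynamic,1)
      do i = nmax+1, n_items            ! bulk distributed one item at a time
        call process(i, iproc)
      end do
      !$OMP END DO
      !$OMP END PARALLEL
    contains
      subroutine process(i, iproc)
        integer, intent(in) :: i, iproc
        ! stand-in for building one generator's excitations
        if (mod(i, 50) == 0) print *, 'item', i, 'on thread', iproc
      end subroutine process
    end program split_loop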

View File

@@ -22,8 +22,8 @@ subroutine $subroutine($params_main)
 $initialization
 PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators
-integer(ZMQ_PTR), external :: new_zmq_pair_socket, zmq_socket_pull
-integer(ZMQ_PTR) :: zmq_socket_pair
+integer(ZMQ_PTR), external :: new_zmq_pair_socket
+integer(ZMQ_PTR) :: zmq_socket_pair, zmq_socket_pull
 integer(ZMQ_PTR) :: zmq_to_qp_run_socket
 double precision, allocatable :: pt2_generators(:,:), norm_pert_generators(:,:)
@@ -41,10 +41,10 @@ subroutine $subroutine($params_main)
 if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
 stop 'Unable to put psi on ZMQ server'
 endif
-if (zmq_put_N_det_generators(zmq_to_qp_run_socket, worker_id) == -1) then
+if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
 stop 'Unable to put N_det_generators on ZMQ server'
 endif
-if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, worker_id) == -1) then
+if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
 stop 'Unable to put N_det_selectors on ZMQ server'
 endif
 if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then
@@ -67,7 +67,7 @@ subroutine $subroutine($params_main)
 PROVIDE nproc N_states
 !$OMP PARALLEL DEFAULT(NONE) &
 !$OMP PRIVATE(i) &
-!$OMP SHARED(zmq_socket_pair,N_states, pt2_generators, norm_pert_generators, H_pert_diag_generators, n, task_id, i_generator) &
+!$OMP SHARED(zmq_socket_pair,N_states, pt2_generators, norm_pert_generators, H_pert_diag_generators, n, task_id, i_generator,zmq_socket_pull) &
 !$OMP num_threads(nproc+1)
 i = omp_get_thread_num()
 if (i == 0) then
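The surviving ZMQ path keeps one extra thread: `num_threads(nproc+1)` spawns `nproc` workers plus thread 0, which only collects results (here it now also owns `zmq_socket_pull`, hence the added `SHARED` entry). A toy sketch of that collector/worker split, with a shared counter standing in for the pull socket of the real code:

    program collector_workers
      use omp_lib
      implicit none
      integer :: i, nproc, n_tasks, n_done, seen
      nproc = 4
      n_tasks = nproc          ! one toy task per worker thread
      n_done = 0
      !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i, seen) &
      !$OMP SHARED(nproc, n_tasks, n_done) num_threads(nproc+1)
      i = omp_get_thread_num()
      if (i == 0) then
        ! collector thread: the real template pulls results over ZMQ here
        seen = 0
        do while (seen < n_tasks)
          !$OMP ATOMIC READ
          seen = n_done
        end do
        print *, 'collector gathered', seen, 'results'
      else
        ! worker thread: stands in for one slave pushing its result
        !$OMP ATOMIC
        n_done = n_done + 1
      end if
      !$OMP END PARALLEL
    end program collector_workers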