From 39618c4300f8aaf64fac22317f99e632051422c0 Mon Sep 17 00:00:00 2001 From: Yann GARNIRON Date: Thu, 26 May 2016 13:52:48 +0200 Subject: [PATCH] corrected mrsc2 for large systems --- config/ifort.cfg | 6 ++-- plugins/MRCC_Utils/mrcc_utils.irp.f | 51 +++++++++++++++++++++++----- plugins/mrcepa0/dressing.irp.f | 49 +++++++++++++------------- plugins/mrcepa0/dressing_slave.irp.f | 30 ++++++++-------- src/Determinants/davidson.irp.f | 4 +-- 5 files changed, 90 insertions(+), 50 deletions(-) diff --git a/config/ifort.cfg b/config/ifort.cfg index 2b2fe0a2..acbfde1f 100644 --- a/config/ifort.cfg +++ b/config/ifort.cfg @@ -31,14 +31,14 @@ OPENMP : 1 ; Append OpenMP flags # -ftz : Flushes denormal results to zero # [OPT] -FCFLAGS : -xSSE4.2 -O2 -ip -ftz -g +FCFLAGS : -C -xAVX -O2 -ip -ftz -g -traceback # Profiling flags ################# # [PROFILE] FC : -p -g -FCFLAGS : -xSSE4.2 -O2 -ip -ftz +FCFLAGS : -xAVX -O2 -ip -ftz # Debugging flags ################# @@ -51,7 +51,7 @@ FCFLAGS : -xSSE4.2 -O2 -ip -ftz # [DEBUG] FC : -g -traceback -FCFLAGS : -xSSE2 -C -fpe0 +FCFLAGS : -xAVX -C -fpe0 IRPF90_FLAGS : --openmp # OpenMP flags diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f index 42058145..cfd6481e 100644 --- a/plugins/MRCC_Utils/mrcc_utils.irp.f +++ b/plugins/MRCC_Utils/mrcc_utils.irp.f @@ -102,7 +102,7 @@ END_PROVIDER if (ihpsi_current(k) == 0.d0) then ihpsi_current(k) = 1.d-32 endif - lambda_mrcc(k,i) = min(0.d0,psi_non_ref_coef(i,k)/ihpsi_current(k) ) + lambda_mrcc(k,i) = min(1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) ) lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then i_pert_count += 1 @@ -356,7 +356,7 @@ integer function searchDet(dets, det, n, Nint) h = n do while(.true.) searchDet = (l+h)/2 - c = detCmp(dets(1,1,searchDet), det(:,:), Nint) + c = detCmp(dets(1,1,searchDet), det(1,1), Nint) if(c == 0) return if(c == 1) then h = searchDet-1 @@ -386,7 +386,7 @@ integer function searchExc(excs, exc, n) h = n do searchExc = (l+h)/2 - c = excCmp(excs(1,searchExc), exc(:)) + c = excCmp(excs(1,searchExc), exc(1)) if(c == 0) return if(c == 1) then h = searchExc-1 @@ -407,7 +407,7 @@ subroutine sort_det(key, idx, N_key, Nint) integer, intent(in) :: Nint, N_key integer(8),intent(inout) :: key(Nint,2,N_key) - integer,intent(out) :: idx(N_key) + integer,intent(inout) :: idx(N_key) integer(8) :: tmp(Nint, 2) integer :: tmpidx,i,ni @@ -557,9 +557,44 @@ subroutine dec_exc(exc, h1, h2, p1, p2) end subroutine - BEGIN_PROVIDER [ integer*2, hh_exists, (4, N_det_ref * N_det_non_ref) ] -&BEGIN_PROVIDER [ integer, hh_shortcut, (0:N_det_ref * N_det_non_ref + 1) ] -&BEGIN_PROVIDER [ integer*2, pp_exists, (4, N_det_ref * N_det_non_ref) ] + BEGIN_PROVIDER [ integer, N_hh_exists ] +&BEGIN_PROVIDER [ integer, N_pp_exists ] +&BEGIN_PROVIDER [ integer, N_ex_exists ] + implicit none + integer :: exc(0:2, 2, 2), degree, n, on, s, l, i + integer*2 :: h1, h2, p1, p2 + double precision :: phase + logical,allocatable :: hh(:,:) , pp(:,:) + + allocate(hh(0:mo_tot_num*2, 0:mo_tot_num*2)) + allocate(pp(0:mo_tot_num*2, 0:mo_tot_num*2)) + hh = .false. + pp = .false. + N_hh_exists = 0 + N_pp_exists = 0 + N_ex_exists = 0 + + n = 0 + do i=1, N_det_ref + do l=1, N_det_non_ref + call get_excitation(psi_ref(1,1,i), psi_non_ref(1,1,l), exc, degree, phase, N_int) + if(degree == -1) cycle + call dec_exc(exc, h1, h2, p1, p2) + N_ex_exists += 1 + if(.not. hh(h1,h2)) N_hh_exists = N_hh_exists + 1 + if(.not. pp(p1,p2)) N_pp_exists = N_pp_exists + 1 + hh(h1,h2) = .true. + pp(p1,p2) = .true. + end do + end do + N_pp_exists = min(N_ex_exists, N_pp_exists * N_hh_exists) +END_PROVIDER + + + + BEGIN_PROVIDER [ integer*2, hh_exists, (4, N_hh_exists) ] +&BEGIN_PROVIDER [ integer, hh_shortcut, (0:N_hh_exists + 1) ] +&BEGIN_PROVIDER [ integer*2, pp_exists, (4, N_pp_exists) ] implicit none integer*2,allocatable :: num(:,:) integer :: exc(0:2, 2, 2), degree, n, on, s, l, i @@ -567,7 +602,7 @@ end subroutine double precision :: phase logical, external :: excEq - allocate(num(4, N_det_ref * N_det_non_ref)) + allocate(num(4, N_ex_exists+1)) hh_shortcut = 0 hh_exists = 0 diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f index 639b62e4..d2e88d96 100644 --- a/plugins/mrcepa0/dressing.irp.f +++ b/plugins/mrcepa0/dressing.irp.f @@ -8,7 +8,7 @@ use bitmasks implicit none integer :: gen, h, p, i_state, n, t, i, h1, h2, p1, p2, s1, s2, iproc integer(bit_kind) :: mask(N_int, 2), omask(N_int, 2) - integer(bit_kind) :: buf(N_int, 2, N_det_non_ref) + integer(bit_kind),allocatable :: buf(:,:,:) logical :: ok logical, external :: detEq @@ -16,27 +16,29 @@ use bitmasks delta_ii_mrcc = 0d0 i_state = 1 provide hh_shortcut psi_det_size lambda_mrcc - !$OMP PARALLEL DO default(none) schedule(dynamic) & !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) & !$OMP shared(N_states, N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) & !$OMP private(h, n, mask, omask, buf, ok, iproc) do gen= 1, N_det_generators + allocate(buf(N_int, 2, N_det_non_ref)) iproc = omp_get_thread_num() + 1 print *, gen, "/", N_det_generators do h=1, hh_shortcut(0) call apply_hole(psi_det_generators(1,1,gen), hh_exists(1, h), mask, ok, N_int) if(.not. ok) cycle omask = 0_bit_kind - if(hh_exists(1, h) /= 0) omask = mask + !if(hh_exists(1, h) /= 0) omask = mask n = 1 do p=hh_shortcut(h), hh_shortcut(h+1)-1 call apply_particle(mask, pp_exists(1, p), buf(1,1,n), ok, N_int) if(ok) n = n + 1 + if(n > N_det_non_ref) stop "MRCC..." end do n = n - 1 if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask) end do + deallocate(buf) end do !$OMP END PARALLEL DO END_PROVIDER @@ -58,11 +60,10 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer(bit_kind), intent(in) :: det_buffer(Nint,2,n_selected) integer :: i,j,k,l,m - integer :: degree_alpha(psi_det_size) - integer :: idx_alpha(0:psi_det_size) + integer,allocatable :: idx_alpha(:), degree_alpha(:) logical :: good, fullMatch - integer(bit_kind) :: tq(Nint,2,n_selected) + integer(bit_kind),allocatable :: tq(:,:,:) integer :: N_tq, c_ref ,degree double precision :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states) @@ -76,7 +77,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe integer :: i_state, k_sd, l_sd, i_I, i_alpha integer(bit_kind),allocatable :: miniList(:,:,:) - integer(bit_kind) :: key_mask(Nint, 2) + integer(bit_kind),intent(in) :: key_mask(Nint, 2) integer,allocatable :: idx_miniList(:) integer :: N_miniList, ni, leng double precision, allocatable :: hij_cache(:) @@ -88,8 +89,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe leng = max(N_det_generators, N_det_non_ref) - allocate(miniList(Nint, 2, leng), idx_minilist(leng), hij_cache(N_det_non_ref)) - + allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref)) + allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size)) !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint) call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint) @@ -373,10 +374,15 @@ END_PROVIDER use bitmasks implicit none - integer(bit_kind) :: det_noactive(N_int, 2, N_det_non_ref), nonactive_sorb(N_int,2), det(N_int, 2) - integer i, II, j, k, n, ni, idx(N_det_non_ref), shortcut(0:N_det_non_ref+1), blok, degree + integer(bit_kind),allocatable :: det_noactive(:,:,:) + integer, allocatable :: shortcut(:), idx(:) + integer(bit_kind) :: nonactive_sorb(N_int,2), det(N_int, 2) + integer i, II, j, k, n, ni, blok, degree logical, external :: detEq + allocate(det_noactive(N_int, 2, N_det_non_ref)) + allocate(idx(N_det_non_ref), shortcut(0:N_det_non_ref+1)) + print *, "pre start" active_sorb(:,:) = 0_8 nonactive_sorb(:,:) = not(0_8) @@ -507,12 +513,10 @@ END_PROVIDER end do end do !!$OMP END PARALLEL DO - print *, npres npre=0 do i=1,N_det_ref npre += npres(i) end do - print *, npre !stop do i=1,N_det_ref do j=1,i @@ -609,7 +613,8 @@ end function double precision :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1) double precision :: contrib, HIIi, HJk, wall integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ - integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2), sortRef(N_int,2,N_det_ref) + integer(bit_kind) :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2) + integer(bit_kind),allocatable :: sortRef(:,:,:) integer, allocatable :: idx_sorted_bit(:) integer, external :: get_index_in_psi_det_sorted_bit, searchDet logical, external :: is_in_wavefunction, detEq @@ -618,10 +623,7 @@ end function integer*8, save :: notf = 0 call wall_time(wall) - print *, "cepa0", wall -! provide det_cepa0_active delta_cas lambda_mrcc -! provide mo_bielec_integrals_in_map - allocate(idx_sorted_bit(N_det)) + allocate(idx_sorted_bit(N_det), sortRef(N_int,2,N_det_ref)) sortRef(:,:,:) = det_ref_active(:,:,:) call sort_det(sortRef, sortRefIdx, N_det_ref, N_int) @@ -842,10 +844,10 @@ subroutine filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_m integer :: i,j,k,m logical :: is_in_wavefunction integer :: degree(psi_det_size) - integer :: idx(0:psi_det_size) + integer,allocatable :: idx(:) logical :: good - integer(bit_kind), intent(out) :: tq(Nint,2,n_selected) + integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out) integer, intent(out) :: N_tq integer :: nt,ni @@ -854,7 +856,7 @@ subroutine filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_m integer(bit_kind),intent(in) :: miniList(Nint,2,N_det_generators) integer,intent(in) :: N_miniList - + allocate(idx(0:psi_det_size)) N_tq = 0 i_loop : do i=1,N_selected @@ -897,10 +899,10 @@ subroutine filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microl integer :: i,j,k,m logical :: is_in_wavefunction integer :: degree(psi_det_size) - integer :: idx(0:psi_det_size) + integer,allocatable :: idx(:) logical :: good - integer(bit_kind), intent(out) :: tq(Nint,2,n_selected) + integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out) integer, intent(out) :: N_tq integer :: nt,ni @@ -914,6 +916,7 @@ subroutine filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microl integer :: mobiles(2), smallerlist + allocate(idx(0:psi_det_size)) N_tq = 0 i_loop : do i=1,N_selected diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f index 3491ba7f..08099341 100644 --- a/plugins/mrcepa0/dressing_slave.irp.f +++ b/plugins/mrcepa0/dressing_slave.irp.f @@ -42,17 +42,18 @@ subroutine mrsc2_dressing_slave(thread,iproc) integer :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2 - integer :: idx(N_det_non_ref, 2), n(2) + integer :: n(2) integer :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn logical :: ok double precision :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al double precision :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states) - double precision :: contrib, wall, iwall, dleat(N_states,N_det_non_ref,2) + double precision :: contrib, wall, iwall + double precision, allocatable :: dleat(:,:,:) integer, dimension(0:2,2,2) :: exc_iI, exc_Ik, exc_IJ integer(bit_kind) :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt integer, external :: get_index_in_psi_det_sorted_bit, searchDet, detCmp logical, external :: is_in_wavefunction, isInCassd, detEq - integer :: komon(0:N_det_non_ref) + integer,allocatable :: komon(:) logical :: komoned @@ -61,8 +62,8 @@ subroutine mrsc2_dressing_slave(thread,iproc) call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) - allocate (delta(N_states,0:N_det_non_ref, 2)) - + allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2)) + allocate(komon(0:N_det_non_ref)) do call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) @@ -219,12 +220,14 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) integer, intent(in) :: i_I, J integer(ZMQ_PTR), intent(in) :: zmq_socket_push - double precision :: delta(N_states, 0:N_det_non_ref, 2) + double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) integer, intent(in) :: task_id integer :: rc , i_state, i, kk, li - integer :: idx(N_det_non_ref,2), n(2) + integer,allocatable :: idx(:,:) + integer ::n(2) logical :: ok + allocate(idx(N_det_non_ref,2)) rc = f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE) if (rc /= 4) then print *, irp_here, 'f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE)' @@ -317,9 +320,9 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2) integer, intent(out) :: task_id integer :: rc , i, kk - integer,intent(out) :: idx(N_det_non_ref, 2) + integer,intent(inout) :: idx(N_det_non_ref,2) logical :: ok - + rc = f77_zmq_recv( zmq_socket_pull, i_I, 4, ZMQ_SNDMORE) if (rc /= 4) then print *, irp_here, 'f77_zmq_recv( zmq_socket_pull, i_I, 4, ZMQ_SNDMORE)' @@ -397,7 +400,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) integer :: task_id, more integer :: I_i, J, l, i_state, n(2), kk - integer :: idx(N_det_non_ref,2) + integer,allocatable :: idx(:,:) delta_ii_(:,:) = 0d0 delta_ij_(:,:,:) = 0d0 @@ -406,10 +409,11 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) zmq_socket_pull = new_zmq_pull_socket() allocate ( delta(N_states,0:N_det_non_ref,2) ) - + + allocate(idx(N_det_non_ref,2)) more = 1 do while (more == 1) - + call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) @@ -453,8 +457,6 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_) enddo - print *, "-------------" , delta_ii_(1,:) - print *, "dfdf", delta_ij_(1,10,:) deallocate( delta ) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) diff --git a/src/Determinants/davidson.irp.f b/src/Determinants/davidson.irp.f index c78a3826..3d074563 100644 --- a/src/Determinants/davidson.irp.f +++ b/src/Determinants/davidson.irp.f @@ -241,8 +241,8 @@ subroutine sort_dets_ab(key, idx, shortcut, N_key, Nint) END_DOC integer, intent(in) :: Nint, N_key integer(bit_kind),intent(inout) :: key(Nint,2,N_key) - integer,intent(out) :: idx(N_key) - integer,intent(out) :: shortcut(0:N_key+1) + integer,intent(inout) :: idx(N_key) + integer,intent(inout) :: shortcut(0:N_key+1) integer(bit_kind) :: tmp(Nint, 2) integer :: tmpidx,i,ni