10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-10 04:58:25 +01:00

Davidson thresold

This commit is contained in:
Anthony Scemama 2016-05-02 10:31:57 +02:00
parent 68247ee14a
commit d35d555eda
7 changed files with 89 additions and 95 deletions

View File

@ -9,7 +9,7 @@ print s
s = H_apply_zmq("FCI_PT2") s = H_apply_zmq("FCI_PT2")
s.set_perturbation("epstein_nesbet_2x2") s.set_perturbation("epstein_nesbet_2x2")
#s.unset_openmp() s.unset_openmp()
print s print s
s = H_apply("FCI_no_skip") s = H_apply("FCI_no_skip")

View File

@ -350,12 +350,12 @@ subroutine push_pt2(zmq_socket_push,pt2,norm_pert,H_pert_diag,N_st,task_id)
endif endif
! Activate if zmq_socket_push is a REQ ! Activate if zmq_socket_push is a REQ
integer :: idummy ! integer :: idummy
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) ! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
if (rc /= 4) then ! if (rc /= 4) then
print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' ! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
stop 'error' ! stop 'error'
endif ! endif
end end
subroutine pull_pt2(zmq_socket_pull,pt2,norm_pert,H_pert_diag,N_st,n,task_id) subroutine pull_pt2(zmq_socket_pull,pt2,norm_pert,H_pert_diag,N_st,n,task_id)
@ -411,11 +411,11 @@ subroutine pull_pt2(zmq_socket_pull,pt2,norm_pert,H_pert_diag,N_st,n,task_id)
endif endif
! Activate if zmq_socket_pull is a REP ! Activate if zmq_socket_pull is a REP
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) ! rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
if (rc /= 4) then ! if (rc /= 4) then
print *, irp_here, 'f77_zmq_send( zmq_socket_pull, 0, 4, 0)' ! print *, irp_here, 'f77_zmq_send( zmq_socket_pull, 0, 4, 0)'
stop 'error' ! stop 'error'
endif ! endif
end end

View File

@ -315,6 +315,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
double precision, intent(inout) :: u_in(dim_in,N_st) double precision, intent(inout) :: u_in(dim_in,N_st)
double precision, intent(out) :: energies(N_st) double precision, intent(out) :: energies(N_st)
integer :: sze_8
integer :: iter integer :: iter
integer :: i,j,k,l,m integer :: i,j,k,l,m
logical :: converged logical :: converged
@ -334,6 +335,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
double precision :: to_print(2,N_st) double precision :: to_print(2,N_st)
double precision :: cpu, wall double precision :: cpu, wall
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, R, Wt, y, h, lambda
call write_time(iunit) call write_time(iunit)
@ -362,12 +364,15 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
enddo enddo
write(iunit,'(A)') trim(write_buffer) write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double
sze_8 = align_double(sze)
allocate( & allocate( &
kl_pairs(2,N_st*(N_st+1)/2), & kl_pairs(2,N_st*(N_st+1)/2), &
W(sze,N_st,davidson_sze_max), & W(sze_8,N_st,davidson_sze_max), &
Wt(sze), & Wt(sze), &
U(sze,N_st,davidson_sze_max), & U(sze_8,N_st,davidson_sze_max), &
R(sze,N_st), & R(sze_8,N_st), &
h(N_st,davidson_sze_max,N_st,davidson_sze_max), & h(N_st,davidson_sze_max,N_st,davidson_sze_max), &
y(N_st,davidson_sze_max,N_st,davidson_sze_max), & y(N_st,davidson_sze_max,N_st,davidson_sze_max), &
lambda(N_st*davidson_sze_max)) lambda(N_st*davidson_sze_max))
@ -473,7 +478,10 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
! Express eigenvectors of h in the determinant basis ! Express eigenvectors of h in the determinant basis
! -------------------------------------------------- ! --------------------------------------------------
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(k,i,l,iter2) SHARED(U,W,R,y,iter,lambda,N_st,sze)
do k=1,N_st do k=1,N_st
!$OMP DO
do i=1,sze do i=1,sze
U(i,k,iter+1) = 0.d0 U(i,k,iter+1) = 0.d0
W(i,k,iter+1) = 0.d0 W(i,k,iter+1) = 0.d0
@ -484,7 +492,9 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,Nint,iun
enddo enddo
enddo enddo
enddo enddo
!$OMP END DO
enddo enddo
!$OMP END PARALLEL
! Compute residual vector ! Compute residual vector
! ----------------------- ! -----------------------

View File

@ -1608,12 +1608,11 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
integer :: i,j,k,l, jj,ii integer :: i,j,k,l, jj,ii
integer :: i0, j0 integer :: i0, j0
integer, allocatable :: shortcut(:), sort_idx(:) integer, allocatable :: shortcut(:,:), sort_idx(:,:)
integer(bit_kind), allocatable :: sorted(:,:), version(:,:) integer(bit_kind), allocatable :: sorted(:,:,:), version(:,:,:)
integer(bit_kind) :: sorted_i(Nint) integer(bit_kind) :: sorted_i(Nint)
integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi
double precision :: local_threshold
ASSERT (Nint > 0) ASSERT (Nint > 0)
@ -1621,104 +1620,83 @@ subroutine H_u_0(v_0,u_0,H_jj,n,keys_tmp,Nint)
ASSERT (n>0) ASSERT (n>0)
PROVIDE ref_bitmask_energy davidson_criterion PROVIDE ref_bitmask_energy davidson_criterion
allocate (shortcut(0:n+1), sort_idx(n), sorted(Nint,n), version(Nint,n)) allocate (shortcut(0:n+1,2), sort_idx(n,2), sorted(Nint,n,2), version(Nint,n,2))
v_0 = 0.d0 v_0 = 0.d0
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint) call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
!$OMP PARALLEL DEFAULT(NONE) & !$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold,sorted_i)& !$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i)&
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,threshold_davidson,sorted,shortcut,sort_idx,version) !$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,sorted,shortcut,sort_idx,version)
allocate(vt(n)) allocate(vt(n))
Vt = 0.d0 Vt = 0.d0
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0) do sh=1,shortcut(0,1)
do sh2=1,sh do sh2=sh,shortcut(0,1)
exa = 0 exa = 0
do ni=1,Nint do ni=1,Nint
exa = exa + popcnt(xor(version(ni,sh), version(ni,sh2))) exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
end do end do
if(exa > 2) then if(exa > 2) then
cycle cycle
end if end if
do i=shortcut(sh),shortcut(sh+1)-1 do i=shortcut(sh,1),shortcut(sh+1,1)-1
org_i = sort_idx(i) org_i = sort_idx(i,1)
local_threshold = threshold_davidson - dabs(u_0(org_i))
if(sh==sh2) then if(sh==sh2) then
endi = i-1 endi = i-1
else else
endi = shortcut(sh2+1)-1 endi = shortcut(sh2+1,1)-1
end if end if
do ni=1,Nint do ni=1,Nint
sorted_i(ni) = sorted(ni,i) sorted_i(ni) = sorted(ni,i,1)
enddo enddo
do j=shortcut(sh2),endi do j=shortcut(sh2,1),endi
org_j = sort_idx(j) org_j = sort_idx(j,1)
if ( dabs(u_0(org_j)) > local_threshold ) then
ext = exa ext = exa
do ni=1,Nint do ni=1,Nint
ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j))) ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
end do end do
if(ext <= 4) then if(ext <= 4) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j) vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i) vt (org_j) = vt (org_j) + hij*u_0(org_i)
endif endif
endif
enddo enddo
enddo enddo
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO NOWAIT
!$OMP CRITICAL
do i=1,n
v_0(i) = v_0(i) + vt(i)
enddo
!$OMP END CRITICAL
deallocate(vt)
!$OMP END PARALLEL
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut, version, n, Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(i,hij,j,k,jj,vt,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,local_threshold)&
!$OMP SHARED(n,H_jj,u_0,keys_tmp,Nint,v_0,threshold_davidson,sorted,shortcut,sort_idx,version)
allocate(vt(n))
Vt = 0.d0
!$OMP DO SCHEDULE(dynamic) !$OMP DO SCHEDULE(dynamic)
do sh=1,shortcut(0) do sh=1,shortcut(0,2)
do i=shortcut(sh),shortcut(sh+1)-1 do i=shortcut(sh,2),shortcut(sh+1,2)-1
org_i = sort_idx(i) org_i = sort_idx(i,2)
local_threshold = threshold_davidson - dabs(u_0(org_i)) do j=shortcut(sh,2),i-1
do j=shortcut(sh),i-1 org_j = sort_idx(j,2)
org_j = sort_idx(j)
if ( dabs(u_0(org_j)) > local_threshold ) then
ext = 0 ext = 0
do ni=1,Nint do ni=1,Nint
ext = ext + popcnt(xor(sorted(ni,i), sorted(ni,j))) ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
end do end do
if(ext == 4) then if(ext == 4) then
call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij) call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
vt (org_i) = vt (org_i) + hij*u_0(org_j) vt (org_i) = vt (org_i) + hij*u_0(org_j)
vt (org_j) = vt (org_j) + hij*u_0(org_i) vt (org_j) = vt (org_j) + hij*u_0(org_i)
end if end if
end if
end do end do
end do end do
enddo enddo
!$OMP END DO !$OMP END DO NOWAIT
!$OMP CRITICAL !$OMP CRITICAL
do i=1,n do i=n,1,-1
v_0(i) = v_0(i) + vt(i) v_0(i) = v_0(i) + vt(i)
enddo enddo
!$OMP END CRITICAL !$OMP END CRITICAL
deallocate(vt) deallocate(vt)
!$OMP END PARALLEL !$OMP END PARALLEL

View File

@ -57,16 +57,18 @@ subroutine push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value,
endif endif
! Activate is zmq_socket_push is a REQ ! Activate is zmq_socket_push is a REQ
integer :: idummy ! integer :: idummy
rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) ! rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0)
if (rc /= 4) then ! if (rc /= 4) then
print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' ! print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)'
stop 'error' ! stop 'error'
endif ! endif
end end
subroutine ao_bielec_integrals_in_map_slave(thread,iproc) subroutine ao_bielec_integrals_in_map_slave(thread,iproc)
use map_module use map_module
use f77_zmq use f77_zmq
@ -107,7 +109,7 @@ subroutine ao_bielec_integrals_in_map_slave(thread,iproc)
call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, 0) call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, 0)
enddo enddo
call compute_ao_integrals_jl(l,l,n_integrals,buffer_i,buffer_value) call compute_ao_integrals_jl(l,l,n_integrals,buffer_i,buffer_value)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_integrals) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id) call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
enddo enddo
@ -127,7 +129,7 @@ subroutine pull_integrals(zmq_socket_pull, n_integrals, buffer_i, buffer_value,
BEGIN_DOC BEGIN_DOC
! How the collector pulls the computed integrals ! How the collector pulls the computed integrals
END_DOC END_DOC
integer(ZMQ_PTR), intent(out) :: zmq_socket_pull integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(out) :: n_integrals integer, intent(out) :: n_integrals
integer(key_kind), intent(out) :: buffer_i(*) integer(key_kind), intent(out) :: buffer_i(*)
real(integral_kind), intent(out) :: buffer_value(*) real(integral_kind), intent(out) :: buffer_value(*)
@ -167,11 +169,11 @@ subroutine pull_integrals(zmq_socket_pull, n_integrals, buffer_i, buffer_value,
endif endif
! Activate if zmq_socket_pull is a REP ! Activate if zmq_socket_pull is a REP
rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0) ! rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
if (rc /= 4) then ! if (rc /= 4) then
print *, irp_here, ' f77_zmq_send (zmq_socket_pull,...' ! print *, irp_here, ' f77_zmq_send (zmq_socket_pull,...'
stop 'error' ! stop 'error'
endif ! endif
end end

View File

@ -324,6 +324,7 @@ double precision function u_dot_v(u,v,sze)
t3 = t2+t2 t3 = t2+t2
t4 = t3+t2 t4 = t3+t2
u_dot_v = 0.d0 u_dot_v = 0.d0
!DIR$ VECTOR ALWAYS
do i=1,t2 do i=1,t2
u_dot_v = u_dot_v + u(t1+i)*v(t1+i) + u(t2+i)*v(t2+i) + & u_dot_v = u_dot_v + u(t1+i)*v(t1+i) + u(t2+i)*v(t2+i) + &
u(t3+i)*v(t3+i) + u(t4+i)*v(t4+i) u(t3+i)*v(t3+i) + u(t4+i)*v(t4+i)
@ -359,6 +360,7 @@ double precision function u_dot_u(u,sze)
! u_dot_u = u_dot_u+u(i)*u(i) ! u_dot_u = u_dot_u+u(i)*u(i)
! enddo ! enddo
!DIR$ VECTOR ALWAYS
do i=1,sze do i=1,sze
u_dot_u = u_dot_u + u(i)*u(i) u_dot_u = u_dot_u + u(i)*u(i)
enddo enddo

View File

@ -217,8 +217,8 @@ function new_zmq_pull_socket()
integer(ZMQ_PTR) :: new_zmq_pull_socket integer(ZMQ_PTR) :: new_zmq_pull_socket
call omp_set_lock(zmq_lock) call omp_set_lock(zmq_lock)
! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL) new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_PULL)
new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP) ! new_zmq_pull_socket = f77_zmq_socket(zmq_context, ZMQ_REP)
call omp_unset_lock(zmq_lock) call omp_unset_lock(zmq_lock)
if (new_zmq_pull_socket == 0_ZMQ_PTR) then if (new_zmq_pull_socket == 0_ZMQ_PTR) then
stop 'Unable to create zmq pull socket' stop 'Unable to create zmq pull socket'
@ -267,8 +267,8 @@ function new_zmq_push_socket(thread)
integer(ZMQ_PTR) :: new_zmq_push_socket integer(ZMQ_PTR) :: new_zmq_push_socket
call omp_set_lock(zmq_lock) call omp_set_lock(zmq_lock)
! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH) new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_PUSH)
new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ) ! new_zmq_push_socket = f77_zmq_socket(zmq_context, ZMQ_REQ)
call omp_unset_lock(zmq_lock) call omp_unset_lock(zmq_lock)
if (new_zmq_push_socket == 0_ZMQ_PTR) then if (new_zmq_push_socket == 0_ZMQ_PTR) then
stop 'Unable to create zmq push socket' stop 'Unable to create zmq push socket'
@ -361,6 +361,8 @@ subroutine end_zmq_pull_socket(zmq_socket_pull)
stop 'error' stop 'error'
endif endif
call sleep(1) ! see https://github.com/zeromq/libzmq/issues/1922
rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,0,4) rc = f77_zmq_setsockopt(zmq_socket_pull,ZMQ_LINGER,0,4)
if (rc /= 0) then if (rc /= 0) then
stop 'Unable to set ZMQ_LINGER on zmq_socket_pull' stop 'Unable to set ZMQ_LINGER on zmq_socket_pull'