10
0
mirror of https://github.com/LCPQ/quantum_package synced 2024-12-23 12:56:14 +01:00

Merge branch 'master' of github.com:scemama/quantum_package

This commit is contained in:
Anthony Scemama 2016-10-11 22:46:40 +02:00
commit b4827421ee
6 changed files with 117 additions and 84 deletions

View File

@ -12,7 +12,7 @@ program selection_slave
end end
subroutine provide_everything subroutine provide_everything
PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context mo_mono_elec_integral
! PROVIDE ci_electronic_energy mo_tot_num N_int ! PROVIDE ci_electronic_energy mo_tot_num N_int
end end

View File

@ -810,14 +810,18 @@ END_PROVIDER
if(res < 1d-8) exit if(res < 1d-8) exit
end do end do
! rho_mrcc now contains A.X
norm = 0.d0 norm = 0.d0
do i=1,N_det_non_ref do i=1,N_det_non_ref
norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s) norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
enddo enddo
! Norm now contains the norm of A.X
do i=1,N_det_ref do i=1,N_det_ref
norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s) norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
enddo enddo
! Norm now contains the norm of Psi + A.X
print *, k, "res : ", res, "norm : ", sqrt(norm) print *, k, "res : ", res, "norm : ", sqrt(norm)
@ -829,30 +833,46 @@ END_PROVIDER
if (rho_mrcc(i,s) == 0.d0) then if (rho_mrcc(i,s) == 0.d0) then
rho_mrcc(i,s) = 1.d-32 rho_mrcc(i,s) = 1.d-32
endif endif
! f is such that f.\tilde{c_i} = c_i
f = psi_non_ref_coef(i,s) / rho_mrcc(i,s) f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
! Avoid numerical instabilities ! Avoid numerical instabilities
f = min(f,2.d0) f = min(f,2.d0)
f = max(f,-2.d0) f = max(f,-2.d0)
norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s) norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
rho_mrcc(i,s) = f rho_mrcc(i,s) = f
enddo enddo
! norm now contains the norm of |T.Psi_0>
! rho_mrcc now contains the f factors
f = 1.d0/norm f = 1.d0/norm
norm = 0.d0 ! f now contains 1/ <T.Psi_0|T.Psi_0>
do i=1,N_det_non_ref
norm = norm + psi_non_ref_coef(i,s)*psi_non_ref_coef(i,s)
enddo
f = dsqrt(f*norm)
print *, 'norm of |T Psi_0> = ', norm*f*f norm = 1.d0
do i=1,N_det_ref
norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
enddo
! norm now contains <Psi_SD|Psi_SD>
f = dsqrt(f*norm)
! f normalises T.Psi_0 such that (1+T)|Psi> is normalized
norm = norm*f
print *, 'norm of |T Psi_0> = ', dsqrt(norm)
do i=1,N_det_ref
norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
enddo
do i=1,N_det_non_ref do i=1,N_det_non_ref
rho_mrcc(i,s) = rho_mrcc(i,s) * f rho_mrcc(i,s) = rho_mrcc(i,s) * f
enddo enddo
! rho_mrcc now contains the product of the scaling factors and the
! normalization constant
end do end do
print *, "done"
END_PROVIDER END_PROVIDER
@ -860,13 +880,17 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
integer :: s,i,j integer :: s,i,j
double precision, external :: get_dij_index double precision, external :: get_dij_index
print *, "computing amplitudes..." print *, "computing amplitudes..."
!$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j)
do s=1, N_states do s=1, N_states
!$OMP DO
do i=1, N_det_non_ref do i=1, N_det_non_ref
do j=1, N_det_ref do j=1, N_det_ref
dij(j, i, s) = get_dij_index(j, i, s, N_int) dij(j, i, s) = get_dij_index(j, i, s, N_int)
end do end do
end do end do
!$OMP END DO
end do end do
!$OMP END PARALLEL
print *, "done computing amplitudes" print *, "done computing amplitudes"
END_PROVIDER END_PROVIDER
@ -879,10 +903,9 @@ double precision function get_dij_index(II, i, s, Nint)
double precision :: HIi, phase double precision :: HIi, phase
if(lambda_type == 0) then if(lambda_type == 0) then
get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint)
! get_dij_index = get_dij_index * rho_mrcc(i,s)
call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int) call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
get_dij_index = get_dij_index * rho_mrcc(i,s) * phase get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase
get_dij_index = get_dij_index * rho_mrcc(i,s)
else else
call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi) call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
get_dij_index = HIi * lambda_mrcc(s, i) get_dij_index = HIi * lambda_mrcc(s, i)

View File

@ -114,7 +114,7 @@ subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
if (N_det_selectors_read > 0) then if (N_det_selectors_read > 0) then
N_det_selectors = N_det_selectors_read N_det_selectors = N_det_selectors_read
endif endif
SOFT_TOUCH psi_det psi_coef N_det_selectors N_det_generators psi_coef_generators psi_det_generators SOFT_TOUCH psi_det psi_coef N_det_selectors N_det_generators
end end

View File

@ -4,12 +4,12 @@
use bitmasks use bitmasks
use f77_zmq use f77_zmq
subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs) subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
implicit none implicit none
integer , intent(in) :: blockb, blocke, bs integer , intent(in) :: blockb, bs, blockb2, istep
integer , intent(inout) :: N integer , intent(inout) :: N
integer , intent(inout) :: idx(bs) integer , intent(inout) :: idx(bs)
double precision , intent(inout) :: vt(N_states_diag, bs) double precision , intent(inout) :: vt(N_states_diag, bs)
@ -20,11 +20,12 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
double precision :: s2, hij double precision :: s2, hij
logical, allocatable :: wrotten(:) logical, allocatable :: wrotten(:)
allocate(wrotten(bs)) allocate(wrotten(bs))
wrotten = .false. wrotten = .false.
PROVIDE dav_det
do sh = blockb, blocke ii=0
sh = blockb
do sh2=1,shortcut_(0,1) do sh2=1,shortcut_(0,1)
exa = 0 exa = 0
do ni=1,N_int do ni=1,N_int
@ -32,7 +33,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
end do end do
if(exa > 2) cycle if(exa > 2) cycle
do i=shortcut_(sh,1),shortcut_(sh+1,1)-1 do i=blockb2+shortcut_(sh,1),shortcut_(sh+1,1)-1, istep
ii = i - shortcut_(blockb,1) + 1 ii = i - shortcut_(blockb,1) + 1
org_i = sort_idx_(i,1) org_i = sort_idx_(i,1)
@ -60,47 +61,43 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
vt (istate,ii) += hij*dav_ut(istate,org_j) vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j) st (istate,ii) += s2*dav_ut(istate,org_j)
enddo enddo
! call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,org_i),1)
! call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,org_i),1)
endif endif
enddo enddo
enddo enddo
enddo enddo
enddo
do sh=blockb,min(blocke, shortcut_(0,2)) if (blockb <= shortcut_(0,2)) then
do sh2=sh, shortcut_(0,2), shortcut_(0,1) sh=blockb
do i=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 do sh2=sh, shortcut_(0,2), shortcut_(0,1)
ii += 1 do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep
org_i = sort_idx_(i,2) ii += 1
do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1 org_i = sort_idx_(i,2)
if(i == j) cycle do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
org_j = sort_idx_(j,2) if(i == j) cycle
ext = 0 org_j = sort_idx_(j,2)
do ni=1,N_int ext = 0
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2))) do ni=1,N_int
ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
end do
if(ext == 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
enddo
end if
end do end do
if(ext == 4) then
call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
if(.not. wrotten(ii)) then
wrotten(ii) = .true.
idx(ii) = org_i
vt (:,ii) = 0d0
st (:,ii) = 0d0
end if
! call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,ii),1)
! call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,ii),1)
do istate=1,N_states_diag
vt (istate,ii) += hij*dav_ut(istate,org_j)
st (istate,ii) += s2*dav_ut(istate,org_j)
enddo
end if
end do end do
end do enddo
enddo endif
enddo
N=0 N=0
do i=1,bs do i=1,bs
@ -112,16 +109,16 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
end if end if
end do end do
end subroutine end subroutine
subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
implicit none implicit none
integer , intent(in) :: blockb, blocke
integer , intent(in) :: N integer , intent(in) :: N
integer , intent(in) :: idx(N) integer , intent(in) :: idx(N)
double precision , intent(in) :: vt(N_states_diag, N) double precision , intent(in) :: vt(N_states_diag, N)
@ -175,16 +172,16 @@ end subroutine
subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blocke) subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR) ,intent(in) :: zmq_to_qp_run_socket
integer ,intent(in) :: blockb, blocke integer ,intent(in) :: blockb, blockb2, istep
character*(512) :: task character*(512) :: task
write(task,*) blockb, blocke write(task,*) blockb, blockb2, istep
call add_task_to_taskserver(zmq_to_qp_run_socket, task) call add_task_to_taskserver(zmq_to_qp_run_socket, task)
end subroutine end subroutine
@ -213,7 +210,7 @@ subroutine davidson_run_slave(thread,iproc)
integer, intent(in) :: thread, iproc integer, intent(in) :: thread, iproc
integer :: worker_id, task_id, blockb, blocke integer :: worker_id, task_id, blockb
character*(512) :: task character*(512) :: task
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
@ -253,7 +250,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
character*(512) :: task character*(512) :: task
integer :: blockb, blocke integer :: blockb, blockb2, istep
integer :: N integer :: N
integer , allocatable :: idx(:) integer , allocatable :: idx(:)
double precision , allocatable :: vt(:,:) double precision , allocatable :: vt(:,:)
@ -266,10 +263,10 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
do do
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
if(task_id == 0) exit if(task_id == 0) exit
read (task,*) blockb, blocke read (task,*) blockb, blockb2, istep
bs = shortcut_(blocke+1,1) - shortcut_(blockb, 1) bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1)
do i=blockb, shortcut_(0,2), shortcut_(0,1) do i=blockb, shortcut_(0,2), shortcut_(0,1)
do j=i, min(i+blocke-blockb, shortcut_(0,2)) do j=i, min(i, shortcut_(0,2))
bs += shortcut_(j+1,2) - shortcut_(j, 2) bs += shortcut_(j+1,2) - shortcut_(j, 2)
end do end do
end do end do
@ -280,10 +277,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
allocate(st(N_states_diag, bs)) allocate(st(N_states_diag, bs))
end if end if
call davidson_process(blockb, blocke, N, idx, vt, st, bs) call davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id) call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
end do end do
end subroutine end subroutine
@ -387,7 +383,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
integer :: msize integer :: msize
msize = (max_workload + max_blocksize)*2 msize = (1 + max_blocksize)*2
allocate(idx(msize)) allocate(idx(msize))
allocate(vt(N_states_diag, msize)) allocate(vt(N_states_diag, msize))
allocate(st(N_states_diag, msize)) allocate(st(N_states_diag, msize))
@ -402,7 +398,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
do while (more == 1) do while (more == 1)
call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id) call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t) call davidson_collect(N, idx, vt, st , v0t, s0t)
call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
end do end do
deallocate(idx,vt,st) deallocate(idx,vt,st)

View File

@ -208,7 +208,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
integer :: N_st_8 integer :: N_st_8
integer, external :: align_double integer, external :: align_double
integer :: workload, blockb, blocke integer :: blockb, blockb2, istep
double precision :: ave_workload, workload
integer(ZMQ_PTR) :: handler integer(ZMQ_PTR) :: handler
@ -234,21 +235,38 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint) call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint) call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
workload = 0
blockb = shortcut(0,1) blockb = shortcut(0,1)
blocke = blockb
call davidson_init(handler,n,N_st_8,ut) call davidson_init(handler,n,N_st_8,ut)
ave_workload = 0.d0
do sh=1,shortcut(0,1)
ave_workload += shortcut(0,1)
ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
do i=sh, shortcut(0,2), shortcut(0,1)
do j=i, min(i, shortcut(0,2))
ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
end do
end do
enddo
ave_workload = ave_workload/dble(shortcut(0,1))
print *, 'Ave workload :', ave_workload
do sh=shortcut(0,1),1,-1 do sh=shortcut(0,1),1,-1
workload += (shortcut(sh+1,1) - shortcut(sh,1))**2 workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
if(workload > max_workload) then do i=sh, shortcut(0,2), shortcut(0,1)
blocke = sh do j=i, min(i, shortcut(0,2))
call davidson_add_task(handler, blocke, blockb) workload += (shortcut(j+1,2) - shortcut(j, 2))**2
blockb = sh-1 end do
workload = 0 end do
end if istep = 1+ int(0.5d0*workload/ave_workload)
do blockb2=0, istep-1
call davidson_add_task(handler, sh, blockb2, istep)
enddo
enddo enddo
if(blockb > 0) call davidson_add_task(handler, 1, blockb)
call davidson_run(handler, v_0, s_0, size(v_0,1)) call davidson_run(handler, v_0, s_0, size(v_0,1))
do istate=1,N_st do istate=1,N_st
@ -262,8 +280,4 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
end end
BEGIN_PROVIDER [ integer, max_workload ]
max_workload = 1000
END_PROVIDER

View File

@ -364,7 +364,7 @@ double precision function get_mo_bielec_integral(i,j,k,l,map)
integer(key_kind) :: idx integer(key_kind) :: idx
type(map_type), intent(inout) :: map type(map_type), intent(inout) :: map
real(integral_kind) :: tmp real(integral_kind) :: tmp
PROVIDE mo_bielec_integrals_in_map PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
if ( (i >= mo_integrals_cache_min) .and. & if ( (i >= mo_integrals_cache_min) .and. &
(j >= mo_integrals_cache_min) .and. & (j >= mo_integrals_cache_min) .and. &
(k >= mo_integrals_cache_min) .and. & (k >= mo_integrals_cache_min) .and. &
@ -393,7 +393,7 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map)
integer(key_kind) :: idx integer(key_kind) :: idx
type(map_type), intent(inout) :: map type(map_type), intent(inout) :: map
real(integral_kind) :: tmp real(integral_kind) :: tmp
PROVIDE mo_bielec_integrals_in_map PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then
double precision, external :: get_mo_bielec_integral double precision, external :: get_mo_bielec_integral
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
@ -411,7 +411,7 @@ double precision function mo_bielec_integral(i,j,k,l)
END_DOC END_DOC
integer, intent(in) :: i,j,k,l integer, intent(in) :: i,j,k,l
double precision :: get_mo_bielec_integral double precision :: get_mo_bielec_integral
PROVIDE mo_bielec_integrals_in_map PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
!DIR$ FORCEINLINE !DIR$ FORCEINLINE
mo_bielec_integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map) mo_bielec_integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
return return