10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-05 11:00:10 +01:00

Fixed Davidson

This commit is contained in:
Anthony Scemama 2017-04-19 12:24:09 +02:00
parent dd59338083
commit 48f51a71ce
5 changed files with 51 additions and 59 deletions

View File

@ -44,11 +44,7 @@ subroutine davidson_run_slave(thread,iproc)
return return
end if end if
integer :: sze_8 call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
integer, external :: align_double
sze_8 = align_double(N_det)
call davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, sze_8, worker_id)
call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id) call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push,thread) call end_zmq_push_socket(zmq_socket_push,thread)
@ -56,13 +52,13 @@ end subroutine
subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_8, worker_id) subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id)
use f77_zmq use f77_zmq
implicit none implicit none
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),intent(in) :: zmq_socket_push integer(ZMQ_PTR),intent(in) :: zmq_socket_push
integer,intent(in) :: worker_id, N_st, sze_8 integer,intent(in) :: worker_id, N_st, sze
integer :: task_id integer :: task_id
character*(512) :: msg character*(512) :: msg
integer :: imin, imax, ishift, istep integer :: imin, imax, ishift, istep
@ -93,7 +89,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
double precision :: energy(N_st) double precision :: energy(N_st)
allocate(v_0(sze_8,N_st), s_0(sze_8,N_st),u_t(N_st,N_det)) allocate(v_0(sze,N_st), s_0(sze,N_st),u_t(N_st,N_det))
read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, & read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, &
N_det_generators_read, N_det_selectors_read N_det_generators_read, N_det_selectors_read
@ -142,7 +138,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze_
call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg)
if(task_id == 0) exit if(task_id == 0) exit
read (msg,*) imin, imax, ishift, istep read (msg,*) imin, imax, ishift, istep
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,imin,imax,ishift,istep) call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,N_det,imin,imax,ishift,istep)
call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
call davidson_push_results(zmq_socket_push, v_0, s_0, task_id) call davidson_push_results(zmq_socket_push, v_0, s_0, task_id)
end do end do
@ -214,15 +210,15 @@ end subroutine
subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze_8, N_st) subroutine davidson_collector(zmq_to_qp_run_socket, v0, s0, sze, N_st)
use f77_zmq use f77_zmq
implicit none implicit none
integer, intent(in) :: sze_8, N_st integer, intent(in) :: sze, N_st
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
double precision ,intent(inout) :: v0(sze_8, N_st) double precision ,intent(inout) :: v0(sze, N_st)
double precision ,intent(inout) :: s0(sze_8, N_st) double precision ,intent(inout) :: s0(sze, N_st)
integer :: more, task_id integer :: more, task_id
@ -254,7 +250,7 @@ end subroutine
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8) subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
use omp_lib use omp_lib
use bitmasks use bitmasks
use f77_zmq use f77_zmq
@ -268,9 +264,9 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8)
! !
! S2_jj : array of <j|S^2|j> ! S2_jj : array of <j|S^2|j>
END_DOC END_DOC
integer, intent(in) :: N_st, sze_8 integer, intent(in) :: N_st, sze
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(inout):: u_0(sze_8,N_st) double precision, intent(inout):: u_0(sze,N_st)
integer :: i,j,k integer :: i,j,k
integer :: ithread integer :: ithread
double precision, allocatable :: u_t(:,:) double precision, allocatable :: u_t(:,:)
@ -290,7 +286,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8)
integer(ZMQ_PTR) :: zmq_to_qp_run_socket integer(ZMQ_PTR) :: zmq_to_qp_run_socket
if(N_st /= N_states_diag .or. sze_8 < N_det) stop "assert fail in H_S2_u_0_nstates" if(N_st /= N_states_diag .or. sze < N_det) stop "assert fail in H_S2_u_0_nstates"
ASSERT (Nint > 0) ASSERT (Nint > 0)
ASSERT (Nint == N_int) ASSERT (Nint == N_int)
@ -358,7 +354,7 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze_8)
!$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread) !$OMP PARALLEL NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
if (ithread == 0 ) then if (ithread == 0 ) then
call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, sze_8, N_st) call davidson_collector(zmq_to_qp_run_socket, v_0, s_0, N_det, N_st)
else else
call davidson_slave_inproc(1) call davidson_slave_inproc(1)
endif endif

View File

@ -302,7 +302,6 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag) double precision, intent(out) :: energies(N_st_diag)
integer :: sze_8
integer :: iter integer :: iter
integer :: i,j,k,l,m integer :: i,j,k,l,m
logical :: converged logical :: converged
@ -365,13 +364,12 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
write(iunit,'(A)') trim(write_buffer) write(iunit,'(A)') trim(write_buffer)
integer, external :: align_double integer, external :: align_double
sze_8 = align_double(sze)
allocate( & allocate( &
kl_pairs(2,N_st_diag*(N_st_diag+1)/2), & kl_pairs(2,N_st_diag*(N_st_diag+1)/2), &
W(sze_8,N_st_diag,davidson_sze_max), & W(sze,N_st_diag,davidson_sze_max), &
U(sze_8,N_st_diag,davidson_sze_max), & U(sze,N_st_diag,davidson_sze_max), &
R(sze_8,N_st_diag), & R(sze,N_st_diag), &
h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & h(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), & y(N_st_diag,davidson_sze_max,N_st_diag,davidson_sze_max), &
residual_norm(N_st_diag), & residual_norm(N_st_diag), &
@ -426,7 +424,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia
! Compute |W_k> = \sum_i |i><i|H|u_k> ! Compute |W_k> = \sum_i |i><i|H|u_k>
! ----------------------------------------- ! -----------------------------------------
call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze_8) call H_u_0_nstates(W(1,1,iter),U(1,1,iter),H_jj,sze,dets_in,Nint,N_st_diag,sze)
! do k=1,N_st ! do k=1,N_st
! if(store_full_H_mat.and.sze.le.n_det_max_stored)then ! if(store_full_H_mat.and.sze.le.n_det_max_stored)then
! call H_u_0_stored(W(1,k,iter),U(1,k,iter),H_matrix_all_dets,sze) ! call H_u_0_stored(W(1,k,iter),U(1,k,iter),H_matrix_all_dets,sze)

View File

@ -84,7 +84,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
double precision, intent(inout) :: u_in(dim_in,N_st_diag) double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag) double precision, intent(out) :: energies(N_st_diag)
integer :: sze_8
integer :: iter integer :: iter
integer :: i,j,k,l,m integer :: i,j,k,l,m
logical :: converged logical :: converged
@ -115,7 +114,6 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
endif endif
integer, external :: align_double integer, external :: align_double
sze_8 = align_double(sze)
itermax = max(3,min(davidson_sze_max, sze/N_st_diag)) itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse
@ -130,8 +128,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
call write_int(iunit,N_st,'Number of states') call write_int(iunit,N_st,'Number of states')
call write_int(iunit,N_st_diag,'Number of states in diagonalization') call write_int(iunit,N_st_diag,'Number of states in diagonalization')
call write_int(iunit,sze,'Number of determinants') call write_int(iunit,sze,'Number of determinants')
r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 & r1 = 8.d0*(3.d0*dble(sze*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+ 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze_8)))/(1024.d0**3) + 4.d0*(N_st_diag*itermax)+nproc*(4.d0*N_det_alpha_unique+2.d0*N_st_diag*sze)))/(1024.d0**3)
call write_double(iunit, r1, 'Memory(Gb)') call write_double(iunit, r1, 'Memory(Gb)')
write(iunit,'(A)') '' write(iunit,'(A)') ''
write_buffer = '===== ' write_buffer = '===== '
@ -153,9 +151,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
allocate( & allocate( &
! Large ! Large
W(sze_8,N_st_diag*itermax), & W(sze,N_st_diag*itermax), &
U(sze_8,N_st_diag*itermax), & U(sze,N_st_diag*itermax), &
S(sze_8,N_st_diag*itermax), & S(sze,N_st_diag*itermax), &
! Small ! Small
h(N_st_diag*itermax,N_st_diag*itermax), & h(N_st_diag*itermax,N_st_diag*itermax), &
@ -223,9 +221,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
if (distributed_davidson) then if (distributed_davidson) then
call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) call H_S2_u_0_nstates_zmq (W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
else else
call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze_8) call H_S2_u_0_nstates_openmp(W(1,shift+1),S(1,shift+1),U(1,shift+1),N_st_diag,sze)
endif endif

View File

@ -1,4 +1,4 @@
subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8) subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -7,16 +7,16 @@ subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze_8)
! n : number of determinants ! n : number of determinants
! !
END_DOC END_DOC
integer, intent(in) :: n,Nint, N_st, sze_8 integer, intent(in) :: n,Nint, N_st, sze
double precision, intent(out) :: e_0(N_st) double precision, intent(out) :: e_0(N_st)
double precision, intent(inout):: u_0(sze_8,N_st) double precision, intent(inout):: u_0(sze,N_st)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision, allocatable :: v_0(:,:), s_0(:,:) double precision, allocatable :: v_0(:,:), s_0(:,:)
double precision :: u_dot_u,u_dot_v,diag_H_mat_elem double precision :: u_dot_u,u_dot_v,diag_H_mat_elem
integer :: i,j integer :: i,j
allocate (v_0(sze_8,N_st),s_0(sze_8,N_st)) allocate (v_0(sze,N_st),s_0(sze,N_st))
call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze_8) call H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
do i=1,N_st do i=1,N_st
e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n) e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n)/u_dot_u(u_0(1,i),n)
enddo enddo
@ -33,7 +33,7 @@ END_PROVIDER
subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze_8) subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -43,8 +43,8 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze_8)
! !
! istart, iend, ishift, istep are used in ZMQ parallelization. ! istart, iend, ishift, istep are used in ZMQ parallelization.
END_DOC END_DOC
integer, intent(in) :: N_st,sze_8 integer, intent(in) :: N_st,sze
double precision, intent(inout) :: v_0(sze_8,N_st), s_0(sze_8,N_st), u_0(sze_8,N_st) double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st)
integer :: k integer :: k
double precision, allocatable :: u_t(:,:) double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
@ -61,7 +61,7 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze_8)
size(u_t, 1), & size(u_t, 1), &
N_det, N_st) N_det, N_st)
call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,1,N_det,0,1) call H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,1,N_det,0,1)
deallocate(u_t) deallocate(u_t)
do k=1,N_st do k=1,N_st
@ -74,7 +74,7 @@ end
subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishift,istep) subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze,istart,iend,ishift,istep)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -82,9 +82,9 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
! !
! Default should be 1,N_det,0,1 ! Default should be 1,N_det,0,1
END_DOC END_DOC
integer, intent(in) :: N_st,sze_8,istart,iend,ishift,istep integer, intent(in) :: N_st,sze,istart,iend,ishift,istep
double precision, intent(in) :: u_t(N_st,N_det) double precision, intent(in) :: u_t(N_st,N_det)
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
PROVIDE ref_bitmask_energy PROVIDE ref_bitmask_energy
@ -132,7 +132,7 @@ subroutine H_S2_u_0_nstates_openmp_work(v_0,s_0,u_t,N_st,sze_8,istart,iend,ishif
!$OMP psi_bilinear_matrix_order_transp_reverse, & !$OMP psi_bilinear_matrix_order_transp_reverse, &
!$OMP singles_alpha_csc, singles_alpha_csc_idx, & !$OMP singles_alpha_csc, singles_alpha_csc_idx, &
!$OMP psi_bilinear_matrix_columns_loc, & !$OMP psi_bilinear_matrix_columns_loc, &
!$OMP singles_alpha_size, sze_8, istart, iend, istep, & !$OMP singles_alpha_size, istart, iend, istep, &
!$OMP ishift, idx0, u_t, maxab, v_0, s_0) & !$OMP ishift, idx0, u_t, maxab, v_0, s_0) &
!$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, &
!$OMP lcol, lrow, l_a, l_b, nmax, & !$OMP lcol, lrow, l_a, l_b, nmax, &

View File

@ -1,5 +1,5 @@
subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8) subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -10,9 +10,9 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
! H_jj : array of <j|H|j> ! H_jj : array of <j|H|j>
! !
END_DOC END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8 integer, intent(in) :: N_st,n,Nint, sze
double precision, intent(out) :: v_0(sze_8,N_st) double precision, intent(out) :: v_0(sze,N_st)
double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n) double precision, intent(in) :: H_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2 double precision :: hij,s2
@ -228,7 +228,7 @@ end
subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks use bitmasks
implicit none implicit none
BEGIN_DOC BEGIN_DOC
@ -240,9 +240,9 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
! !
! S2_jj : array of <j|S^2|j> ! S2_jj : array of <j|S^2|j>
END_DOC END_DOC
integer, intent(in) :: N_st,n,Nint, sze_8 integer, intent(in) :: N_st,n,Nint, sze
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n) double precision, intent(in) :: H_jj(n), S2_jj(n)
integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n)
double precision :: hij,s2 double precision :: hij,s2
@ -457,13 +457,13 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
deallocate (shortcut, sort_idx, sorted, version, ut) deallocate (shortcut, sort_idx, sorted, version, ut)
end end
subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8) subroutine H_S2_u_0_nstates_test(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze)
use bitmasks use bitmasks
implicit none implicit none
integer, intent(in) :: N_st,n,Nint, sze_8 integer, intent(in) :: N_st,n,Nint, sze
integer(bit_kind), intent(in) :: keys_tmp(Nint,2,n) integer(bit_kind), intent(in) :: keys_tmp(Nint,2,n)
double precision, intent(out) :: v_0(sze_8,N_st), s_0(sze_8,N_st) double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
double precision, intent(in) :: u_0(sze_8,N_st) double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(in) :: H_jj(n), S2_jj(n) double precision, intent(in) :: H_jj(n), S2_jj(n)
PROVIDE ref_bitmask_energy PROVIDE ref_bitmask_energy