9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-10-16 04:31:32 +02:00

Added davidson without S2

This commit is contained in:
Anthony Scemama 2021-02-17 00:46:58 +01:00
parent 7aa191523d
commit 554579492b
17 changed files with 2243 additions and 416 deletions

View File

@ -4,3 +4,4 @@ mpi
davidson_undressed
iterations
two_body_rdm
csf

View File

@ -700,7 +700,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
endif
enddo
do_diag = sum(dabs(coef)) > 0.001d0
do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1
double precision :: eigvalues(N_states+1)
double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2)

1
src/csf/NEED Normal file
View File

@ -0,0 +1 @@
determinants

View File

@ -0,0 +1,279 @@
subroutine do_single_excitation_cfg(key_in,key_out,i_hole,i_particle,ok)
use bitmasks
implicit none
BEGIN_DOC
! Applies the single excitation operator to a configuration
! If the excitation is possible, ok is True
END_DOC
integer, intent(in) :: i_hole,i_particle
integer(bit_kind), intent(in) :: key_in(N_int,2)
logical , intent(out) :: ok
integer :: k,j,i
integer(bit_kind) :: mask
integer(bit_kind) :: key_out(N_int,2)
ASSERT (i_hole > 0)
ASSERT (i_particle <= mo_num)
ok = .True.
key_out(:,:) = key_in(:,:)
! hole
k = shiftr(i_hole-1,bit_kind_shift)+1
j = i_hole-shiftl(k-1,bit_kind_shift)-1
mask = ibset(0_bit_kind,j)
! Check if the position j is singly occupied
! 1 -> 0 (SOMO)
! 0 0 (DOMO)
if (iand(key_out(k,1),mask) /= 0_bit_kind) then
key_out(k,1) = ibclr(key_out(k,1),j)
! Check if the position j is doubly occupied
! 0 -> 1 (SOMO)
! 1 0 (DOMO)
else if (iand(key_out(k,2),mask) /= 0_bit_kind) then
key_out(k,1) = ibset(key_out(k,1),j)
key_out(k,2) = ibclr(key_out(k,2),j)
! The position j is unoccupied: Not OK
! 0 -> 0 (SOMO)
! 0 0 (DOMO)
else
ok =.False.
return
endif
! particle
k = shiftr(i_particle-1,bit_kind_shift)+1
j = i_particle-shiftl(k-1,bit_kind_shift)-1
mask = ibset(0_bit_kind,j)
! Check if the position j is singly occupied
! 1 -> 0 (SOMO)
! 0 1 (DOMO)
if (iand(key_out(k,1),mask) /= 0_bit_kind) then
key_out(k,1) = ibclr(key_out(k,1),j)
key_out(k,2) = ibset(key_out(k,2),j)
! Check if the position j is doubly occupied : Not OK
! 0 -> 1 (SOMO)
! 1 0 (DOMO)
else if (iand(key_out(k,2),mask) /= 0_bit_kind) then
ok = .False.
return
! Position at j is unoccupied
! 0 -> 0 (SOMO)
! 0 0 (DOMO)
else
key_out(k,1) = ibset(key_out(k,1),j)
endif
end
subroutine do_single_excitation_cfg_with_type(key_in,key_out,i_hole,i_particle,ex_type,ok)
use bitmasks
implicit none
BEGIN_DOC
! Applies the single excitation operator to a configuration
! Returns the type of excitation in ex_type
! where the following convention is used
! 1 = (SOMO -> SOMO) 1 change in Nsomo
! 2 = (DOMO -> VMO) 1 change in Nsomo
! 3 = (SOMO -> VMO) 0 change in Nsomo
! 4 = (DOMO -> SOMO) 0 change in Nsomo
! If the excitation is possible, ok is True
END_DOC
integer, intent(in) :: i_hole,i_particle
integer(bit_kind), intent(in) :: key_in(N_int,2)
integer , intent(out) :: ex_type
logical , intent(out) :: ok
integer :: k,j,i
integer(bit_kind) :: mask
integer(bit_kind) :: key_out(N_int,2)
logical :: isholeSOMO
logical :: isparticleSOMO
logical :: isholeDOMO
logical :: isparticleVMO
isholeSOMO = .False.
isholeDOMO = .False.
isparticleSOMO = .False.
isparticleVMO = .False.
ASSERT (i_hole > 0)
ASSERT (i_particle <= mo_num)
ok = .True.
key_out(:,:) = key_in(:,:)
! hole
k = shiftr(i_hole-1,bit_kind_shift)+1
j = i_hole-shiftl(k-1,bit_kind_shift)-1
mask = ibset(0_bit_kind,j)
! Check if the position j is singly occupied
! 1 -> 0 (SOMO)
! 0 0 (DOMO)
if (iand(key_out(k,1),mask) /= 0_bit_kind) then
key_out(k,1) = ibclr(key_out(k,1),j)
isholeSOMO = .True.
! Check if the position j is doubly occupied
! 0 -> 1 (SOMO)
! 1 0 (DOMO)
else if (iand(key_out(k,2),mask) /= 0_bit_kind) then
key_out(k,1) = ibset(key_out(k,1),j)
key_out(k,2) = ibclr(key_out(k,2),j)
isholeDOMO = .True.
! The position j is unoccupied: Not OK
! 0 -> 0 (SOMO)
! 0 0 (DOMO)
else
ok =.False.
return
endif
! particle
k = shiftr(i_particle-1,bit_kind_shift)+1
j = i_particle-shiftl(k-1,bit_kind_shift)-1
mask = ibset(0_bit_kind,j)
! Check if the position j is singly occupied
! 1 -> 0 (SOMO)
! 0 1 (DOMO)
if (iand(key_out(k,1),mask) /= 0_bit_kind) then
key_out(k,1) = ibclr(key_out(k,1),j)
key_out(k,2) = ibset(key_out(k,2),j)
isparticleSOMO = .True.
! Check if the position j is doubly occupied : Not OK
! 0 -> 1 (SOMO)
! 1 0 (DOMO)
else if (iand(key_out(k,2),mask) /= 0_bit_kind) then
ok = .False.
return
! Position at j is unoccupied
! 0 -> 0 (SOMO)
! 0 0 (DOMO)
else
key_out(k,1) = ibset(key_out(k,1),j)
isparticleVMO = .True.
endif
if(isholeSOMO) then
! two possibilities
! particle is SOMO or VMO
if(isparticleSOMO) then
! SOMO -> SOMO
ex_type = 1
else
! SOMO -> VMO
ex_type = 3
endif
else
! two possibilities
! particle is SOMO or VMO
if(isparticleSOMO) then
! DOMO -> SOMO
ex_type = 4
else
! DOMO -> VMO
ex_type = 2
endif
endif
end
subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint)
implicit none
use bitmasks
BEGIN_DOC
! Generate all single excitation wrt a configuration
!
! n_singles : on input, max number of singles :
! elec_alpha_num * (mo_num - elec_beta_num)
! on output, number of generated singles
END_DOC
integer, intent(in) :: Nint
integer, intent(inout) :: n_singles
integer(bit_kind), intent(in) :: cfg(Nint,2)
integer(bit_kind), intent(out) :: singles(Nint,2,*)
integer :: i,k, n_singles_ma, i_hole, i_particle
integer(bit_kind) :: single(Nint,2)
logical :: i_ok
n_singles = 0
!TODO
!Make list of Somo and Domo for holes
!Make list of Unocc and Somo for particles
do i_hole = 1, mo_num
do i_particle = 1, mo_num
call do_single_excitation_cfg(cfg,single,i_hole,i_particle,i_ok)
if (i_ok) then
n_singles = n_singles + 1
do k=1,Nint
singles(k,1,n_singles) = single(k,1)
singles(k,2,n_singles) = single(k,2)
enddo
endif
enddo
enddo
end
subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
implicit none
use bitmasks
BEGIN_DOC
! Generate all single excitation wrt a configuration
!
! n_singles : on input, max number of singles :
! elec_alpha_num * (mo_num - elec_beta_num)
! on output, number of generated singles
! ex_type_singles : on output contains type of excitations :
!
END_DOC
integer, intent(in) :: Nint
integer, intent(inout) :: n_singles
integer, intent(out) :: idxs_singles(*)
integer, intent(out) :: ex_type_singles(*)
integer, intent(out) :: pq_singles(2,*)
integer(bit_kind), intent(in) :: cfgInp(Nint,2)
integer(bit_kind), intent(out) :: singles(Nint,2,*)
integer(bit_kind) :: Jdet(Nint,2)
integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg
integer(bit_kind) :: single(Nint,2)
logical :: i_ok
n_singles = 0
!TODO
!Make list of Somo and Domo for holes
!Make list of Unocc and Somo for particles
do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
if(i_hole .EQ. i_particle) cycle
addcfg = -1
call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok)
if (i_ok) then
call binary_search_cfg(single,addcfg)
if(addcfg .EQ. -1) cycle
n_singles = n_singles + 1
do k=1,Nint
singles(k,1,n_singles) = single(k,1)
singles(k,2,n_singles) = single(k,2)
ex_type_singles(n_singles) = ex_type
pq_singles(1,n_singles) = i_hole ! p
pq_singles(2,n_singles) = i_particle ! q
idxs_singles(n_singles) = addcfg
enddo
endif
enddo
enddo
end

View File

@ -1 +1 @@
determinants
csf

View File

@ -329,6 +329,7 @@ end subroutine
subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull, v0, s0, sze, N_st)
use f77_zmq
implicit none
@ -377,7 +378,6 @@ end subroutine
subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
use omp_lib
use bitmasks
@ -538,6 +538,10 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
end
BEGIN_PROVIDER [ integer, nthreads_davidson ]
implicit none
BEGIN_DOC

View File

@ -0,0 +1,495 @@
use bitmasks
use f77_zmq
subroutine davidson_nos2_slave_inproc(i)
implicit none
integer, intent(in) :: i
call davidson_nos2_run_slave(1,i)
end
subroutine davidson_nos2_slave_tcp(i)
implicit none
integer, intent(in) :: i
call davidson_nos2_run_slave(0,i)
end
subroutine davidson_nos2_run_slave(thread,iproc)
use f77_zmq
implicit none
BEGIN_DOC
! Slave routine for Davidson's diagonalization.
END_DOC
integer, intent(in) :: thread, iproc
integer :: worker_id, task_id, blockb
integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket
integer(ZMQ_PTR) :: zmq_to_qp_run_socket
integer(ZMQ_PTR), external :: new_zmq_push_socket
integer(ZMQ_PTR) :: zmq_socket_push
integer, external :: connect_to_taskserver
integer :: doexit, send, receive
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
doexit = 0
if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
doexit=1
endif
IRP_IF MPI
include 'mpif.h'
integer :: ierr
send = doexit
call MPI_AllReduce(send, receive, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr)
if (ierr /= MPI_SUCCESS) then
doexit=1
endif
doexit = receive
IRP_ENDIF
if (doexit>0) then
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
return
endif
zmq_socket_push = new_zmq_push_socket(thread)
call davidson_nos2_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id)
integer, external :: disconnect_from_taskserver
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
call sleep(1)
if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
print *, irp_here, ': disconnect failed'
continue
endif
endif
call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
call end_zmq_push_socket(zmq_socket_push)
end subroutine
subroutine davidson_nos2_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id)
use f77_zmq
implicit none
integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket
integer(ZMQ_PTR),intent(in) :: zmq_socket_push
integer,intent(in) :: worker_id, N_st, sze
integer :: task_id
character*(512) :: msg
integer :: imin, imax, ishift, istep
integer, allocatable :: psi_det_read(:,:,:)
double precision, allocatable :: v_t(:,:), u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t
! Get wave function (u_t)
! -----------------------
integer :: rc, ni, nj
integer*8 :: rc8
integer :: N_states_read, N_det_read, psi_det_size_read
integer :: N_det_selectors_read, N_det_generators_read
integer, external :: zmq_get_dvector
integer, external :: zmq_get_dmatrix
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
allocate(u_t(N_st,N_det))
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (size(u_t,kind=8) < 8388608_8) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = int(size(u_t,kind=8)/8388608_8,4) + 1
endif
do while (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1)
print *, 'mpi_rank, N_states_diag, N_det'
print *, mpi_rank, N_states_diag, N_det
stop 'u_t'
enddo
IRP_IF MPI
include 'mpif.h'
integer :: ierr
call broadcast_chunks_double(u_t,size(u_t,kind=8))
IRP_ENDIF
! Run tasks
! ---------
logical :: sending
sending=.False.
allocate(v_t(N_st,N_det))
do
integer, external :: get_task_from_taskserver
integer, external :: task_done_to_taskserver
if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then
exit
endif
if(task_id == 0) exit
read (msg,*) imin, imax, ishift, istep
integer :: k
do k=imin,imax
v_t(:,k) = 0.d0
enddo
call H_u_0_nstates_openmp_work(v_t,u_t,N_st,N_det,imin,imax,ishift,istep)
if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
print *, irp_here, 'Unable to send task_done'
endif
call davidson_push_results_async_recv(zmq_socket_push, sending)
call davidson_nos2_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id, sending)
end do
deallocate(u_t,v_t)
call davidson_push_results_async_recv(zmq_socket_push, sending)
end subroutine
subroutine davidson_nos2_push_results(zmq_socket_push, v_t, imin, imax, task_id)
use f77_zmq
implicit none
BEGIN_DOC
! Push the results of $H | U \rangle$ from a worker to the master.
END_DOC
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
integer ,intent(in) :: task_id, imin, imax
double precision ,intent(in) :: v_t(N_states_diag,N_det)
integer :: rc, sz
integer*8 :: rc8
sz = (imax-imin+1)*N_states_diag
rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push task_id'
rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push imin'
rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push imax'
rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE)
if(rc8 /= 8_8*sz) stop 'davidson_nos2_push_results failed to push vt'
! Activate is zmq_socket_push is a REQ
IRP_IF ZMQ_PUSH
IRP_ELSE
character*(2) :: ok
rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
if ((rc /= 2).and.(ok(1:2)/='ok')) then
print *, irp_here, ': f77_zmq_recv( zmq_socket_push, ok, 2, 0)'
stop -1
endif
IRP_ENDIF
end subroutine
subroutine davidson_nos2_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id,sending)
use f77_zmq
implicit none
BEGIN_DOC
! Push the results of $H | U \rangle$ from a worker to the master.
END_DOC
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push
integer ,intent(in) :: task_id, imin, imax
double precision ,intent(in) :: v_t(N_states_diag,N_det)
logical ,intent(inout) :: sending
integer :: rc, sz
integer*8 :: rc8
if (sending) then
print *, irp_here, ': sending=true'
stop -1
endif
sending = .True.
sz = (imax-imin+1)*N_states_diag
rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push task_id'
rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push imin'
rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE)
if(rc /= 4) stop 'davidson_nos2_push_results failed to push imax'
rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE)
if(rc8 /= 8_8*sz) stop 'davidson_nos2_push_results failed to push vt'
end subroutine
subroutine davidson_nos2_pull_results(zmq_socket_pull, v_t, imin, imax, task_id)
use f77_zmq
implicit none
BEGIN_DOC
! Pull the results of $H | U \rangle$ on the master.
END_DOC
integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull
integer ,intent(out) :: task_id, imin, imax
double precision ,intent(out) :: v_t(N_states_diag,N_det)
integer :: rc, sz
integer*8 :: rc8
rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull task_id'
rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0)
if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull imin'
rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0)
if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull imax'
sz = (imax-imin+1)*N_states_diag
rc8 = f77_zmq_recv8( zmq_socket_pull, v_t(1,imin), 8_8*sz, 0)
if(rc8 /= 8*sz) stop 'davidson_nos2_pull_results failed to pull v_t'
! Activate if zmq_socket_pull is a REP
IRP_IF ZMQ_PUSH
IRP_ELSE
rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
if (rc /= 2) then
print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
stop -1
endif
IRP_ENDIF
end subroutine
subroutine davidson_nos2_collector(zmq_to_qp_run_socket, zmq_socket_pull, v0, sze, N_st)
use f77_zmq
implicit none
BEGIN_DOC
! Routine collecting the results of the workers in Davidson's algorithm.
END_DOC
integer(ZMQ_PTR), intent(in) :: zmq_socket_pull
integer, intent(in) :: sze, N_st
integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket
double precision ,intent(inout) :: v0(sze, N_st)
integer :: more, task_id, imin, imax
double precision, allocatable :: v_t(:,:)
logical :: sending
integer :: i,j
integer, external :: zmq_delete_task_async_send
integer, external :: zmq_delete_task_async_recv
allocate(v_t(N_st,N_det))
v0 = 0.d0
more = 1
sending = .False.
do while (more == 1)
call davidson_nos2_pull_results(zmq_socket_pull, v_t, imin, imax, task_id)
if (zmq_delete_task_async_send(zmq_to_qp_run_socket,task_id,sending) == -1) then
stop 'davidson: Unable to delete task (send)'
endif
do j=1,N_st
do i=imin,imax
v0(i,j) = v0(i,j) + v_t(j,i)
enddo
enddo
if (zmq_delete_task_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then
stop 'davidson: Unable to delete task (recv)'
endif
end do
deallocate(v_t)
end subroutine
subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
use omp_lib
use bitmasks
use f77_zmq
implicit none
BEGIN_DOC
! Computes $v_0 = H | u_0\rangle$
!
! n : number of determinants
!
! H_jj : array of $\langle j | H | j \rangle$
END_DOC
integer, intent(in) :: N_st, sze
double precision, intent(out) :: v_0(sze,N_st)
double precision, intent(inout):: u_0(sze,N_st)
integer :: i,j,k
integer :: ithread
double precision, allocatable :: u_t(:,:)
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique
PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc
PROVIDE ref_bitmask_energy nproc
PROVIDE mpi_initialized
call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson')
! integer :: N_states_diag_save
! N_states_diag_save = N_states_diag
! N_states_diag = N_st
if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then
stop 'Unable to put N_states_diag on ZMQ server'
endif
if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
stop 'Unable to put psi on ZMQ server'
endif
energy = 0.d0
if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then
stop 'Unable to put energy on ZMQ server'
endif
! Create tasks
! ============
integer :: istep, imin, imax, ishift, ipos
integer, external :: add_task_to_taskserver
integer, parameter :: tasksize=20000
character*(100000) :: task
istep=1
ishift=0
imin=1
ipos=1
do imin=1,N_det,tasksize
imax = min(N_det,imin-1+tasksize)
if (imin==1) then
istep = 2
else
istep = 1
endif
do ishift=0,istep-1
write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|'
ipos = ipos+50
if (ipos > 100000-50) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task'
endif
ipos=1
endif
enddo
enddo
if (ipos > 1) then
if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
stop 'Unable to add task'
endif
ipos=1
endif
allocate(u_t(N_st,N_det))
do k=1,N_st
call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det)
enddo
call dtranspose( &
u_0, &
size(u_0, 1), &
u_t, &
size(u_t, 1), &
N_det, N_st)
ASSERT (N_st == N_states_diag)
ASSERT (sze >= N_det)
integer :: rc, ni, nj
integer*8 :: rc8
double precision :: energy(N_st)
integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag
integer, external :: zmq_put_dmatrix
if (size(u_t) < 8388608) then
ni = size(u_t)
nj = 1
else
ni = 8388608
nj = size(u_t)/8388608 + 1
endif
! Warning : dimensions are modified for efficiency, It is OK since we get the
! full matrix
if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then
stop 'Unable to put u_t on ZMQ server'
endif
deallocate(u_t)
integer, external :: zmq_set_running
if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
print *, irp_here, ': Failed in zmq_set_running'
endif
call omp_set_max_active_levels(4)
!$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
ithread = omp_get_thread_num()
if (ithread == 0 ) then
call davidson_nos2_collector(zmq_to_qp_run_socket, zmq_socket_pull, v_0, N_det, N_st)
else
call davidson_nos2_slave_inproc(1)
endif
!$OMP END PARALLEL
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson')
!$OMP PARALLEL
!$OMP SINGLE
do k=1,N_st
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
!$OMP END TASK
!$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det)
call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det)
!$OMP END TASK
enddo
!$OMP END SINGLE
!$OMP TASKWAIT
!$OMP END PARALLEL
! N_states_diag = N_states_diag_save
! SOFT_TOUCH N_states_diag
end

View File

@ -0,0 +1,568 @@
subroutine davidson_diag_h(dets_in,u_in,dim_in,energies,sze,N_st,N_st_diag,Nint,dressing_state,converged)
use bitmasks
implicit none
BEGIN_DOC
! Davidson diagonalization.
!
! dets_in : bitmasks corresponding to determinants
!
! u_in : guess coefficients on the various states. Overwritten
! on exit
!
! dim_in : leftmost dimension of u_in
!
! sze : Number of determinants
!
! N_st : Number of eigenstates
!
! Initial guess vectors are not necessarily orthonormal
END_DOC
integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
double precision, intent(inout) :: u_in(dim_in,N_st_diag)
double precision, intent(out) :: energies(N_st_diag)
integer, intent(in) :: dressing_state
logical, intent(out) :: converged
double precision, allocatable :: H_jj(:)
double precision, external :: diag_H_mat_elem, diag_S_mat_elem
integer :: i,k
ASSERT (N_st > 0)
ASSERT (sze > 0)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
PROVIDE mo_two_e_integrals_in_map
allocate(H_jj(sze))
H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(sze,H_jj, dets_in,Nint) &
!$OMP PRIVATE(i)
!$OMP DO SCHEDULE(static)
do i=2,sze
H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint)
enddo
!$OMP END DO
!$OMP END PARALLEL
if (dressing_state > 0) then
do k=1,N_st
do i=1,sze
H_jj(i) += u_in(i,k) * dressing_column_h(i,k)
enddo
enddo
endif
call davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,dressing_state,converged)
deallocate (H_jj)
end
subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,Nint,dressing_state,converged)
use bitmasks
use mmap_module
implicit none
BEGIN_DOC
! Davidson diagonalization with specific diagonal elements of the H matrix
!
! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
!
! dets_in : bitmasks corresponding to determinants
!
! u_in : guess coefficients on the various states. Overwritten
! on exit
!
! dim_in : leftmost dimension of u_in
!
! sze : Number of determinants
!
! N_st : Number of eigenstates
!
! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze
!
! Initial guess vectors are not necessarily orthonormal
END_DOC
integer, intent(in) :: dim_in, sze, N_st, N_st_diag_in, Nint
integer(bit_kind), intent(in) :: dets_in(Nint,2,sze)
double precision, intent(in) :: H_jj(sze)
integer, intent(in) :: dressing_state
double precision, intent(inout) :: u_in(dim_in,N_st_diag_in)
double precision, intent(out) :: energies(N_st_diag_in)
integer :: iter, N_st_diag
integer :: i,j,k,l,m
logical, intent(inout) :: converged
double precision, external :: u_dot_v, u_dot_u
integer :: k_pairs, kl
integer :: iter2, itertot
double precision, allocatable :: y(:,:), h(:,:), lambda(:)
double precision, allocatable :: s_tmp(:,:)
double precision :: diag_h_mat_elem
double precision, allocatable :: residual_norm(:)
character*(16384) :: write_buffer
double precision :: to_print(2,N_st)
double precision :: cpu, wall
integer :: shift, shift2, itermax, istate
double precision :: r1, r2, alpha
logical :: state_ok(N_st_diag_in*davidson_sze_max)
integer :: nproc_target
integer :: order(N_st_diag_in)
double precision :: cmax
double precision, allocatable :: U(:,:), overlap(:,:)
double precision, pointer :: W(:,:)
logical :: disk_based
double precision :: energy_shift(N_st_diag_in*davidson_sze_max)
include 'constants.include.F'
N_st_diag = N_st_diag_in
!DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, S_d, h, lambda
if (N_st_diag*3 > sze) then
print *, 'error in Davidson :'
print *, 'Increase n_det_max_full to ', N_st_diag*3
stop -1
endif
itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
itertot = 0
if (state_following) then
allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
else
allocate(overlap(1,1)) ! avoid 'if' for deallocate
endif
overlap = 0.d0
PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2
call write_time(6)
write(6,'(A)') ''
write(6,'(A)') 'Davidson Diagonalization'
write(6,'(A)') '------------------------'
write(6,'(A)') ''
! Find max number of cores to fit in memory
! -----------------------------------------
nproc_target = nproc
double precision :: rss
integer :: maxab
maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
m=1
disk_based = .False.
call resident_memory(rss)
do
r1 = 8.d0 * &! bytes
( dble(sze)*(N_st_diag*itermax) &! U
+ 1.0d0*dble(sze*m)*(N_st_diag*itermax) &! W
+ 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp
+ 1.d0*(N_st_diag*itermax) &! lambda
+ 1.d0*(N_st_diag) &! residual_norm
! In H_u_0_nstates_zmq
+ 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector
+ 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave
+ 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_*
+ nproc_target * &! In OMP section
( 1.d0*(N_int*maxab) &! buffer
+ 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx
) / 1024.d0**3
if (nproc_target == 0) then
call check_mem(r1,irp_here)
nproc_target = 1
exit
endif
if (r1+rss < qp_max_mem) then
exit
endif
if (itermax > 4) then
itermax = itermax - 1
else if (m==1.and.disk_based_davidson) then
m=0
disk_based = .True.
itermax = 6
else
nproc_target = nproc_target - 1
endif
enddo
nthreads_davidson = nproc_target
TOUCH nthreads_davidson
call write_int(6,N_st,'Number of states')
call write_int(6,N_st_diag,'Number of states in diagonalization')
call write_int(6,sze,'Number of determinants')
call write_int(6,nproc_target,'Number of threads for diagonalization')
call write_double(6, r1, 'Memory(Gb)')
if (disk_based) then
print *, 'Using swap space to reduce RAM'
endif
!---------------
write(6,'(A)') ''
write_buffer = '====='
do i=1,N_st
write_buffer = trim(write_buffer)//' ================ ==========='
enddo
write(6,'(A)') write_buffer(1:6+41*N_st)
write_buffer = 'Iter'
do i=1,N_st
write_buffer = trim(write_buffer)//' Energy Residual '
enddo
write(6,'(A)') write_buffer(1:6+41*N_st)
write_buffer = '====='
do i=1,N_st
write_buffer = trim(write_buffer)//' ================ ==========='
enddo
write(6,'(A)') write_buffer(1:6+41*N_st)
if (disk_based) then
! Create memory-mapped files for W and S
type(c_ptr) :: ptr_w, ptr_s
integer :: fd_s, fd_w
call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
8, fd_w, .False., ptr_w)
call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/))
else
allocate(W(sze,N_st_diag*itermax))
endif
allocate( &
! Large
U(sze,N_st_diag*itermax), &
! Small
h(N_st_diag*itermax,N_st_diag*itermax), &
y(N_st_diag*itermax,N_st_diag*itermax), &
s_tmp(N_st_diag*itermax,N_st_diag*itermax), &
residual_norm(N_st_diag), &
lambda(N_st_diag*itermax))
h = 0.d0
U = 0.d0
y = 0.d0
s_tmp = 0.d0
ASSERT (N_st > 0)
ASSERT (N_st_diag >= N_st)
ASSERT (sze > 0)
ASSERT (Nint > 0)
ASSERT (Nint == N_int)
! Davidson iterations
! ===================
converged = .False.
do k=N_st+1,N_st_diag
do i=1,sze
call random_number(r1)
call random_number(r2)
r1 = dsqrt(-2.d0*dlog(r1))
r2 = dtwo_pi*r2
u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st)
enddo
u_in(k,k) = u_in(k,k) + 10.d0
enddo
do k=1,N_st_diag
call normalize(u_in(1,k),sze)
enddo
do k=1,N_st_diag
do i=1,sze
U(i,k) = u_in(i,k)
enddo
enddo
do while (.not.converged)
itertot = itertot+1
if (itertot == 8) then
exit
endif
do iter=1,itermax-1
shift = N_st_diag*(iter-1)
shift2 = N_st_diag*iter
if ((iter > 1).or.(itertot == 1)) then
! Compute |W_k> = \sum_i |i><i|H|u_k>
! -----------------------------------
if (disk_based) then
call ortho_qr_unblocked(U,size(U,1),sze,shift2)
call ortho_qr_unblocked(U,size(U,1),sze,shift2)
else
call ortho_qr(U,size(U,1),sze,shift2)
call ortho_qr(U,size(U,1),sze,shift2)
endif
if ((sze > 100000).and.distributed_davidson) then
call H_u_0_nstates_zmq (W(1,shift+1),U(1,shift+1),N_st_diag,sze)
else
call H_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
endif
else
! Already computed in update below
continue
endif
if (dressing_state > 0) then
if (N_st == 1) then
l = dressed_column_idx(1)
double precision :: f
f = 1.0d0/psi_coef(l,1)
do istate=1,N_st_diag
do i=1,sze
W(i,shift+istate) += dressing_column_h(i,1) *f * U(l,shift+istate)
W(l,shift+istate) += dressing_column_h(i,1) *f * U(i,shift+istate)
enddo
enddo
else
call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
psi_coef, size(psi_coef,1), &
U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), &
1.d0, W(1,shift+1), size(W,1))
call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
dressing_column_h, size(dressing_column_h,1), &
U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
1.d0, W(1,shift+1), size(W,1))
endif
endif
! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
! -------------------------------------------
call dgemm('T','N', shift2, shift2, sze, &
1.d0, U, size(U,1), W, size(W,1), &
0.d0, h, size(h,1))
! Diagonalize h
! ---------------
call lapack_diag(lambda,y,h,size(h,1),shift2)
! Compute Energy for each eigenvector
! -----------------------------------
call dgemm('N','N',shift2,shift2,shift2, &
1.d0, h, size(h,1), y, size(y,1), &
0.d0, s_tmp, size(s_tmp,1))
call dgemm('T','N',shift2,shift2,shift2, &
1.d0, y, size(y,1), s_tmp, size(s_tmp,1), &
0.d0, h, size(h,1))
do k=1,shift2
lambda(k) = h(k,k)
enddo
if (state_following) then
overlap = -1.d0
do k=1,shift2
do i=1,shift2
overlap(k,i) = dabs(y(k,i))
enddo
enddo
do k=1,N_st
cmax = -1.d0
do i=1,N_st
if (overlap(i,k) > cmax) then
cmax = overlap(i,k)
order(k) = i
endif
enddo
do i=1,N_st_diag
overlap(order(k),i) = -1.d0
enddo
enddo
overlap = y
do k=1,N_st
l = order(k)
if (k /= l) then
y(1:shift2,k) = overlap(1:shift2,l)
endif
enddo
do k=1,N_st
overlap(k,1) = lambda(k)
enddo
endif
! Express eigenvectors of h in the determinant basis
! --------------------------------------------------
call dgemm('N','N', sze, N_st_diag, shift2, &
1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
call dgemm('N','N', sze, N_st_diag, shift2, &
1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
! Compute residual vector and davidson step
! -----------------------------------------
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
do k=1,N_st_diag
do i=1,sze
U(i,shift2+k) = &
(lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) &
/max(H_jj(i) - lambda (k),1.d-2)
enddo
if (k <= N_st) then
residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
to_print(1,k) = lambda(k) + nuclear_repulsion
to_print(2,k) = residual_norm(k)
endif
enddo
!$OMP END PARALLEL DO
if ((itertot>1).and.(iter == 1)) then
!don't print
continue
else
write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
endif
! Check convergence
if (iter > 1) then
if (threshold_davidson_from_pt2) then
converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
else
converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson
endif
endif
do k=1,N_st
if (residual_norm(k) > 1.e8) then
print *, 'Davidson failed'
stop -1
endif
enddo
if (converged) then
exit
endif
logical, external :: qp_stop
if (qp_stop()) then
converged = .True.
exit
endif
enddo
! Re-contract U and update W
! --------------------------------
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
W, size(W,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
do k=1,N_st_diag
do i=1,sze
W(i,k) = u_in(i,k)
enddo
enddo
call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, &
U, size(U,1), y, size(y,1), 0.d0, u_in, size