diff --git a/src/cipsi/NEED b/src/cipsi/NEED index c9dc92c0..6c14b4b6 100644 --- a/src/cipsi/NEED +++ b/src/cipsi/NEED @@ -4,3 +4,4 @@ mpi davidson_undressed iterations two_body_rdm +csf diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index ea2345b3..1bbaffc4 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -700,7 +700,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d endif enddo - do_diag = sum(dabs(coef)) > 0.001d0 + do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1 double precision :: eigvalues(N_states+1) double precision :: work(1+6*(N_states+1)+2*(N_states+1)**2) diff --git a/src/csf/NEED b/src/csf/NEED new file mode 100644 index 00000000..d3d4d2c7 --- /dev/null +++ b/src/csf/NEED @@ -0,0 +1 @@ +determinants diff --git a/src/determinants/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f similarity index 100% rename from src/determinants/configuration_CI_sigma_helpers.irp.f rename to src/csf/configuration_CI_sigma_helpers.irp.f diff --git a/src/determinants/configuration_CI_sigma_helpers.org b/src/csf/configuration_CI_sigma_helpers.org similarity index 100% rename from src/determinants/configuration_CI_sigma_helpers.org rename to src/csf/configuration_CI_sigma_helpers.org diff --git a/src/determinants/configurations.irp.f b/src/csf/configurations.irp.f similarity index 100% rename from src/determinants/configurations.irp.f rename to src/csf/configurations.irp.f diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f new file mode 100644 index 00000000..c1560148 --- /dev/null +++ b/src/csf/create_excitations.irp.f @@ -0,0 +1,279 @@ +subroutine do_single_excitation_cfg(key_in,key_out,i_hole,i_particle,ok) + use bitmasks + implicit none + BEGIN_DOC + ! Applies the single excitation operator to a configuration + ! If the excitation is possible, ok is True + END_DOC + integer, intent(in) :: i_hole,i_particle + integer(bit_kind), intent(in) :: key_in(N_int,2) + logical , intent(out) :: ok + integer :: k,j,i + integer(bit_kind) :: mask + integer(bit_kind) :: key_out(N_int,2) + + ASSERT (i_hole > 0) + ASSERT (i_particle <= mo_num) + + ok = .True. + key_out(:,:) = key_in(:,:) + + ! hole + k = shiftr(i_hole-1,bit_kind_shift)+1 + j = i_hole-shiftl(k-1,bit_kind_shift)-1 + mask = ibset(0_bit_kind,j) + + ! Check if the position j is singly occupied + ! 1 -> 0 (SOMO) + ! 0 0 (DOMO) + if (iand(key_out(k,1),mask) /= 0_bit_kind) then + key_out(k,1) = ibclr(key_out(k,1),j) + + ! Check if the position j is doubly occupied + ! 0 -> 1 (SOMO) + ! 1 0 (DOMO) + else if (iand(key_out(k,2),mask) /= 0_bit_kind) then + key_out(k,1) = ibset(key_out(k,1),j) + key_out(k,2) = ibclr(key_out(k,2),j) + + ! The position j is unoccupied: Not OK + ! 0 -> 0 (SOMO) + ! 0 0 (DOMO) + else + ok =.False. + return + endif + + + ! particle + k = shiftr(i_particle-1,bit_kind_shift)+1 + j = i_particle-shiftl(k-1,bit_kind_shift)-1 + mask = ibset(0_bit_kind,j) + + ! Check if the position j is singly occupied + ! 1 -> 0 (SOMO) + ! 0 1 (DOMO) + if (iand(key_out(k,1),mask) /= 0_bit_kind) then + key_out(k,1) = ibclr(key_out(k,1),j) + key_out(k,2) = ibset(key_out(k,2),j) + + ! Check if the position j is doubly occupied : Not OK + ! 0 -> 1 (SOMO) + ! 1 0 (DOMO) + else if (iand(key_out(k,2),mask) /= 0_bit_kind) then + ok = .False. + return + + ! Position at j is unoccupied + ! 0 -> 0 (SOMO) + ! 0 0 (DOMO) + else + key_out(k,1) = ibset(key_out(k,1),j) + endif + +end + +subroutine do_single_excitation_cfg_with_type(key_in,key_out,i_hole,i_particle,ex_type,ok) + use bitmasks + implicit none + BEGIN_DOC + ! Applies the single excitation operator to a configuration + ! Returns the type of excitation in ex_type + ! where the following convention is used + ! 1 = (SOMO -> SOMO) 1 change in Nsomo + ! 2 = (DOMO -> VMO) 1 change in Nsomo + ! 3 = (SOMO -> VMO) 0 change in Nsomo + ! 4 = (DOMO -> SOMO) 0 change in Nsomo + ! If the excitation is possible, ok is True + END_DOC + integer, intent(in) :: i_hole,i_particle + integer(bit_kind), intent(in) :: key_in(N_int,2) + integer , intent(out) :: ex_type + logical , intent(out) :: ok + integer :: k,j,i + integer(bit_kind) :: mask + integer(bit_kind) :: key_out(N_int,2) + logical :: isholeSOMO + logical :: isparticleSOMO + logical :: isholeDOMO + logical :: isparticleVMO + isholeSOMO = .False. + isholeDOMO = .False. + isparticleSOMO = .False. + isparticleVMO = .False. + + ASSERT (i_hole > 0) + ASSERT (i_particle <= mo_num) + + ok = .True. + key_out(:,:) = key_in(:,:) + + ! hole + k = shiftr(i_hole-1,bit_kind_shift)+1 + j = i_hole-shiftl(k-1,bit_kind_shift)-1 + mask = ibset(0_bit_kind,j) + + ! Check if the position j is singly occupied + ! 1 -> 0 (SOMO) + ! 0 0 (DOMO) + if (iand(key_out(k,1),mask) /= 0_bit_kind) then + key_out(k,1) = ibclr(key_out(k,1),j) + isholeSOMO = .True. + + ! Check if the position j is doubly occupied + ! 0 -> 1 (SOMO) + ! 1 0 (DOMO) + else if (iand(key_out(k,2),mask) /= 0_bit_kind) then + key_out(k,1) = ibset(key_out(k,1),j) + key_out(k,2) = ibclr(key_out(k,2),j) + isholeDOMO = .True. + + ! The position j is unoccupied: Not OK + ! 0 -> 0 (SOMO) + ! 0 0 (DOMO) + else + ok =.False. + return + endif + + + ! particle + k = shiftr(i_particle-1,bit_kind_shift)+1 + j = i_particle-shiftl(k-1,bit_kind_shift)-1 + mask = ibset(0_bit_kind,j) + + ! Check if the position j is singly occupied + ! 1 -> 0 (SOMO) + ! 0 1 (DOMO) + if (iand(key_out(k,1),mask) /= 0_bit_kind) then + key_out(k,1) = ibclr(key_out(k,1),j) + key_out(k,2) = ibset(key_out(k,2),j) + isparticleSOMO = .True. + + ! Check if the position j is doubly occupied : Not OK + ! 0 -> 1 (SOMO) + ! 1 0 (DOMO) + else if (iand(key_out(k,2),mask) /= 0_bit_kind) then + ok = .False. + return + + ! Position at j is unoccupied + ! 0 -> 0 (SOMO) + ! 0 0 (DOMO) + else + key_out(k,1) = ibset(key_out(k,1),j) + isparticleVMO = .True. + endif + + if(isholeSOMO) then + ! two possibilities + ! particle is SOMO or VMO + if(isparticleSOMO) then + ! SOMO -> SOMO + ex_type = 1 + else + ! SOMO -> VMO + ex_type = 3 + endif + else + ! two possibilities + ! particle is SOMO or VMO + if(isparticleSOMO) then + ! DOMO -> SOMO + ex_type = 4 + else + ! DOMO -> VMO + ex_type = 2 + endif + endif + +end + +subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint) + implicit none + use bitmasks + BEGIN_DOC + ! Generate all single excitation wrt a configuration + ! + ! n_singles : on input, max number of singles : + ! elec_alpha_num * (mo_num - elec_beta_num) + ! on output, number of generated singles + END_DOC + integer, intent(in) :: Nint + integer, intent(inout) :: n_singles + integer(bit_kind), intent(in) :: cfg(Nint,2) + integer(bit_kind), intent(out) :: singles(Nint,2,*) + + integer :: i,k, n_singles_ma, i_hole, i_particle + integer(bit_kind) :: single(Nint,2) + logical :: i_ok + + n_singles = 0 + !TODO + !Make list of Somo and Domo for holes + !Make list of Unocc and Somo for particles + do i_hole = 1, mo_num + do i_particle = 1, mo_num + call do_single_excitation_cfg(cfg,single,i_hole,i_particle,i_ok) + if (i_ok) then + n_singles = n_singles + 1 + do k=1,Nint + singles(k,1,n_singles) = single(k,1) + singles(k,2,n_singles) = single(k,2) + enddo + endif + enddo + enddo +end + +subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) + implicit none + use bitmasks + BEGIN_DOC + ! Generate all single excitation wrt a configuration + ! + ! n_singles : on input, max number of singles : + ! elec_alpha_num * (mo_num - elec_beta_num) + ! on output, number of generated singles + ! ex_type_singles : on output contains type of excitations : + ! + END_DOC + integer, intent(in) :: Nint + integer, intent(inout) :: n_singles + integer, intent(out) :: idxs_singles(*) + integer, intent(out) :: ex_type_singles(*) + integer, intent(out) :: pq_singles(2,*) + integer(bit_kind), intent(in) :: cfgInp(Nint,2) + integer(bit_kind), intent(out) :: singles(Nint,2,*) + integer(bit_kind) :: Jdet(Nint,2) + + integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg + integer(bit_kind) :: single(Nint,2) + logical :: i_ok + + n_singles = 0 + !TODO + !Make list of Somo and Domo for holes + !Make list of Unocc and Somo for particles + do i_hole = 1+n_core_orb, n_core_orb + n_act_orb + do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + if(i_hole .EQ. i_particle) cycle + addcfg = -1 + call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) + if (i_ok) then + call binary_search_cfg(single,addcfg) + if(addcfg .EQ. -1) cycle + n_singles = n_singles + 1 + do k=1,Nint + singles(k,1,n_singles) = single(k,1) + singles(k,2,n_singles) = single(k,2) + ex_type_singles(n_singles) = ex_type + pq_singles(1,n_singles) = i_hole ! p + pq_singles(2,n_singles) = i_particle ! q + idxs_singles(n_singles) = addcfg + enddo + endif + enddo + enddo +end + diff --git a/src/davidson/NEED b/src/davidson/NEED index d3d4d2c7..bfe31bd0 100644 --- a/src/davidson/NEED +++ b/src/davidson/NEED @@ -1 +1 @@ -determinants +csf diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index 32f89979..ac56fad7 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -329,6 +329,7 @@ end subroutine + subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull, v0, s0, sze, N_st) use f77_zmq implicit none @@ -377,7 +378,6 @@ end subroutine - subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) use omp_lib use bitmasks @@ -538,6 +538,10 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze) end + + + + BEGIN_PROVIDER [ integer, nthreads_davidson ] implicit none BEGIN_DOC diff --git a/src/davidson/davidson_parallel_nos2.irp.f b/src/davidson/davidson_parallel_nos2.irp.f new file mode 100644 index 00000000..c332281a --- /dev/null +++ b/src/davidson/davidson_parallel_nos2.irp.f @@ -0,0 +1,495 @@ +use bitmasks +use f77_zmq + + +subroutine davidson_nos2_slave_inproc(i) + implicit none + integer, intent(in) :: i + + call davidson_nos2_run_slave(1,i) +end + + +subroutine davidson_nos2_slave_tcp(i) + implicit none + integer, intent(in) :: i + call davidson_nos2_run_slave(0,i) +end + + + +subroutine davidson_nos2_run_slave(thread,iproc) + use f77_zmq + implicit none + BEGIN_DOC +! Slave routine for Davidson's diagonalization. + END_DOC + + integer, intent(in) :: thread, iproc + + integer :: worker_id, task_id, blockb + integer(ZMQ_PTR),external :: new_zmq_to_qp_run_socket + integer(ZMQ_PTR) :: zmq_to_qp_run_socket + + integer(ZMQ_PTR), external :: new_zmq_push_socket + integer(ZMQ_PTR) :: zmq_socket_push + + integer, external :: connect_to_taskserver + integer :: doexit, send, receive + + zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() + + doexit = 0 + if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then + doexit=1 + endif + IRP_IF MPI + include 'mpif.h' + integer :: ierr + send = doexit + call MPI_AllReduce(send, receive, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + doexit=1 + endif + doexit = receive + IRP_ENDIF + if (doexit>0) then + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + return + endif + + zmq_socket_push = new_zmq_push_socket(thread) + + call davidson_nos2_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_states_diag, N_det, worker_id) + + integer, external :: disconnect_from_taskserver + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + call sleep(1) + if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then + print *, irp_here, ': disconnect failed' + continue + endif + endif + + call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) + call end_zmq_push_socket(zmq_socket_push) + +end subroutine + + + +subroutine davidson_nos2_slave_work(zmq_to_qp_run_socket, zmq_socket_push, N_st, sze, worker_id) + use f77_zmq + implicit none + + integer(ZMQ_PTR),intent(in) :: zmq_to_qp_run_socket + integer(ZMQ_PTR),intent(in) :: zmq_socket_push + integer,intent(in) :: worker_id, N_st, sze + integer :: task_id + character*(512) :: msg + integer :: imin, imax, ishift, istep + + integer, allocatable :: psi_det_read(:,:,:) + double precision, allocatable :: v_t(:,:), u_t(:,:) + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t, v_t + + ! Get wave function (u_t) + ! ----------------------- + + integer :: rc, ni, nj + integer*8 :: rc8 + integer :: N_states_read, N_det_read, psi_det_size_read + integer :: N_det_selectors_read, N_det_generators_read + + integer, external :: zmq_get_dvector + integer, external :: zmq_get_dmatrix + + PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique + PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc + PROVIDE ref_bitmask_energy nproc + PROVIDE mpi_initialized + + allocate(u_t(N_st,N_det)) + + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (size(u_t,kind=8) < 8388608_8) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = int(size(u_t,kind=8)/8388608_8,4) + 1 + endif + + do while (zmq_get_dmatrix(zmq_to_qp_run_socket, worker_id, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) + print *, 'mpi_rank, N_states_diag, N_det' + print *, mpi_rank, N_states_diag, N_det + stop 'u_t' + enddo + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + + call broadcast_chunks_double(u_t,size(u_t,kind=8)) + + IRP_ENDIF + + ! Run tasks + ! --------- + + logical :: sending + sending=.False. + + allocate(v_t(N_st,N_det)) + do + integer, external :: get_task_from_taskserver + integer, external :: task_done_to_taskserver + if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, msg) == -1) then + exit + endif + if(task_id == 0) exit + read (msg,*) imin, imax, ishift, istep + integer :: k + do k=imin,imax + v_t(:,k) = 0.d0 + enddo + call H_u_0_nstates_openmp_work(v_t,u_t,N_st,N_det,imin,imax,ishift,istep) + if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then + print *, irp_here, 'Unable to send task_done' + endif + call davidson_push_results_async_recv(zmq_socket_push, sending) + call davidson_nos2_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id, sending) + end do + deallocate(u_t,v_t) + call davidson_push_results_async_recv(zmq_socket_push, sending) + +end subroutine + + + +subroutine davidson_nos2_push_results(zmq_socket_push, v_t, imin, imax, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Push the results of $H | U \rangle$ from a worker to the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id, imin, imax + double precision ,intent(in) :: v_t(N_states_diag,N_det) + integer :: rc, sz + integer*8 :: rc8 + + sz = (imax-imin+1)*N_states_diag + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push task_id' + + rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push imin' + + rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push imax' + + rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE) + if(rc8 /= 8_8*sz) stop 'davidson_nos2_push_results failed to push vt' + +! Activate is zmq_socket_push is a REQ +IRP_IF ZMQ_PUSH +IRP_ELSE + character*(2) :: ok + rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0) + if ((rc /= 2).and.(ok(1:2)/='ok')) then + print *, irp_here, ': f77_zmq_recv( zmq_socket_push, ok, 2, 0)' + stop -1 + endif +IRP_ENDIF + +end subroutine + +subroutine davidson_nos2_push_results_async_send(zmq_socket_push, v_t, imin, imax, task_id,sending) + use f77_zmq + implicit none + BEGIN_DOC +! Push the results of $H | U \rangle$ from a worker to the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_push + integer ,intent(in) :: task_id, imin, imax + double precision ,intent(in) :: v_t(N_states_diag,N_det) + logical ,intent(inout) :: sending + integer :: rc, sz + integer*8 :: rc8 + + if (sending) then + print *, irp_here, ': sending=true' + stop -1 + endif + sending = .True. + + sz = (imax-imin+1)*N_states_diag + + rc = f77_zmq_send( zmq_socket_push, task_id, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push task_id' + + rc = f77_zmq_send( zmq_socket_push, imin, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push imin' + + rc = f77_zmq_send( zmq_socket_push, imax, 4, ZMQ_SNDMORE) + if(rc /= 4) stop 'davidson_nos2_push_results failed to push imax' + + rc8 = f77_zmq_send8( zmq_socket_push, v_t(1,imin), 8_8*sz, ZMQ_SNDMORE) + if(rc8 /= 8_8*sz) stop 'davidson_nos2_push_results failed to push vt' + + +end subroutine + + + + +subroutine davidson_nos2_pull_results(zmq_socket_pull, v_t, imin, imax, task_id) + use f77_zmq + implicit none + BEGIN_DOC +! Pull the results of $H | U \rangle$ on the master. + END_DOC + + integer(ZMQ_PTR) ,intent(in) :: zmq_socket_pull + integer ,intent(out) :: task_id, imin, imax + double precision ,intent(out) :: v_t(N_states_diag,N_det) + + integer :: rc, sz + integer*8 :: rc8 + + rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) + if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull task_id' + + rc = f77_zmq_recv( zmq_socket_pull, imin, 4, 0) + if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull imin' + + rc = f77_zmq_recv( zmq_socket_pull, imax, 4, 0) + if(rc /= 4) stop 'davidson_nos2_pull_results failed to pull imax' + + sz = (imax-imin+1)*N_states_diag + + rc8 = f77_zmq_recv8( zmq_socket_pull, v_t(1,imin), 8_8*sz, 0) + if(rc8 /= 8*sz) stop 'davidson_nos2_pull_results failed to pull v_t' + +! Activate if zmq_socket_pull is a REP +IRP_IF ZMQ_PUSH +IRP_ELSE + rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0) + if (rc /= 2) then + print *, irp_here, ' : f77_zmq_send (zmq_socket_pull,...' + stop -1 + endif +IRP_ENDIF + +end subroutine + + + + +subroutine davidson_nos2_collector(zmq_to_qp_run_socket, zmq_socket_pull, v0, sze, N_st) + use f77_zmq + implicit none + BEGIN_DOC +! Routine collecting the results of the workers in Davidson's algorithm. + END_DOC + + integer(ZMQ_PTR), intent(in) :: zmq_socket_pull + integer, intent(in) :: sze, N_st + integer(ZMQ_PTR), intent(in) :: zmq_to_qp_run_socket + + double precision ,intent(inout) :: v0(sze, N_st) + + integer :: more, task_id, imin, imax + + double precision, allocatable :: v_t(:,:) + logical :: sending + integer :: i,j + integer, external :: zmq_delete_task_async_send + integer, external :: zmq_delete_task_async_recv + + allocate(v_t(N_st,N_det)) + v0 = 0.d0 + more = 1 + sending = .False. + do while (more == 1) + call davidson_nos2_pull_results(zmq_socket_pull, v_t, imin, imax, task_id) + if (zmq_delete_task_async_send(zmq_to_qp_run_socket,task_id,sending) == -1) then + stop 'davidson: Unable to delete task (send)' + endif + do j=1,N_st + do i=imin,imax + v0(i,j) = v0(i,j) + v_t(j,i) + enddo + enddo + if (zmq_delete_task_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then + stop 'davidson: Unable to delete task (recv)' + endif + end do + deallocate(v_t) + +end subroutine + + + + + +subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze) + use omp_lib + use bitmasks + use f77_zmq + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$ + ! + ! n : number of determinants + ! + ! H_jj : array of $\langle j | H | j \rangle$ + END_DOC + integer, intent(in) :: N_st, sze + double precision, intent(out) :: v_0(sze,N_st) + double precision, intent(inout):: u_0(sze,N_st) + integer :: i,j,k + integer :: ithread + double precision, allocatable :: u_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull + PROVIDE psi_det_beta_unique psi_bilinear_matrix_order_transp_reverse psi_det_alpha_unique + PROVIDE psi_bilinear_matrix_transp_values psi_bilinear_matrix_values psi_bilinear_matrix_columns_loc + PROVIDE ref_bitmask_energy nproc + PROVIDE mpi_initialized + + call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'davidson') + +! integer :: N_states_diag_save +! N_states_diag_save = N_states_diag +! N_states_diag = N_st + if (zmq_put_N_states_diag(zmq_to_qp_run_socket, 1) == -1) then + stop 'Unable to put N_states_diag on ZMQ server' + endif + + if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then + stop 'Unable to put psi on ZMQ server' + endif + energy = 0.d0 + if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',energy,size(energy)) == -1) then + stop 'Unable to put energy on ZMQ server' + endif + + + ! Create tasks + ! ============ + + integer :: istep, imin, imax, ishift, ipos + integer, external :: add_task_to_taskserver + integer, parameter :: tasksize=20000 + character*(100000) :: task + istep=1 + ishift=0 + imin=1 + + + ipos=1 + do imin=1,N_det,tasksize + imax = min(N_det,imin-1+tasksize) + if (imin==1) then + istep = 2 + else + istep = 1 + endif + do ishift=0,istep-1 + write(task(ipos:ipos+50),'(4(I11,1X),1X,1A)') imin, imax, ishift, istep, '|' + ipos = ipos+50 + if (ipos > 100000-50) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task' + endif + ipos=1 + endif + enddo + enddo + + if (ipos > 1) then + if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then + stop 'Unable to add task' + endif + ipos=1 + endif + + allocate(u_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + + ASSERT (N_st == N_states_diag) + ASSERT (sze >= N_det) + + integer :: rc, ni, nj + integer*8 :: rc8 + double precision :: energy(N_st) + + integer, external :: zmq_put_dvector, zmq_put_psi, zmq_put_N_states_diag + integer, external :: zmq_put_dmatrix + + if (size(u_t) < 8388608) then + ni = size(u_t) + nj = 1 + else + ni = 8388608 + nj = size(u_t)/8388608 + 1 + endif + ! Warning : dimensions are modified for efficiency, It is OK since we get the + ! full matrix + if (zmq_put_dmatrix(zmq_to_qp_run_socket, 1, 'u_t', u_t, ni, nj, size(u_t,kind=8)) == -1) then + stop 'Unable to put u_t on ZMQ server' + endif + + deallocate(u_t) + + integer, external :: zmq_set_running + if (zmq_set_running(zmq_to_qp_run_socket) == -1) then + print *, irp_here, ': Failed in zmq_set_running' + endif + + call omp_set_max_active_levels(4) + !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread) + ithread = omp_get_thread_num() + if (ithread == 0 ) then + call davidson_nos2_collector(zmq_to_qp_run_socket, zmq_socket_pull, v_0, N_det, N_st) + else + call davidson_nos2_slave_inproc(1) + endif + !$OMP END PARALLEL + call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'davidson') + + !$OMP PARALLEL + !$OMP SINGLE + do k=1,N_st + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det) + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + !$OMP END TASK + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(k,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + !$OMP END TASK + enddo + !$OMP END SINGLE + !$OMP TASKWAIT + !$OMP END PARALLEL + +! N_states_diag = N_states_diag_save +! SOFT_TOUCH N_states_diag +end + diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f new file mode 100644 index 00000000..ebecf623 --- /dev/null +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -0,0 +1,568 @@ +subroutine davidson_diag_h(dets_in,u_in,dim_in,energies,sze,N_st,N_st_diag,Nint,dressing_state,converged) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + integer, intent(in) :: dressing_state + logical, intent(out) :: converged + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + integer :: i,k + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + allocate(H_jj(sze)) + + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i=2,sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if (dressing_state > 0) then + do k=1,N_st + do i=1,sze + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + enddo + enddo + endif + + call davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,dressing_state,converged) + deallocate (H_jj) +end + + +subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,Nint,dressing_state,converged) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + ! Initial guess vectors are not necessarily orthonormal + END_DOC + integer, intent(in) :: dim_in, sze, N_st, N_st_diag_in, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + integer, intent(in) :: dressing_state + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + double precision, intent(out) :: energies(N_st_diag_in) + + integer :: iter, N_st_diag + integer :: i,j,k,l,m + logical, intent(inout) :: converged + + double precision, external :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2, itertot + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(2,N_st) + double precision :: cpu, wall + integer :: shift, shift2, itermax, istate + double precision :: r1, r2, alpha + logical :: state_ok(N_st_diag_in*davidson_sze_max) + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: cmax + double precision, allocatable :: U(:,:), overlap(:,:) + double precision, pointer :: W(:,:) + logical :: disk_based + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + + include 'constants.include.F' + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, S_d, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 + itertot = 0 + + if (state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.0d0*dble(sze*m)*(N_st_diag*itermax) &! W + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(r1,irp_here) + nproc_target = 1 + exit + endif + + if (r1+rss < qp_max_mem) then + exit + endif + + if (itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m=0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + call write_int(6,N_st,'Number of states') + call write_int(6,N_st_diag,'Number of states in diagonalization') + call write_int(6,sze,'Number of determinants') + call write_int(6,nproc_target,'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if (disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if (disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax)) + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + + do k=N_st+1,N_st_diag + do i=1,sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + enddo + u_in(k,k) = u_in(k,k) + 10.d0 + enddo + do k=1,N_st_diag + call normalize(u_in(1,k),sze) + enddo + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + + do while (.not.converged) + itertot = itertot+1 + if (itertot == 8) then + exit + endif + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + + if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + if (disk_based) then + call ortho_qr_unblocked(U,size(U,1),sze,shift2) + call ortho_qr_unblocked(U,size(U,1),sze,shift2) + else + call ortho_qr(U,size(U,1),sze,shift2) + call ortho_qr(U,size(U,1),sze,shift2) + endif + + if ((sze > 100000).and.distributed_davidson) then + call H_u_0_nstates_zmq (W(1,shift+1),U(1,shift+1),N_st_diag,sze) + else + call H_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + endif + else + ! Already computed in update below + continue + endif + + if (dressing_state > 0) then + + if (N_st == 1) then + + l = dressed_column_idx(1) + double precision :: f + f = 1.0d0/psi_coef(l,1) + do istate=1,N_st_diag + do i=1,sze + W(i,shift+istate) += dressing_column_h(i,1) *f * U(l,shift+istate) + W(l,shift+istate) += dressing_column_h(i,1) *f * U(i,shift+istate) + enddo + + enddo + + else + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + psi_coef, size(psi_coef,1), & + U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,shift+1), size(W,1)) + + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + dressing_column_h, size(dressing_column_h,1), & + U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,shift+1), size(W,1)) + + endif + endif + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm('T','N', shift2, shift2, sze, & + 1.d0, U, size(U,1), W, size(W,1), & + 0.d0, h, size(h,1)) + + ! Diagonalize h + ! --------------- + + call lapack_diag(lambda,y,h,size(h,1),shift2) + + ! Compute Energy for each eigenvector + ! ----------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, h, size(h,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, h, size(h,1)) + + do k=1,shift2 + lambda(k) = h(k,k) + enddo + + if (state_following) then + + overlap = -1.d0 + do k=1,shift2 + do i=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + enddo + + endif + + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + call dgemm('N','N', sze, N_st_diag, shift2, & + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,shift2+k) = & + (lambda(k) * U(i,shift2+k) - W(i,shift2+k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo + + if (k <= N_st) then + residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + endif + enddo + !$OMP END PARALLEL DO + + + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if (iter > 1) then + if (threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson + endif + endif + + do k=1,N_st + if (residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if (converged) then + exit + endif + + logical, external :: qp_stop + if (qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & + W, size(W,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm('N','N', sze, N_st_diag, shift2, 1.d0, & + U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + + do k=1,N_st_diag + do i=1,sze + U(i,k) = u_in(i,k) + enddo + enddo + + if (disk_based) then + call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag) + call ortho_qr_unblocked(U,size(U,1),sze,N_st_diag) + else + call ortho_qr(U,size(U,1),sze,N_st_diag) + call ortho_qr(U,size(U,1),sze,N_st_diag) + endif + + ! Adjust the phase + do j=1,N_st_diag + ! Find first non-zero + k=1 + do while ((k N_states_diag_save) then N_states_diag = N_states_diag_save diff --git a/src/davidson/u0_h_u0.irp.f b/src/davidson/u0_h_u0.irp.f index bc6acdb9..3f5113db 100644 --- a/src/davidson/u0_h_u0.irp.f +++ b/src/davidson/u0_h_u0.irp.f @@ -1,72 +1,43 @@ - BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] -&BEGIN_PROVIDER [ double precision, psi_s2, (N_states) ] - implicit none - BEGIN_DOC -! psi_energy(i) = $\langle \Psi_i | H | \Psi_i \rangle$ -! -! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$ - END_DOC - call u_0_H_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size) - integer :: i - do i=N_det+1,N_states - psi_energy(i) = 0.d0 - psi_s2(i) = 0.d0 - enddo -END_PROVIDER - -BEGIN_PROVIDER [ double precision, psi_energy_with_nucl_rep, (N_states) ] - implicit none - BEGIN_DOC - ! Energy of the wave function with the nuclear repulsion energy. - END_DOC - psi_energy_with_nucl_rep(1:N_states) = psi_energy(1:N_states) + nuclear_repulsion -END_PROVIDER - - -subroutine u_0_H_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze) +subroutine u_0_H_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze) use bitmasks implicit none BEGIN_DOC ! Computes $E_0 = \frac{\langle u_0 | H | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ ! - ! and $S_0 = \frac{\langle u_0 | S^2 | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ - ! ! n : number of determinants ! END_DOC integer, intent(in) :: n,Nint, N_st, sze - double precision, intent(out) :: e_0(N_st),s_0(N_st) + double precision, intent(out) :: e_0(N_st) double precision, intent(inout) :: u_0(sze,N_st) integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) - double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:) + double precision, allocatable :: v_0(:,:), u_1(:,:) double precision :: u_dot_u,u_dot_v,diag_H_mat_elem integer :: i,j, istate if ((n > 100000).and.distributed_davidson) then - allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag)) + allocate (v_0(n,N_states_diag), u_1(n,N_states_diag)) u_1(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) - call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n) + call H_u_0_nstates_zmq(v_0,u_1,N_states_diag,n) else if (n < n_det_max_full) then - allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st)) + allocate (v_0(n,N_st), u_1(n,N_st)) v_0(:,:) = 0.d0 u_1(:,:) = 0.d0 - s_vec(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) do istate = 1,N_st do j=1,n do i=1,n v_0(i,istate) = v_0(i,istate) + h_matrix_all_dets(i,j) * u_0(j,istate) - s_vec(i,istate) = s_vec(i,istate) + S2_matrix_all_dets(i,j) * u_0(j,istate) enddo enddo enddo else - allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st)) + allocate (v_0(n,N_st),u_1(n,N_st)) u_1(:,:) = 0.d0 u_1(1:n,1:N_st) = u_0(1:n,1:N_st) - call H_S2_u_0_nstates_openmp(v_0,s_vec,u_1,N_st,n) + call H_u_0_nstates_openmp(v_0,u_1,N_st,n) endif u_0(1:n,1:N_st) = u_1(1:n,1:N_st) deallocate(u_1) @@ -76,42 +47,39 @@ subroutine u_0_H_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze) norm = u_dot_u(u_0(1,i),n) if (norm /= 0.d0) then e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n) - s_0(i) = u_dot_v(s_vec(1,i),u_0(1,i),n) else e_0(i) = 0.d0 - s_0(i) = 0.d0 endif enddo !$OMP END PARALLEL DO - deallocate (s_vec, v_0) + deallocate (v_0) end -subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) +subroutine H_u_0_nstates_openmp(v_0,u_0,N_st,sze) use bitmasks implicit none BEGIN_DOC - ! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2 | u_0\rangle$. + ! Computes $v_0 = H | u_0\rangle$. ! ! Assumes that the determinants are in psi_det ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st) + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st) integer :: k - double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t - allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + allocate(u_t(N_st,N_det),v_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo v_t = 0.d0 - s_t = 0.d0 call dtranspose( & u_0, & size(u_0, 1), & @@ -119,7 +87,7 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) size(u_t, 1), & N_det, N_st) - call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1) + call H_u_0_nstates_openmp_work(v_t,u_t,N_st,sze,1,N_det,0,1) deallocate(u_t) call dtranspose( & @@ -128,66 +96,59 @@ subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) v_0, & size(v_0, 1), & N_st, N_det) - call dtranspose( & - s_t, & - size(s_t, 1), & - s_0, & - size(s_0, 1), & - N_st, N_det) - deallocate(v_t,s_t) + deallocate(v_t) do k=1,N_st call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) - call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) enddo end -subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) +subroutine H_u_0_nstates_openmp_work(v_t,u_t,N_st,sze,istart,iend,ishift,istep) use bitmasks implicit none BEGIN_DOC - ! Computes $v_t = H | u_t\rangle$ and $s_t = S^2 | u_t\rangle$ + ! Computes $v_t = H | u_t\rangle$ ! ! Default should be 1,N_det,0,1 END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + double precision, intent(out) :: v_t(N_st,sze) PROVIDE ref_bitmask_energy N_int select case (N_int) case (1) - call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + call H_u_0_nstates_openmp_work_1(v_t,u_t,N_st,sze,istart,iend,ishift,istep) case (2) - call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + call H_u_0_nstates_openmp_work_2(v_t,u_t,N_st,sze,istart,iend,ishift,istep) case (3) - call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + call H_u_0_nstates_openmp_work_3(v_t,u_t,N_st,sze,istart,iend,ishift,istep) case (4) - call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + call H_u_0_nstates_openmp_work_4(v_t,u_t,N_st,sze,istart,iend,ishift,istep) case default - call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + call H_u_0_nstates_openmp_work_N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep) end select end BEGIN_TEMPLATE -subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) +subroutine H_u_0_nstates_openmp_work_$N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep) use bitmasks implicit none BEGIN_DOC - ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! Computes $v_t = H | u_t \\rangle$ ! ! Default should be 1,N_det,0,1 END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + double precision, intent(out) :: v_t(N_st,sze) - double precision :: hij, sij + double precision :: hij integer :: i,j,k,l,kk integer :: k_a, k_b, l_a, l_b, m_a, m_b integer :: istate @@ -246,14 +207,14 @@ compute_singles=.True. !$OMP psi_bilinear_matrix_order_transp_reverse, & !$OMP psi_bilinear_matrix_columns_loc, & !$OMP psi_bilinear_matrix_transp_rows_loc, & - !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP istart, iend, istep, irp_here, v_t, & !$OMP ishift, idx0, u_t, maxab, compute_singles, & !$OMP singles_alpha_csc,singles_alpha_csc_idx, & !$OMP singles_beta_csc,singles_beta_csc_idx) & !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & !$OMP buffer, doubles, n_doubles, umax, & - !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, & + !$OMP tmp_det2, hij, idx, l, kcol_prev, & !$OMP singles_a, n_singles_a, singles_b, ratio, & !$OMP n_singles_b, k8, last_found,left,right,right_max) @@ -453,11 +414,9 @@ compute_singles=.True. tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij) - call get_s2(tmp_det,tmp_det2,$N_int,sij) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) - s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) enddo enddo enddo @@ -559,7 +518,6 @@ compute_singles=.True. !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) - ! single => sij = 0 enddo enddo enddo @@ -604,7 +562,6 @@ compute_singles=.True. !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) - ! same spin => sij = 0 enddo enddo enddo @@ -697,7 +654,6 @@ compute_singles=.True. !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) - ! single => sij = 0 enddo enddo enddo @@ -745,7 +701,6 @@ compute_singles=.True. !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) - ! same spin => sij = 0 enddo enddo enddo @@ -777,14 +732,12 @@ compute_singles=.True. tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) - double precision, external :: diag_H_mat_elem, diag_S_mat_elem + double precision, external :: diag_H_mat_elem hij = diag_H_mat_elem(tmp_det,$N_int) - sij = diag_S_mat_elem(tmp_det,$N_int) !DIR$ LOOP COUNT AVG(4) do l=1,N_st v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) - s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) enddo end do diff --git a/src/davidson/u0_hs2_u0.irp.f b/src/davidson/u0_hs2_u0.irp.f new file mode 100644 index 00000000..96c266c2 --- /dev/null +++ b/src/davidson/u0_hs2_u0.irp.f @@ -0,0 +1,807 @@ + BEGIN_PROVIDER [ double precision, psi_energy, (N_states) ] +&BEGIN_PROVIDER [ double precision, psi_s2, (N_states) ] + implicit none + BEGIN_DOC +! psi_energy(i) = $\langle \Psi_i | H | \Psi_i \rangle$ +! +! psi_s2(i) = $\langle \Psi_i | S^2 | \Psi_i \rangle$ + END_DOC + call u_0_HS2_u_0(psi_energy,psi_s2,psi_coef,N_det,psi_det,N_int,N_states,psi_det_size) + integer :: i + do i=N_det+1,N_states + psi_energy(i) = 0.d0 + psi_s2(i) = 0.d0 + enddo +END_PROVIDER + +BEGIN_PROVIDER [ double precision, psi_energy_with_nucl_rep, (N_states) ] + implicit none + BEGIN_DOC + ! Energy of the wave function with the nuclear repulsion energy. + END_DOC + psi_energy_with_nucl_rep(1:N_states) = psi_energy(1:N_states) + nuclear_repulsion +END_PROVIDER + + +subroutine u_0_HS2_u_0(e_0,s_0,u_0,n,keys_tmp,Nint,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $E_0 = \frac{\langle u_0 | H | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ + ! + ! and $S_0 = \frac{\langle u_0 | S^2 | u_0 \rangle}{\langle u_0 | u_0 \rangle}$ + ! + ! n : number of determinants + ! + END_DOC + integer, intent(in) :: n,Nint, N_st, sze + double precision, intent(out) :: e_0(N_st),s_0(N_st) + double precision, intent(inout) :: u_0(sze,N_st) + integer(bit_kind),intent(in) :: keys_tmp(Nint,2,n) + + double precision, allocatable :: v_0(:,:), s_vec(:,:), u_1(:,:) + double precision :: u_dot_u,u_dot_v,diag_H_mat_elem + integer :: i,j, istate + + if ((n > 100000).and.distributed_davidson) then + allocate (v_0(n,N_states_diag),s_vec(n,N_states_diag), u_1(n,N_states_diag)) + u_1(:,:) = 0.d0 + u_1(1:n,1:N_st) = u_0(1:n,1:N_st) + call H_S2_u_0_nstates_zmq(v_0,s_vec,u_1,N_states_diag,n) + else if (n < n_det_max_full) then + allocate (v_0(n,N_st),s_vec(n,N_st), u_1(n,N_st)) + v_0(:,:) = 0.d0 + u_1(:,:) = 0.d0 + s_vec(:,:) = 0.d0 + u_1(1:n,1:N_st) = u_0(1:n,1:N_st) + do istate = 1,N_st + do j=1,n + do i=1,n + v_0(i,istate) = v_0(i,istate) + h_matrix_all_dets(i,j) * u_0(j,istate) + s_vec(i,istate) = s_vec(i,istate) + S2_matrix_all_dets(i,j) * u_0(j,istate) + enddo + enddo + enddo + else + allocate (v_0(n,N_st),s_vec(n,N_st),u_1(n,N_st)) + u_1(:,:) = 0.d0 + u_1(1:n,1:N_st) = u_0(1:n,1:N_st) + call H_S2_u_0_nstates_openmp(v_0,s_vec,u_1,N_st,n) + endif + u_0(1:n,1:N_st) = u_1(1:n,1:N_st) + deallocate(u_1) + double precision :: norm + !$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED) + do i=1,N_st + norm = u_dot_u(u_0(1,i),n) + if (norm /= 0.d0) then + e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n) + s_0(i) = u_dot_v(s_vec(1,i),u_0(1,i),n) + else + e_0(i) = 0.d0 + s_0(i) = 0.d0 + endif + enddo + !$OMP END PARALLEL DO + deallocate (s_vec, v_0) +end + + + + + +subroutine H_S2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$ and $s_0 = S^2 | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), s_0(sze,N_st), u_0(sze,N_st) + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_S2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ and $s_t = S^2 | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_S2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + case (2) + call H_S2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + case (3) + call H_S2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + case (4) + call H_S2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + case default + call H_S2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + end select +end +BEGIN_TEMPLATE + +subroutine H_S2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! ------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + +!----- +! else +! +! ! Search for singles +! +!call cpu_time(time0) +! ! Right boundary +! l_a = psi_bilinear_matrix_columns_loc(lcol+1)-1 +! ASSERT (l_a <= N_det) +! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) +! lrow = psi_bilinear_matrix_rows(l_a) +! ASSERT (lrow <= N_det_alpha_unique) +! +! left = singles_alpha_csc_idx(krow) +! right_max = -1_8 +! right = singles_alpha_csc_idx(krow+1) +! do while (right-left>0_8) +! k8 = shiftr(right+left,1) +! if (singles_alpha_csc(k8) > lrow) then +! right = k8 +! else if (singles_alpha_csc(k8) < lrow) then +! left = k8 + 1_8 +! else +! right_max = k8+1_8 +! exit +! endif +! enddo +! if (right_max > 0_8) exit +! l_a = l_a-1 +! enddo +! if (right_max < 0_8) right_max = singles_alpha_csc_idx(krow) +! +! ! Search +! n_singles_a = 0 +! l_a = psi_bilinear_matrix_columns_loc(lcol) +! ASSERT (l_a <= N_det) +! +! last_found = singles_alpha_csc_idx(krow) +! do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) +! lrow = psi_bilinear_matrix_rows(l_a) +! ASSERT (lrow <= N_det_alpha_unique) +! +! left = last_found +! right = right_max +! do while (right-left>0_8) +! k8 = shiftr(right+left,1) +! if (singles_alpha_csc(k8) > lrow) then +! right = k8 +! else if (singles_alpha_csc(k8) < lrow) then +! left = k8 + 1_8 +! else +! n_singles_a += 1 +! singles_a(n_singles_a) = l_a +! last_found = k8+1_8 +! exit +! endif +! enddo +! l_a = l_a+1 +! enddo +! j = j-1 +! +! endif +!----- + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) + call i_H_j_double_alpha_beta(tmp_det,tmp_det2,$N_int,hij) + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) + call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + ! single => sij = 0 + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + ! same spin => sij = 0 + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! ---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) + call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + ! single => sij = 0 + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + ! same spin => sij = 0 + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + + hij = diag_H_mat_elem(tmp_det,$N_int) + sij = diag_S_mat_elem(tmp_det,$N_int) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/determinants/connected_to_ref.irp.f b/src/determinants/connected_to_ref.irp.f index 7b8ea3e4..5c0cdd2a 100644 --- a/src/determinants/connected_to_ref.irp.f +++ b/src/determinants/connected_to_ref.irp.f @@ -12,36 +12,6 @@ integer*8 function det_search_key(det,Nint) end -integer*8 function configuration_search_key(cfg,Nint) - use bitmasks - implicit none - BEGIN_DOC - ! Returns an integer*8 corresponding to a determinant index for searching. - ! The left-most 8 bits contain the number of open shells+1. This ensures that the CSF - ! are packed with the same seniority. - END_DOC - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: cfg(Nint,2) - integer :: i, n_open_shells - integer*8 :: mask - - i = shiftr(elec_alpha_num, bit_kind_shift)+1 - configuration_search_key = int(shiftr(ior(cfg(i,1),cfg(i,2)),1)+sum(cfg),8) - - mask = X'00FFFFFFFFFFFFFF' - configuration_search_key = iand(mask,configuration_search_key) - - n_open_shells = 1 - do i=1,Nint - if (cfg(i,1) == 0_bit_kind) cycle - n_open_shells = n_open_shells + popcnt(cfg(i,1)) - enddo - mask = n_open_shells - mask = shiftl(mask,56) - configuration_search_key = ior (mask,configuration_search_key) - -end - logical function is_in_wavefunction(key,Nint) diff --git a/src/determinants/create_excitations.irp.f b/src/determinants/create_excitations.irp.f index 8611ca22..a597116b 100644 --- a/src/determinants/create_excitations.irp.f +++ b/src/determinants/create_excitations.irp.f @@ -107,283 +107,3 @@ logical function is_spin_flip_possible(key_in,i_flip,ispin) return endif end - -subroutine do_single_excitation_cfg(key_in,key_out,i_hole,i_particle,ok) - use bitmasks - implicit none - BEGIN_DOC - ! Applies the single excitation operator to a configuration - ! If the excitation is possible, ok is True - END_DOC - integer, intent(in) :: i_hole,i_particle - integer(bit_kind), intent(in) :: key_in(N_int,2) - logical , intent(out) :: ok - integer :: k,j,i - integer(bit_kind) :: mask - integer(bit_kind) :: key_out(N_int,2) - - ASSERT (i_hole > 0) - ASSERT (i_particle <= mo_num) - - ok = .True. - key_out(:,:) = key_in(:,:) - - ! hole - k = shiftr(i_hole-1,bit_kind_shift)+1 - j = i_hole-shiftl(k-1,bit_kind_shift)-1 - mask = ibset(0_bit_kind,j) - - ! Check if the position j is singly occupied - ! 1 -> 0 (SOMO) - ! 0 0 (DOMO) - if (iand(key_out(k,1),mask) /= 0_bit_kind) then - key_out(k,1) = ibclr(key_out(k,1),j) - - ! Check if the position j is doubly occupied - ! 0 -> 1 (SOMO) - ! 1 0 (DOMO) - else if (iand(key_out(k,2),mask) /= 0_bit_kind) then - key_out(k,1) = ibset(key_out(k,1),j) - key_out(k,2) = ibclr(key_out(k,2),j) - - ! The position j is unoccupied: Not OK - ! 0 -> 0 (SOMO) - ! 0 0 (DOMO) - else - ok =.False. - return - endif - - - ! particle - k = shiftr(i_particle-1,bit_kind_shift)+1 - j = i_particle-shiftl(k-1,bit_kind_shift)-1 - mask = ibset(0_bit_kind,j) - - ! Check if the position j is singly occupied - ! 1 -> 0 (SOMO) - ! 0 1 (DOMO) - if (iand(key_out(k,1),mask) /= 0_bit_kind) then - key_out(k,1) = ibclr(key_out(k,1),j) - key_out(k,2) = ibset(key_out(k,2),j) - - ! Check if the position j is doubly occupied : Not OK - ! 0 -> 1 (SOMO) - ! 1 0 (DOMO) - else if (iand(key_out(k,2),mask) /= 0_bit_kind) then - ok = .False. - return - - ! Position at j is unoccupied - ! 0 -> 0 (SOMO) - ! 0 0 (DOMO) - else - key_out(k,1) = ibset(key_out(k,1),j) - endif - -end - -subroutine do_single_excitation_cfg_with_type(key_in,key_out,i_hole,i_particle,ex_type,ok) - use bitmasks - implicit none - BEGIN_DOC - ! Applies the single excitation operator to a configuration - ! Returns the type of excitation in ex_type - ! where the following convention is used - ! 1 = (SOMO -> SOMO) 1 change in Nsomo - ! 2 = (DOMO -> VMO) 1 change in Nsomo - ! 3 = (SOMO -> VMO) 0 change in Nsomo - ! 4 = (DOMO -> SOMO) 0 change in Nsomo - ! If the excitation is possible, ok is True - END_DOC - integer, intent(in) :: i_hole,i_particle - integer(bit_kind), intent(in) :: key_in(N_int,2) - integer , intent(out) :: ex_type - logical , intent(out) :: ok - integer :: k,j,i - integer(bit_kind) :: mask - integer(bit_kind) :: key_out(N_int,2) - logical :: isholeSOMO - logical :: isparticleSOMO - logical :: isholeDOMO - logical :: isparticleVMO - isholeSOMO = .False. - isholeDOMO = .False. - isparticleSOMO = .False. - isparticleVMO = .False. - - ASSERT (i_hole > 0) - ASSERT (i_particle <= mo_num) - - ok = .True. - key_out(:,:) = key_in(:,:) - - ! hole - k = shiftr(i_hole-1,bit_kind_shift)+1 - j = i_hole-shiftl(k-1,bit_kind_shift)-1 - mask = ibset(0_bit_kind,j) - - ! Check if the position j is singly occupied - ! 1 -> 0 (SOMO) - ! 0 0 (DOMO) - if (iand(key_out(k,1),mask) /= 0_bit_kind) then - key_out(k,1) = ibclr(key_out(k,1),j) - isholeSOMO = .True. - - ! Check if the position j is doubly occupied - ! 0 -> 1 (SOMO) - ! 1 0 (DOMO) - else if (iand(key_out(k,2),mask) /= 0_bit_kind) then - key_out(k,1) = ibset(key_out(k,1),j) - key_out(k,2) = ibclr(key_out(k,2),j) - isholeDOMO = .True. - - ! The position j is unoccupied: Not OK - ! 0 -> 0 (SOMO) - ! 0 0 (DOMO) - else - ok =.False. - return - endif - - - ! particle - k = shiftr(i_particle-1,bit_kind_shift)+1 - j = i_particle-shiftl(k-1,bit_kind_shift)-1 - mask = ibset(0_bit_kind,j) - - ! Check if the position j is singly occupied - ! 1 -> 0 (SOMO) - ! 0 1 (DOMO) - if (iand(key_out(k,1),mask) /= 0_bit_kind) then - key_out(k,1) = ibclr(key_out(k,1),j) - key_out(k,2) = ibset(key_out(k,2),j) - isparticleSOMO = .True. - - ! Check if the position j is doubly occupied : Not OK - ! 0 -> 1 (SOMO) - ! 1 0 (DOMO) - else if (iand(key_out(k,2),mask) /= 0_bit_kind) then - ok = .False. - return - - ! Position at j is unoccupied - ! 0 -> 0 (SOMO) - ! 0 0 (DOMO) - else - key_out(k,1) = ibset(key_out(k,1),j) - isparticleVMO = .True. - endif - - if(isholeSOMO) then - ! two possibilities - ! particle is SOMO or VMO - if(isparticleSOMO) then - ! SOMO -> SOMO - ex_type = 1 - else - ! SOMO -> VMO - ex_type = 3 - endif - else - ! two possibilities - ! particle is SOMO or VMO - if(isparticleSOMO) then - ! DOMO -> SOMO - ex_type = 4 - else - ! DOMO -> VMO - ex_type = 2 - endif - endif - -end - -subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint) - implicit none - use bitmasks - BEGIN_DOC - ! Generate all single excitation wrt a configuration - ! - ! n_singles : on input, max number of singles : - ! elec_alpha_num * (mo_num - elec_beta_num) - ! on output, number of generated singles - END_DOC - integer, intent(in) :: Nint - integer, intent(inout) :: n_singles - integer(bit_kind), intent(in) :: cfg(Nint,2) - integer(bit_kind), intent(out) :: singles(Nint,2,*) - - integer :: i,k, n_singles_ma, i_hole, i_particle - integer(bit_kind) :: single(Nint,2) - logical :: i_ok - - n_singles = 0 - !TODO - !Make list of Somo and Domo for holes - !Make list of Unocc and Somo for particles - do i_hole = 1, mo_num - do i_particle = 1, mo_num - call do_single_excitation_cfg(cfg,single,i_hole,i_particle,i_ok) - if (i_ok) then - n_singles = n_singles + 1 - do k=1,Nint - singles(k,1,n_singles) = single(k,1) - singles(k,2,n_singles) = single(k,2) - enddo - endif - enddo - enddo -end - -subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) - implicit none - use bitmasks - BEGIN_DOC - ! Generate all single excitation wrt a configuration - ! - ! n_singles : on input, max number of singles : - ! elec_alpha_num * (mo_num - elec_beta_num) - ! on output, number of generated singles - ! ex_type_singles : on output contains type of excitations : - ! - END_DOC - integer, intent(in) :: Nint - integer, intent(inout) :: n_singles - integer, intent(out) :: idxs_singles(*) - integer, intent(out) :: ex_type_singles(*) - integer, intent(out) :: pq_singles(2,*) - integer(bit_kind), intent(in) :: cfgInp(Nint,2) - integer(bit_kind), intent(out) :: singles(Nint,2,*) - integer(bit_kind) :: Jdet(Nint,2) - - integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg - integer(bit_kind) :: single(Nint,2) - logical :: i_ok - - n_singles = 0 - !TODO - !Make list of Somo and Domo for holes - !Make list of Unocc and Somo for particles - do i_hole = 1+n_core_orb, n_core_orb + n_act_orb - do i_particle = 1+n_core_orb, n_core_orb + n_act_orb - if(i_hole .EQ. i_particle) cycle - addcfg = -1 - call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) - if (i_ok) then - call binary_search_cfg(single,addcfg) - if(addcfg .EQ. -1) cycle - n_singles = n_singles + 1 - do k=1,Nint - singles(k,1,n_singles) = single(k,1) - singles(k,2,n_singles) = single(k,2) - ex_type_singles(n_singles) = ex_type - pq_singles(1,n_singles) = i_hole ! p - pq_singles(2,n_singles) = i_particle ! q - idxs_singles(n_singles) = addcfg - enddo - endif - enddo - enddo -end - diff --git a/src/determinants/prune_wf.irp.f b/src/determinants/prune_wf.irp.f index 0bf4b246..697bd6d8 100644 --- a/src/determinants/prune_wf.irp.f +++ b/src/determinants/prune_wf.irp.f @@ -13,16 +13,16 @@ BEGIN_PROVIDER [ logical, pruned, (N_det) ] integer :: i,j,k,ndet_new,ncfg_max double precision :: thr - if (s2_eig) then - - ncfg_max = max(1,int ( dble(N_configuration) * (1.d0 - pruning) + 0.5d0 )) - - do i=1,N_det - k = det_to_configuration(i) - pruned(i) = psi_configuration_sorted_order_reverse(k) > ncfg_max - enddo - - else +! if (s2_eig) then +! +! ncfg_max = max(1,int ( dble(N_configuration) * (1.d0 - pruning) + 0.5d0 )) +! +! do i=1,N_det +! k = det_to_configuration(i) +! pruned(i) = psi_configuration_sorted_order_reverse(k) > ncfg_max +! enddo +! +! else ndet_new = max(1,int( dble(N_det) * (1.d0 - pruning) + 0.5d0 )) thr = psi_average_norm_contrib_sorted(ndet_new) @@ -30,6 +30,6 @@ BEGIN_PROVIDER [ logical, pruned, (N_det) ] pruned(i) = psi_average_norm_contrib(i) < thr enddo - endif +! endif END_PROVIDER