Merge branch 'master' of github.com:scemama/quantum_package

2024-12-23 12:56:14 +01:00 · 2016-10-11 22:46:40 +02:00 · 2016-10-11 22:46:40 +02:00 · b4827421ee
commit b4827421ee
parent 9fe84ab137 fa3ffdd696
6 changed files with 117 additions and 84 deletions
--- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
@ -12,7 +12,7 @@ program selection_slave
 end
 subroutine provide_everything
-  PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
+  PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context mo_mono_elec_integral
 !   PROVIDE ci_electronic_energy mo_tot_num N_int
 end
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@ -810,14 +810,18 @@ END_PROVIDER
       if(res < 1d-8) exit
     end do
     ! rho_mrcc now contains A.X
     norm = 0.d0
     do i=1,N_det_non_ref
       norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
     enddo
     ! Norm now contains the norm of A.X
     do i=1,N_det_ref
       norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
     enddo
     ! Norm now contains the norm of Psi + A.X
     print *, k, "res : ", res, "norm : ", sqrt(norm)
@ -829,30 +833,46 @@ END_PROVIDER
       if (rho_mrcc(i,s) == 0.d0) then
         rho_mrcc(i,s) = 1.d-32
       endif
       ! f is such that f.\tilde{c_i} = c_i
       f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
       ! Avoid numerical instabilities
       f = min(f,2.d0)
       f = max(f,-2.d0)
       norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
       rho_mrcc(i,s) = f
     enddo
     ! norm now contains the norm of |T.Psi_0>
     ! rho_mrcc now contains the f factors
     f = 1.d0/norm
-     norm = 0.d0
+     ! f now contains 1/ <T.Psi_0|T.Psi_0>
     do i=1,N_det_non_ref
       norm = norm + psi_non_ref_coef(i,s)*psi_non_ref_coef(i,s)
     enddo
     f = dsqrt(f*norm)
-     print *,  'norm of |T Psi_0> = ', norm*f*f
+     norm = 1.d0
     do i=1,N_det_ref
       norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
     enddo
     ! norm now contains <Psi_SD|Psi_SD>
     f = dsqrt(f*norm)
     ! f normalises T.Psi_0 such that (1+T)|Psi> is normalized
     norm = norm*f
     print *,  'norm of |T Psi_0> = ', dsqrt(norm)
     do i=1,N_det_ref
       norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
     enddo
     do i=1,N_det_non_ref
       rho_mrcc(i,s) = rho_mrcc(i,s) * f
     enddo
     ! rho_mrcc now contains the product of the scaling factors and the
     ! normalization constant
   end do
   print *, "done"
 END_PROVIDER
@ -860,13 +880,17 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
  integer :: s,i,j
  double precision, external :: get_dij_index
  print *, "computing amplitudes..."
  !$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j)
  do s=1, N_states
    !$OMP DO
    do i=1, N_det_non_ref
      do j=1, N_det_ref
        dij(j, i, s) = get_dij_index(j, i, s, N_int)
      end do
    end do
    !$OMP END DO 
  end do
  !$OMP END PARALLEL
  print *, "done computing amplitudes"
 END_PROVIDER
@ -879,10 +903,9 @@ double precision function get_dij_index(II, i, s, Nint)
  double precision :: HIi, phase
  if(lambda_type == 0) then
    get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint)
 !    get_dij_index = get_dij_index * rho_mrcc(i,s) 
    call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
-    get_dij_index = get_dij_index * rho_mrcc(i,s) * phase
+    get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase
    get_dij_index = get_dij_index * rho_mrcc(i,s) 
  else
    call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
    get_dij_index = HIi * lambda_mrcc(s, i)
--- a/plugins/Selectors_full/zmq.irp.f
+++ b/plugins/Selectors_full/zmq.irp.f
@ -114,7 +114,7 @@ subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
  if (N_det_selectors_read > 0) then
    N_det_selectors  = N_det_selectors_read
  endif
-  SOFT_TOUCH psi_det psi_coef N_det_selectors N_det_generators psi_coef_generators psi_det_generators
+  SOFT_TOUCH psi_det psi_coef N_det_selectors N_det_generators
 end
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@ -4,12 +4,12 @@
 use bitmasks
 use f77_zmq
-subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
+subroutine davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
  implicit none
-  integer             , intent(in)        :: blockb, blocke, bs
+  integer             , intent(in)        :: blockb, bs, blockb2, istep
  integer             , intent(inout)     :: N
  integer             , intent(inout)     :: idx(bs)
  double precision    , intent(inout)     :: vt(N_states_diag, bs)
@ -20,11 +20,12 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
  double precision :: s2, hij
  logical, allocatable :: wrotten(:)
  allocate(wrotten(bs))
  wrotten = .false.
  PROVIDE dav_det
-  do sh = blockb, blocke
+  ii=0
  sh = blockb
  do sh2=1,shortcut_(0,1)
    exa = 0
    do ni=1,N_int
@ -32,7 +33,7 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
    end do
    if(exa > 2) cycle
-    do i=shortcut_(sh,1),shortcut_(sh+1,1)-1
+    do i=blockb2+shortcut_(sh,1),shortcut_(sh+1,1)-1, istep
      ii = i - shortcut_(blockb,1) + 1
      org_i =  sort_idx_(i,1)
@ -60,47 +61,43 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
            vt (istate,ii) += hij*dav_ut(istate,org_j)
            st (istate,ii) += s2*dav_ut(istate,org_j)
          enddo
 !          call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,org_i),1)
 !          call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,org_i),1)
        endif
      enddo
    enddo
  enddo
  enddo
-  do sh=blockb,min(blocke, shortcut_(0,2))
+  if (blockb <= shortcut_(0,2)) then
-  do sh2=sh, shortcut_(0,2), shortcut_(0,1)
+    sh=blockb
-    do i=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
+    do sh2=sh, shortcut_(0,2), shortcut_(0,1)
-      ii += 1
+      do i=blockb2+shortcut_(sh2,2),shortcut_(sh2+1,2)-1, istep
-      org_i = sort_idx_(i,2)
+        ii += 1
-      do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
+        org_i = sort_idx_(i,2)
-        if(i == j) cycle
+        do j=shortcut_(sh2,2),shortcut_(sh2+1,2)-1
-        org_j = sort_idx_(j,2)
+          if(i == j) cycle
-        ext = 0
+          org_j = sort_idx_(j,2)
-        do ni=1,N_int
+          ext = 0
-          ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
+          do ni=1,N_int
            ext = ext + popcnt(xor(sorted_(ni,i,2), sorted_(ni,j,2)))
          end do
          if(ext == 4) then
          call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
          call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
          if(.not. wrotten(ii)) then
            wrotten(ii) = .true.
            idx(ii) = org_i
            vt (:,ii) = 0d0
            st (:,ii) = 0d0
          end if
          do istate=1,N_states_diag
            vt (istate,ii) += hij*dav_ut(istate,org_j)
            st (istate,ii) += s2*dav_ut(istate,org_j)
          enddo
          end if
        end do
        if(ext == 4) then
         call i_h_j (dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,hij)
         call get_s2(dav_det(1,1,org_j),dav_det(1,1,org_i),n_int,s2)
         if(.not. wrotten(ii)) then
           wrotten(ii) = .true.
           idx(ii) = org_i
           vt (:,ii) = 0d0
           st (:,ii) = 0d0
         end if
 !         call daxpy(N_states_diag,hij,dav_ut(1,org_j),1,vt(1,ii),1)
 !         call daxpy(N_states_diag,s2, dav_ut(1,org_j),1,st(1,ii),1)
         do istate=1,N_states_diag
           vt (istate,ii) += hij*dav_ut(istate,org_j)
           st (istate,ii) += s2*dav_ut(istate,org_j)
         enddo
        end if
      end do
-    end do
+    enddo
-  enddo
+  endif
  enddo
  N=0
  do i=1,bs
@ -112,16 +109,16 @@ subroutine davidson_process(blockb, blocke, N, idx, vt, st, bs)
    end if
  end do
 end subroutine
-subroutine davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
+subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
  implicit none
  integer             , intent(in)        :: blockb, blocke
  integer             , intent(in)        :: N
  integer             , intent(in)        :: idx(N)
  double precision    , intent(in)        :: vt(N_states_diag, N)
@ -175,16 +172,16 @@ end subroutine
-subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blocke)
+subroutine davidson_add_task(zmq_to_qp_run_socket, blockb, blockb2, istep)
  use f77_zmq
  implicit none
  integer(ZMQ_PTR)    ,intent(in)    :: zmq_to_qp_run_socket
-  integer             ,intent(in)    :: blockb, blocke
+  integer             ,intent(in)    :: blockb, blockb2, istep
  character*(512)                    :: task 
-  write(task,*) blockb, blocke
+  write(task,*) blockb, blockb2, istep
  call add_task_to_taskserver(zmq_to_qp_run_socket, task)
 end subroutine
@ -213,7 +210,7 @@ subroutine davidson_run_slave(thread,iproc)
  integer,  intent(in)           :: thread, iproc
-  integer                        :: worker_id, task_id, blockb, blocke
+  integer                        :: worker_id, task_id, blockb
  character*(512)                :: task
  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
@ -253,7 +250,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
  character*(512) :: task
-  integer                  :: blockb, blocke
+  integer                  :: blockb, blockb2, istep
  integer                  :: N
  integer             ,  allocatable      :: idx(:)
  double precision    ,  allocatable      :: vt(:,:)
@ -266,10 +263,10 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
  do
    call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
    if(task_id == 0) exit
-    read (task,*) blockb, blocke
+    read (task,*) blockb, blockb2, istep
-    bs = shortcut_(blocke+1,1) - shortcut_(blockb, 1)
+    bs = shortcut_(blockb+1,1) - shortcut_(blockb, 1)
    do i=blockb, shortcut_(0,2), shortcut_(0,1)
-    do j=i, min(i+blocke-blockb, shortcut_(0,2))
+    do j=i, min(i, shortcut_(0,2))
      bs += shortcut_(j+1,2) - shortcut_(j, 2)
    end do
    end do
@ -280,10 +277,9 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
      allocate(st(N_states_diag, bs))
    end if
-    call davidson_process(blockb, blocke, N, idx, vt, st, bs)
+    call davidson_process(blockb, blockb2, N, idx, vt, st, bs, istep)
    call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
-    call davidson_push_results(zmq_socket_push, blockb, blocke, N, idx, vt, st, task_id)
+    call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
  end do
 end subroutine
@ -387,7 +383,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
  integer :: msize
-  msize = (max_workload + max_blocksize)*2
+  msize = (1 + max_blocksize)*2
  allocate(idx(msize)) 
  allocate(vt(N_states_diag, msize)) 
  allocate(st(N_states_diag, msize)) 
@ -402,7 +398,7 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
  do while (more == 1)
    call davidson_pull_results(zmq_socket_pull, blockb, blocke, N, idx, vt, st, task_id)
    !DIR$ FORCEINLINE
-    call davidson_collect(blockb, blocke, N, idx, vt, st , v0t, s0t)
+    call davidson_collect(N, idx, vt, st , v0t, s0t)
    call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more)
  end do
  deallocate(idx,vt,st)
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@ -208,7 +208,8 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
  integer                        :: N_st_8
  integer, external              :: align_double
-  integer :: workload, blockb, blocke
+  integer :: blockb, blockb2, istep
  double precision :: ave_workload, workload
  integer(ZMQ_PTR) :: handler
@ -234,21 +235,38 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
   call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
   call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
  workload = 0
  blockb = shortcut(0,1)
  blocke = blockb
  call davidson_init(handler,n,N_st_8,ut)
  ave_workload = 0.d0
  do sh=1,shortcut(0,1)
    ave_workload += shortcut(0,1)
    ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
    do i=sh, shortcut(0,2), shortcut(0,1)
      do j=i, min(i, shortcut(0,2))
        ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
      end do
    end do
  enddo
  ave_workload = ave_workload/dble(shortcut(0,1))
  print *,  'Ave workload :', ave_workload
  do sh=shortcut(0,1),1,-1
-    workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
+    workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
-    if(workload > max_workload) then
+    do i=sh, shortcut(0,2), shortcut(0,1)
-      blocke = sh
+      do j=i, min(i, shortcut(0,2))
-      call davidson_add_task(handler, blocke, blockb)
+        workload += (shortcut(j+1,2) - shortcut(j, 2))**2
-      blockb = sh-1
+      end do
-      workload = 0
+    end do
-    end if
+    istep = 1+ int(0.5d0*workload/ave_workload)
    do blockb2=0, istep-1
      call davidson_add_task(handler, sh, blockb2, istep)
    enddo
  enddo
  if(blockb > 0) call davidson_add_task(handler, 1, blockb)
  call davidson_run(handler, v_0, s_0, size(v_0,1))
  do istate=1,N_st
@ -262,8 +280,4 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
 end
 BEGIN_PROVIDER [ integer, max_workload ]
  max_workload = 1000
 END_PROVIDER
--- a/src/Integrals_Bielec/map_integrals.irp.f
+++ b/src/Integrals_Bielec/map_integrals.irp.f
@ -364,7 +364,7 @@ double precision function get_mo_bielec_integral(i,j,k,l,map)
  integer(key_kind)              :: idx
  type(map_type), intent(inout)  :: map
  real(integral_kind)            :: tmp
-  PROVIDE mo_bielec_integrals_in_map
+  PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
  if ( (i >= mo_integrals_cache_min) .and. &
       (j >= mo_integrals_cache_min) .and. &
       (k >= mo_integrals_cache_min) .and. &
@ -393,7 +393,7 @@ double precision function get_mo_bielec_integral_schwartz(i,j,k,l,map)
  integer(key_kind)              :: idx
  type(map_type), intent(inout)  :: map
  real(integral_kind)            :: tmp
-  PROVIDE mo_bielec_integrals_in_map
+  PROVIDE mo_bielec_integrals_in_map  mo_integrals_cache
  if (mo_bielec_integral_schwartz(i,k)*mo_bielec_integral_schwartz(j,l) > mo_integrals_threshold) then
    double precision, external :: get_mo_bielec_integral
    !DIR$ FORCEINLINE
@ -411,7 +411,7 @@ double precision function mo_bielec_integral(i,j,k,l)
  END_DOC
  integer, intent(in)            :: i,j,k,l
  double precision               :: get_mo_bielec_integral
-  PROVIDE mo_bielec_integrals_in_map
+  PROVIDE mo_bielec_integrals_in_map mo_integrals_cache
  !DIR$ FORCEINLINE
  mo_bielec_integral = get_mo_bielec_integral(i,j,k,l,mo_integrals_map)
  return