Accelerated 2rdm

2025-04-25 09:44:43 +02:00 · 2025-02-26 15:14:51 +01:00 · 2025-02-26 15:14:51 +01:00 · 2655f99622
commit 2655f99622
parent 62c13860ba
1 changed files with 103 additions and 53 deletions
--- a/src/two_rdm_routines/davidson_like_2rdm.irp.f
+++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f
@ -13,7 +13,7 @@ subroutine orb_range_2_rdm_openmp(big_array,dim1,norb,list_orb,ispin,u_0,N_st,sz
   END_DOC
   integer, intent(in)             :: N_st,sze
   integer, intent(in)             :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)
   double precision, intent(in)    :: u_0(sze,N_st)

   integer                        :: k
@ -50,7 +50,7 @@ subroutine orb_range_2_rdm_openmp_work(big_array,dim1,norb,list_orb,ispin,u_t,N_
   END_DOC
   integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
   integer, intent(in)             :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)
   double precision, intent(in)   :: u_t(N_st,N_det)

   integer :: k
@ -91,7 +91,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
   integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
   double precision, intent(in)   :: u_t(N_st,N_det)
   integer, intent(in)            :: dim1,norb,list_orb(norb),ispin
-   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1)
+   double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st)

   integer(omp_lock_kind)          :: lock_2rdm
   integer                        :: i,j,k,l
@ -155,6 +155,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
   ! Prepare the array of all alpha single excitations
   ! -------------------------------------------------

+   double precision, allocatable :: big_array_local(:,:,:,:,:)
+
   PROVIDE N_int nthreads_davidson elec_alpha_num
    !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson)      &
            !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,lock_2rdm,&
@ -174,7 +176,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
            !$OMP          buffer, doubles, n_doubles,               &
            !$OMP          tmp_det2, idx, l, kcol_prev,              &
            !$OMP          singles_a, n_singles_a, singles_b,        &
-            !$OMP          n_singles_b, nkeys, keys, values)
+            !$OMP          n_singles_b, nkeys, keys, values, big_array_local)

   ! Alpha/Beta double excitations
   ! =============================
@ -185,6 +187,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
       singles_b(maxab),                                             &
       doubles(maxab),                                               &
       idx(maxab))
+   allocate( big_array_local(N_states,dim1, dim1, dim1, dim1) )
+   big_array_local(:,:,:,:,:) = 0.d0

   kcol_prev=-1

@ -256,30 +260,32 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
         do l= 1, N_states
           c_1(l) = u_t(l,l_a) * u_t(l,k_a)
         enddo
-         if(alpha_beta)then
-          ! only ONE contribution
-          if (nkeys+1 .ge. sze_buff) then
-            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-            nkeys = 0
-          endif
-         else if (spin_trace)then
-          ! TWO contributions
+!         if(alpha_beta)then
+!          ! only ONE contribution
+!          if (nkeys+1 .ge. sze_buff) then
+!            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!            nkeys = 0
+!          endif
+!         else if (spin_trace)then
+!          ! TWO contributions
          if (nkeys+2 .ge. sze_buff) then
-            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!            call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+            call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
            nkeys = 0
          endif
-         endif
+!         endif
         call orb_range_off_diag_double_to_all_states_ab_dm_buffer(tmp_det,tmp_det2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)

       enddo
      endif

     enddo
-!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-!     nkeys = 0

   enddo
-     !$OMP END DO
+     !$OMP END DO NOWAIT
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+     call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+     nkeys = 0

     !$OMP DO SCHEDULE(dynamic)
   do k_a=istart+ishift,iend,istep
@ -334,33 +340,36 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
     ! ----------------------------------

     tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
-     do i=1,n_singles_a
-       l_a = singles_a(i)
-       ASSERT (l_a <= N_det)
+     if(alpha_beta.or.spin_trace.or.alpha_alpha)then
+       do i=1,n_singles_a
+         l_a = singles_a(i)
+         ASSERT (l_a <= N_det)

-       lrow = psi_bilinear_matrix_rows(l_a)
-       ASSERT (lrow <= N_det_alpha_unique)
+         lrow = psi_bilinear_matrix_rows(l_a)
+         ASSERT (lrow <= N_det_alpha_unique)
+
+         tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+         do l= 1, N_states
+           c_1(l) = u_t(l,l_a) * u_t(l,k_a)
+         enddo

-       tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
-       do l= 1, N_states
-         c_1(l) = u_t(l,l_a) * u_t(l,k_a)
-       enddo
-       if(alpha_beta.or.spin_trace.or.alpha_alpha)then
         ! increment the alpha/beta part for single excitations
         if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then
-           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
         call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
         ! increment the alpha/alpha part for single excitations
         if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then
-           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!           call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+           call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
           nkeys = 0
         endif
         call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-       endif

-     enddo
+       enddo
+     endif

 !     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
 !     nkeys = 0
@ -380,7 +389,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
          c_1(l) = u_t(l,l_a) * u_t(l,k_a)
        enddo
        if (nkeys+4 .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
          nkeys = 0
        endif
        call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
@ -435,33 +445,37 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
     ! ----------------------------------

     tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
-     do i=1,n_singles_b
-       l_b = singles_b(i)
-       ASSERT (l_b <= N_det)
+     if(alpha_beta.or.spin_trace.or.beta_beta)then
+      do i=1,n_singles_b
+        l_b = singles_b(i)
+        ASSERT (l_b <= N_det)

-       lcol = psi_bilinear_matrix_transp_columns(l_b)
-       ASSERT (lcol <= N_det_beta_unique)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        do l= 1, N_states
+          c_1(l) = u_t(l,l_a) * u_t(l,k_a)
+        enddo

-       tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
-       l_a = psi_bilinear_matrix_transp_order(l_b)
-       do l= 1, N_states
-         c_1(l) = u_t(l,l_a) * u_t(l,k_a)
-       enddo
-       if(alpha_beta.or.spin_trace.or.beta_beta)then
        ! increment the alpha/beta  part for single excitations
        if (nkeys+2 * elec_alpha_num .ge. sze_buff ) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
          nkeys = 0
        endif
         call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
        ! increment the beta /beta  part for single excitations
        if (nkeys+4 * elec_alpha_num .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
          nkeys = 0
        endif
        call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-       endif
-     enddo
+       enddo
+     endif
+
 !     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
 !     nkeys = 0

@ -481,7 +495,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
          c_1(l) = u_t(l,l_a) * u_t(l,k_a)
        enddo
        if (nkeys+4 .ge. sze_buff) then
-          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+!          call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+          call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
          nkeys = 0
        endif
        call orb_range_off_diag_double_to_all_states_bb_dm_buffer(tmp_det(1,2),psi_det_beta_unique(1, lcol),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
@ -517,16 +532,28 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin
       c_1(l) = u_t(l,k_a) * u_t(l,k_a)
     enddo

-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
-     nkeys = 0
+     if (nkeys+elec_alpha_num*elec_alpha_num .ge. sze_buff) then
+!       call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+       call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+       nkeys = 0
+     endif
+
     call orb_range_diag_to_all_states_2_rdm_dm_buffer(tmp_det,c_1,N_states,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values)
-     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+
+!     call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm)
+     call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
     nkeys = 0

   end do
-    !$OMP END DO
+    !$OMP END DO NOWAIT
   deallocate(buffer, singles_a, singles_b, doubles, idx, keys, values)
-    !$OMP END PARALLEL
+   !$OMP CRITICAL
+   do i=1,N_states
+     big_array(:,:,:,:,i) = big_array(:,:,:,:,i) + big_array_local(i,:,:,:,:)
+   enddo
+   !$OMP END CRITICAL
+   deallocate(big_array_local)
+   !$OMP END PARALLEL

 end

@ -593,3 +620,26 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc

 end

+subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local)
+ use omp_lib
+ implicit none
+ integer, intent(in) :: n_st,nkeys,dim1
+ integer, intent(in) :: keys(4,nkeys)
+ double precision, intent(in) :: values(n_st,nkeys)
+ double precision, intent(inout) :: big_array_local(n_st,dim1,dim1,dim1,dim1)
+
+ integer :: istate
+ integer :: i,h1,h2,p1,p2
+
+ do i = 1, nkeys
+   do istate = 1, N_st
+    h1 = keys(1,i)
+    h2 = keys(2,i)
+    p1 = keys(3,i)
+    p2 = keys(4,i)
+    big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i)
+   enddo
+ enddo
+
+end
+