From 2655f996228808592ab2e146f98eb52aa65292c5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 26 Feb 2025 15:14:51 +0100 Subject: [PATCH] Accelerated 2rdm --- src/two_rdm_routines/davidson_like_2rdm.irp.f | 156 ++++++++++++------ 1 file changed, 103 insertions(+), 53 deletions(-) diff --git a/src/two_rdm_routines/davidson_like_2rdm.irp.f b/src/two_rdm_routines/davidson_like_2rdm.irp.f index 0e6899f8..342b4318 100644 --- a/src/two_rdm_routines/davidson_like_2rdm.irp.f +++ b/src/two_rdm_routines/davidson_like_2rdm.irp.f @@ -13,7 +13,7 @@ subroutine orb_range_2_rdm_openmp(big_array,dim1,norb,list_orb,ispin,u_0,N_st,sz END_DOC integer, intent(in) :: N_st,sze integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) double precision, intent(in) :: u_0(sze,N_st) integer :: k @@ -50,7 +50,7 @@ subroutine orb_range_2_rdm_openmp_work(big_array,dim1,norb,list_orb,ispin,u_t,N_ END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) double precision, intent(in) :: u_t(N_st,N_det) integer :: k @@ -91,7 +91,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) integer, intent(in) :: dim1,norb,list_orb(norb),ispin - double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1) + double precision, intent(inout) :: big_array(dim1,dim1,dim1,dim1,N_st) integer(omp_lock_kind) :: lock_2rdm integer :: i,j,k,l @@ -155,6 +155,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! Prepare the array of all alpha single excitations ! ------------------------------------------------- + double precision, allocatable :: big_array_local(:,:,:,:,:) + PROVIDE N_int nthreads_davidson elec_alpha_num !$OMP PARALLEL DEFAULT(NONE) NUM_THREADS(nthreads_davidson) & !$OMP SHARED(psi_bilinear_matrix_rows, N_det,lock_2rdm,& @@ -174,7 +176,7 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin !$OMP buffer, doubles, n_doubles, & !$OMP tmp_det2, idx, l, kcol_prev, & !$OMP singles_a, n_singles_a, singles_b, & - !$OMP n_singles_b, nkeys, keys, values) + !$OMP n_singles_b, nkeys, keys, values, big_array_local) ! Alpha/Beta double excitations ! ============================= @@ -185,6 +187,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin singles_b(maxab), & doubles(maxab), & idx(maxab)) + allocate( big_array_local(N_states,dim1, dim1, dim1, dim1) ) + big_array_local(:,:,:,:,:) = 0.d0 kcol_prev=-1 @@ -256,30 +260,32 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin do l= 1, N_states c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo - if(alpha_beta)then - ! only ONE contribution - if (nkeys+1 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 - endif - else if (spin_trace)then - ! TWO contributions +! if(alpha_beta)then +! ! only ONE contribution +! if (nkeys+1 .ge. sze_buff) then +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! nkeys = 0 +! endif +! else if (spin_trace)then +! ! TWO contributions if (nkeys+2 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif - endif +! endif call orb_range_off_diag_double_to_all_states_ab_dm_buffer(tmp_det,tmp_det2,c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) enddo endif enddo -! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) -! nkeys = 0 enddo - !$OMP END DO + !$OMP END DO NOWAIT +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + nkeys = 0 !$OMP DO SCHEDULE(dynamic) do k_a=istart+ishift,iend,istep @@ -334,33 +340,36 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! ---------------------------------- tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) - do i=1,n_singles_a - l_a = singles_a(i) - ASSERT (l_a <= N_det) + if(alpha_beta.or.spin_trace.or.alpha_alpha)then + do i=1,n_singles_a + l_a = singles_a(i) + ASSERT (l_a <= N_det) - lrow = psi_bilinear_matrix_rows(l_a) - ASSERT (lrow <= N_det_alpha_unique) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) + do l= 1, N_states + c_1(l) = u_t(l,l_a) * u_t(l,k_a) + enddo - tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) - do l= 1, N_states - c_1(l) = u_t(l,l_a) * u_t(l,k_a) - enddo - if(alpha_beta.or.spin_trace.or.alpha_alpha)then ! increment the alpha/beta part for single excitations if (nkeys+ 2 * elec_alpha_num .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) ! increment the alpha/alpha part for single excitations if (nkeys+4 * elec_alpha_num .ge. sze_buff ) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_aa_dm_buffer(tmp_det,tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - endif - enddo + enddo + endif ! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) ! nkeys = 0 @@ -380,7 +389,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo if (nkeys+4 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_double_to_all_states_aa_dm_buffer(tmp_det(1,1),psi_det_alpha_unique(1, lrow),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) @@ -435,33 +445,37 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin ! ---------------------------------- tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) - do i=1,n_singles_b - l_b = singles_b(i) - ASSERT (l_b <= N_det) + if(alpha_beta.or.spin_trace.or.beta_beta)then + do i=1,n_singles_b + l_b = singles_b(i) + ASSERT (l_b <= N_det) - lcol = psi_bilinear_matrix_transp_columns(l_b) - ASSERT (lcol <= N_det_beta_unique) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) + l_a = psi_bilinear_matrix_transp_order(l_b) + do l= 1, N_states + c_1(l) = u_t(l,l_a) * u_t(l,k_a) + enddo - tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) - l_a = psi_bilinear_matrix_transp_order(l_b) - do l= 1, N_states - c_1(l) = u_t(l,l_a) * u_t(l,k_a) - enddo - if(alpha_beta.or.spin_trace.or.beta_beta)then ! increment the alpha/beta part for single excitations if (nkeys+2 * elec_alpha_num .ge. sze_buff ) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_ab_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) ! increment the beta /beta part for single excitations if (nkeys+4 * elec_alpha_num .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_single_to_all_states_bb_dm_buffer(tmp_det, tmp_det2,c_1,N_st,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - endif - enddo + enddo + endif + ! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) ! nkeys = 0 @@ -481,7 +495,8 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,l_a) * u_t(l,k_a) enddo if (nkeys+4 .ge. sze_buff) then - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 endif call orb_range_off_diag_double_to_all_states_bb_dm_buffer(tmp_det(1,2),psi_det_beta_unique(1, lcol),c_1,N_st,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) @@ -517,16 +532,28 @@ subroutine orb_range_2_rdm_openmp_work_$N_int(big_array,dim1,norb,list_orb,ispin c_1(l) = u_t(l,k_a) * u_t(l,k_a) enddo - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) - nkeys = 0 + if (nkeys+elec_alpha_num*elec_alpha_num .ge. sze_buff) then +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + nkeys = 0 + endif + call orb_range_diag_to_all_states_2_rdm_dm_buffer(tmp_det,c_1,N_states,orb_bitmask,list_orb_reverse,ispin,sze_buff,nkeys,keys,values) - call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + +! call update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,lock_2rdm) + call update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) nkeys = 0 end do - !$OMP END DO + !$OMP END DO NOWAIT deallocate(buffer, singles_a, singles_b, doubles, idx, keys, values) - !$OMP END PARALLEL + !$OMP CRITICAL + do i=1,N_states + big_array(:,:,:,:,i) = big_array(:,:,:,:,i) + big_array_local(i,:,:,:,:) + enddo + !$OMP END CRITICAL + deallocate(big_array_local) + !$OMP END PARALLEL end @@ -593,3 +620,26 @@ subroutine update_keys_values_n_states(keys,values,nkeys,dim1,n_st,big_array,loc end +subroutine update_keys_values_n_states_local(keys,values,nkeys,dim1,n_st,big_array_local) + use omp_lib + implicit none + integer, intent(in) :: n_st,nkeys,dim1 + integer, intent(in) :: keys(4,nkeys) + double precision, intent(in) :: values(n_st,nkeys) + double precision, intent(inout) :: big_array_local(n_st,dim1,dim1,dim1,dim1) + + integer :: istate + integer :: i,h1,h2,p1,p2 + + do i = 1, nkeys + do istate = 1, N_st + h1 = keys(1,i) + h2 = keys(2,i) + p1 = keys(3,i) + p2 = keys(4,i) + big_array_local(istate,h1,h2,p1,p2) = big_array_local(istate,h1,h2,p1,p2) + values(istate,i) + enddo + enddo + +end +