10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-11-19 04:22:32 +01:00

added OMP loops in H_tc_triple psi

This commit is contained in:
eginer 2023-09-15 01:06:32 +02:00
parent 73fc6078ca
commit 9b4082c235
4 changed files with 125 additions and 54 deletions

View File

@ -221,6 +221,40 @@ subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze)
enddo enddo
end end
subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze)
BEGIN_DOC
! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS
!
! Assumes that the determinants are in psi_det
!
! istart, iend, ishift, istep are used in ZMQ parallelization.
END_DOC
use bitmasks
implicit none
integer, intent(in) :: N_st,sze
double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze)
integer :: i,j,degree,ist
double precision :: hmono, htwoe, hthree, htot
!$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
!$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) &
!$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot)
do i = 1, N_det
do j = 1, N_det
call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
if(degree .ne. 3)cycle
call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot)
do ist = 1, N_st
v_0(i,ist) += htot * u_0(j,ist)
enddo
enddo
enddo
!$OMP END PARALLEL DO
end
! --- ! ---
subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze)
@ -253,6 +287,40 @@ subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze)
enddo enddo
end end
subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze)
BEGIN_DOC
! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS
!
! Assumes that the determinants are in psi_det
!
! istart, iend, ishift, istep are used in ZMQ parallelization.
END_DOC
use bitmasks
implicit none
integer, intent(in) :: N_st,sze
double precision, intent(in) :: u_0(sze,N_st)
double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st)
call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze)
integer :: i,j,degree,ist
double precision :: hmono, htwoe, hthree, htot
!$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
!$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) &
!$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot)
do i = 1, N_det
do j = 1, N_det
call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
if(degree .ne. 3)cycle
call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot)
do ist = 1, N_st
v_0(i,ist) += htot * u_0(j,ist)
enddo
enddo
enddo
!$OMP END PARALLEL DO
end
! --- ! ---
subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
use bitmasks use bitmasks

View File

@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho()
if(three_e_4_idx_term) then if(three_e_4_idx_term) then
PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort
endif endif
if(pure_three_body_h_tc)then
provide three_body_ints_bi_ort
endif
if(.not. double_normal_ord .and. three_e_5_idx_term) then if(.not. double_normal_ord .and. three_e_5_idx_term) then
PROVIDE three_e_5_idx_direct_bi_ort PROVIDE three_e_5_idx_direct_bi_ort

View File

@ -225,8 +225,8 @@ end
external H_tc_dagger_u_0_opt external H_tc_dagger_u_0_opt
external H_tc_s2_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt
external H_tc_s2_u_0_opt external H_tc_s2_u_0_opt
external H_tc_s2_dagger_u_0_with_pure_three external H_tc_s2_dagger_u_0_with_pure_three_omp
external H_tc_s2_u_0_with_pure_three external H_tc_s2_u_0_with_pure_three_omp
allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag))
@ -255,7 +255,7 @@ end
if(.not.pure_three_body_h_tc)then if(.not.pure_three_body_h_tc)then
call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt)
else else
call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp)
endif endif
i_it += 1 i_it += 1
if(i_it .gt. 5) exit if(i_it .gt. 5) exit
@ -284,7 +284,7 @@ end
if(.not.pure_three_body_h_tc)then if(.not.pure_three_body_h_tc)then
call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt)
else else
call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp)
endif endif
i_it += 1 i_it += 1
if(i_it .gt. 5) exit if(i_it .gt. 5) exit

View File

@ -14,12 +14,14 @@ program test_tc
read_wf = .True. read_wf = .True.
touch read_wf touch read_wf
call routine_test_s2 call provide_all_three_ints_bi_ortho()
call routine_test_s2_davidson call routine_h_triple_left
call routine_h_triple_right
! call routine_test_s2_davidson
end end
subroutine routine_test_s2 subroutine routine_h_triple_right
implicit none implicit none
logical :: do_right logical :: do_right
integer :: sze ,i, N_st, j integer :: sze ,i, N_st, j
@ -29,67 +31,65 @@ subroutine routine_test_s2
sze = N_det sze = N_det
N_st = 1 N_st = 1
allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1))
print*,'Checking first the Left ' print*,'Checking first the Right '
do_right = .False.
do i = 1, sze
u_0(i,1) = psi_l_coef_bi_ortho(i,1)
enddo
call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right)
s_0_ref = 0.d0
do i = 1, sze
do j = 1, sze
call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij)
s_0_ref(i,1) += u_0(j,1) * sij
enddo
enddo
call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right)
accu_e = 0.d0
accu_s = 0.d0
accu_e_0 = 0.d0
accu_s_0 = 0.d0
do i = 1, sze
accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1)
accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1)
accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1))
accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1))
enddo
print*,'accu_e = ',accu_e
print*,'accu_s = ',accu_s
print*,'accu_e_0 = ',accu_e_0
print*,'accu_s_0 = ',accu_s_0
print*,'Checking then the right '
do_right = .True.
do i = 1, sze do i = 1, sze
u_0(i,1) = psi_r_coef_bi_ortho(i,1) u_0(i,1) = psi_r_coef_bi_ortho(i,1)
enddo enddo
call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) double precision :: wall0,wall1
s_0_ref = 0.d0 call wall_time(wall0)
do i = 1, sze call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze)
do j = 1, sze call wall_time(wall1)
call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) print*,'time for omp',wall1 - wall0
s_0_ref(i,1) += u_0(j,1) * sij call wall_time(wall0)
enddo call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze)
enddo call wall_time(wall1)
call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) print*,'time serial ',wall1 - wall0
accu_e = 0.d0 accu_e = 0.d0
accu_s = 0.d0 accu_s = 0.d0
accu_e_0 = 0.d0
accu_s_0 = 0.d0
do i = 1, sze do i = 1, sze
accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1)
accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1)
accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1))
accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1))
enddo enddo
print*,'accu_e = ',accu_e print*,'accu_e = ',accu_e
print*,'accu_s = ',accu_s print*,'accu_s = ',accu_s
print*,'accu_e_0 = ',accu_e_0
print*,'accu_s_0 = ',accu_s_0
end end
subroutine routine_h_triple_left
implicit none
logical :: do_right
integer :: sze ,i, N_st, j
double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0
double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:)
double precision, allocatable :: v_0_new(:,:),s_0_new(:,:)
sze = N_det
N_st = 1
allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1))
print*,'Checking the Left '
do i = 1, sze
u_0(i,1) = psi_l_coef_bi_ortho(i,1)
enddo
double precision :: wall0,wall1
call wall_time(wall0)
call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze)
call wall_time(wall1)
print*,'time for omp',wall1 - wall0
call wall_time(wall0)
call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze)
call wall_time(wall1)
print*,'time serial ',wall1 - wall0
accu_e = 0.d0
accu_s = 0.d0
do i = 1, sze
accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1))
accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1))
enddo
print*,'accu_e = ',accu_e
print*,'accu_s = ',accu_s
end
subroutine routine_test_s2_davidson subroutine routine_test_s2_davidson
implicit none implicit none
double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:)