diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 8d5d1ce4..4c8c107a 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -221,6 +221,40 @@ subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) @@ -253,6 +287,40 @@ subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) use bitmasks diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index e398d8f2..ab21d3e8 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho() if(three_e_4_idx_term) then PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif + if(pure_three_body_h_tc)then + provide three_body_ints_bi_ort + endif if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 7cb23d77..a9e22e03 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,8 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - external H_tc_s2_dagger_u_0_with_pure_three - external H_tc_s2_u_0_with_pure_three + external H_tc_s2_dagger_u_0_with_pure_three_omp + external H_tc_s2_u_0_with_pure_three_omp allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -255,7 +255,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit @@ -284,7 +284,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index b398507a..7c70b119 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -14,12 +14,14 @@ program test_tc read_wf = .True. touch read_wf - call routine_test_s2 - call routine_test_s2_davidson + call provide_all_three_ints_bi_ortho() + call routine_h_triple_left + call routine_h_triple_right +! call routine_test_s2_davidson end -subroutine routine_test_s2 +subroutine routine_h_triple_right implicit none logical :: do_right integer :: sze ,i, N_st, j @@ -29,67 +31,65 @@ subroutine routine_test_s2 sze = N_det N_st = 1 allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Left ' - do_right = .False. - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) - accu_e = 0.d0 - accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - - print*,'Checking then the right ' - do_right = .True. + print*,'Checking first the Right ' do i = 1, sze u_0(i,1) = psi_r_coef_bi_ortho(i,1) enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 accu_e = 0.d0 accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) enddo print*,'accu_e = ',accu_e print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - end +subroutine routine_h_triple_left + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking the Left ' + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 + accu_e = 0.d0 + accu_s = 0.d0 + do i = 1, sze + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + +end + + subroutine routine_test_s2_davidson implicit none double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:)