From 93adc8d6c1316f95151fe7ab32829db657cbd34c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:57:28 +0200 Subject: [PATCH] no aaa // --- .../normal_ordered_contractions.irp.f | 539 +++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 40 ++ 2 files changed, 560 insertions(+), 19 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index f066c958..d11c6727 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -500,7 +500,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aaa_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] BEGIN_DOC ! @@ -508,12 +508,12 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! h1 < h2 ! p1 > p2 ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! else: ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) @@ -536,7 +536,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) - print*,' Providing no_aaa_contraction ...' + print*,' Providing no_aaa_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -556,7 +556,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ if(Ne(2) .lt. 3) then - no_aaa_contraction = 0.d0 + no_aaa_contraction_v0 = 0.d0 else @@ -630,7 +630,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -662,7 +662,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -736,7 +736,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -776,7 +776,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -849,7 +849,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -881,7 +881,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -955,7 +955,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -995,7 +995,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1010,19 +1010,19 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2, tmpvec_3) - no_aaa_contraction = -0.5d0 * no_aaa_contraction + no_aaa_contraction_v0 = -0.5d0 * no_aaa_contraction_v0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + !$OMP SHARED (no_aaa_contraction_v0, mo_num) !$OMP DO do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) -= no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1034,7 +1034,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) = -no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1046,17 +1046,18 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = h1+1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + no_aaa_contraction_v0(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL endif call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + print*,' Wall time for no_aaa_contraction_v0', wall1-wall0 END_PROVIDER @@ -1519,3 +1520,503 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 4f190407..4404bc02 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -21,6 +21,7 @@ program tc_bi_ortho !call test_no() call test_no_aba() call test_no_aab() + call test_no_aaa() end subroutine test_h_u0 @@ -382,4 +383,43 @@ end ! --- +subroutine test_no_aaa() + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aaa_contraction ...' + + thr = 1d-8 + + PROVIDE no_aaa_contraction_v0 + PROVIDE no_aaa_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aaa_contraction (l,k,j,i) + ref = no_aaa_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aaa_contraction = ', accu / dble(mo_num)**4 + + return +end + +! ---