From 5b58b062d9fb7173135c6882115e0664578f8377 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 4 Mar 2023 02:19:15 +0100 Subject: [PATCH] non_h_ints_mu: combined --- src/non_h_ints_mu/grad_squared.irp.f | 139 +++++++------ src/non_h_ints_mu/grad_squared_manu.irp.f | 174 ++++++++++++++++- src/non_h_ints_mu/new_grad_tc.irp.f | 222 +++++++++++++-------- src/non_h_ints_mu/new_grad_tc_manu.irp.f | 227 +++++++++++++--------- src/non_h_ints_mu/total_tc_int.irp.f | 58 +++++- 5 files changed, 579 insertions(+), 241 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index ff3d11f3..7925fa7c 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -299,7 +299,6 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_ END_PROVIDER -! --- ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ] @@ -364,70 +363,100 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao integer :: ipoint, i, j, k, l double precision :: weight1, ao_ik_r, ao_i_r double precision :: time0, time1 - double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:) + double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:) print*, ' providing tc_grad_square_ao ...' call wall_time(time0) - allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + if(read_tc_integ) then - b_mat = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & - !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do l = 1, ao_num - tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - - ac_mat = 0.d0 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & - , 1.d0, ac_mat, ao_num*ao_num) - deallocate(tmp, b_mat) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & - !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do l = 1, ao_num + open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read") do i = 1, ao_num - do k = 1, ao_num - tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + read(11) tc_grad_square_ao(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + + else + + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, l, ipoint) & + !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do l = 1, ao_num + tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + tc_grad_square_ao = 0.d0 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_grad_square_ao, ao_num*ao_num) + deallocate(tmp, b_mat) + + call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) + + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, k, l) & + !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) + !!$OMP DO SCHEDULE (static) + ! do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, ao_num + ! do k = 1, ao_num + ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! enddo + ! enddo + ! enddo + ! enddo + !!$OMP END DO + !!$OMP END PARALLEL + endif - deallocate(ac_mat) + if(write_tc_integ) then + open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write") + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + write(11) tc_grad_square_ao(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index 180c9588..cb9e15c4 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -11,11 +11,177 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu integer :: ipoint, i, j, k, l double precision :: weight1, ao_ik_r, ao_i_r,contrib,contrib2 double precision :: time0, time1 - double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:) + double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:) print*, ' providing tc_grad_square_ao_test ...' call wall_time(time0) + if(read_tc_integ) then + + open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read") + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + read(11) tc_grad_square_ao_test(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + + else + + provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test + + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, l, ipoint) & + !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do l = 1, ao_num + tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + tc_grad_square_ao_test = 0.d0 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_grad_square_ao_test, ao_num*ao_num) + deallocate(tmp, b_mat) + + call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) + !do i = 1, ao_num + ! do j = 1, ao_num + ! do k = i, ao_num + + ! do l = max(j,k), ao_num + ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) + ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) + ! end do + + ! !if (j.eq.k) then + ! ! do l = j+1, ao_num + ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) + ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) + ! ! end do + ! !else + ! ! do l = j, ao_num + ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) + ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) + ! ! enddo + ! !endif + + ! enddo + ! enddo + !enddo + !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test + ! !$OMP PARALLEL & + ! !$OMP DEFAULT (NONE) & + ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) + ! !$OMP DO SCHEDULE (static) + ! integer :: ii + ! ii = 0 + ! do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, ao_num + ! do k = 1, ao_num + ! if((i.lt.j) .and. (k.lt.l)) cycle + ! ii = ii + 1 + ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_square_ao_test(l,j,k,i) + ! enddo + ! enddo + ! enddo + ! enddo + ! print *, ' ii =', ii + ! !$OMP END DO + ! !$OMP END PARALLEL + + ! !$OMP PARALLEL & + ! !$OMP DEFAULT (NONE) & + ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) + ! !$OMP DO SCHEDULE (static) + ! do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, j-1 + ! do k = 1, l-1 + ! ii = ii + 1 + ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(l,j,k,i) + ! enddo + ! enddo + ! enddo + ! enddo + ! print *, ' ii =', ii + ! print *, ao_num * ao_num * ao_num * ao_num + ! !$OMP END DO + ! !$OMP END PARALLEL + + endif + + if(write_tc_integ) then + open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write") + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + write(11) tc_grad_square_ao_test(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + endif + + call wall_time(time1) + print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, ao_num, ao_num)] + + BEGIN_DOC + ! + ! tc_grad_square_ao_test_ref(k,i,l,j) = -1/2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l + double precision :: weight1, ao_ik_r, ao_i_r,contrib,contrib2 + double precision :: time0, time1 + double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:) + + print*, ' providing tc_grad_square_ao_test_ref ...' + call wall_time(time0) + provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) @@ -61,13 +227,13 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, k, l) & - !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num) + !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + tc_grad_square_ao_test_ref(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) enddo enddo enddo @@ -78,7 +244,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu deallocate(ac_mat) call wall_time(time1) - print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 + print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0 END_PROVIDER diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 854789bd..a15f690a 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ END_DOC implicit none - integer :: ipoint, i, j + integer :: ipoint, i, j, m double precision :: time0, time1 double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 @@ -33,54 +33,76 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ call wall_time(time0) PROVIDE j1b_type - - if(j1b_type .eq. 3) then - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + if(read_tc_integ) then - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read") + do m = 1, 3 + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + read(11) int2_grad1_u12_ao(i,j,ipoint,m) + enddo + enddo enddo enddo - enddo + close(11) else - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1) - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2) - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3) + if(j1b_type .eq. 3) then + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + enddo enddo enddo - enddo + else + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - int2_grad1_u12_ao *= 0.5d0 + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1) + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2) + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3) + enddo + enddo + enddo + int2_grad1_u12_ao *= 0.5d0 + endif endif + if(write_tc_integ) then + open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write") + do m = 1, 3 + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + write(11) int2_grad1_u12_ao(i,j,ipoint,m) + enddo + enddo + enddo + enddo + close(11) + endif + call wall_time(time1) print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0 @@ -290,65 +312,95 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, integer :: ipoint, i, j, k, l, m double precision :: weight1, ao_k_r, ao_i_r double precision :: time0, time1 - double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:) + double precision, allocatable :: b_mat(:,:,:,:) print*, ' providing tc_grad_and_lapl_ao ...' call wall_time(time0) - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) + if(read_tc_integ) then - b_mat = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) - - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ac_mat = 0.d0 - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, ac_mat, ao_num*ao_num) - - enddo - deallocate(b_mat) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & - !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do l = 1, ao_num + open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="read") do i = 1, ao_num - do k = 1, ao_num - tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - !tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + read(11) tc_grad_and_lapl_ao(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + + else + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + tc_grad_and_lapl_ao = 0.d0 + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) + + enddo + deallocate(b_mat) + + call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num) + ! !$OMP PARALLEL & + ! !$OMP DEFAULT (NONE) & + ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num) + ! !$OMP DO SCHEDULE (static) + ! do j = 1, ao_num + ! do l = 1, ao_num + ! do i = 1, ao_num + ! do k = 1, ao_num + ! tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! enddo + ! enddo + ! enddo + ! enddo + ! !$OMP END DO + ! !$OMP END PARALLEL - deallocate(ac_mat) + endif + + if(write_tc_integ) then + open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="write") + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + write(11) tc_grad_and_lapl_ao(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 4d85e061..47b05e52 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -24,7 +24,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po END_DOC implicit none - integer :: ipoint, i, j + integer :: ipoint, i, j, m double precision :: time0, time1 double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 @@ -32,52 +32,73 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po call wall_time(time0) PROVIDE j1b_type - - if(j1b_type .eq. 3) then - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + if(read_tc_integ) then - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint) - - int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z + open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read") + do m = 1, 3 + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + read(11) int2_grad1_u12_ao_test(i,j,ipoint,m) + enddo + enddo enddo enddo - enddo + close(11) else - - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - - int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1) - int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2) - int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3) + + if(j1b_type .eq. 3) then + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint) + int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z + enddo enddo enddo - enddo + else + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) + int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1) + int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2) + int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3) + enddo + enddo + enddo + int2_grad1_u12_ao_test *= 0.5d0 + endif - int2_grad1_u12_ao_test *= 0.5d0 + endif + if(write_tc_integ) then + open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write") + do m = 1, 3 + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + write(11) int2_grad1_u12_ao_test(i,j,ipoint,m) + enddo + enddo + enddo + enddo + close(11) endif call wall_time(time1) @@ -109,61 +130,93 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ print*, ' providing tc_grad_and_lapl_ao_test ...' call wall_time(time0) - provide int2_grad1_u12_ao_test - - allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - - b_mat = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & - !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) - do i = 1, ao_num - do k = 1, ao_num - do ipoint = 1, n_points_final_grid - - weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - ao_i_r = aos_in_r_array_transp(ipoint,i) - ao_k_r = aos_in_r_array_transp(ipoint,k) - - b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) - b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) - b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ac_mat = 0.d0 - do m = 1, 3 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, ac_mat, ao_num*ao_num) - - enddo - deallocate(b_mat) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & - !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do l = 1, ao_num + if(read_tc_integ) then + + open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read") do i = 1, ao_num - do k = 1, ao_num - tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + read(11) tc_grad_and_lapl_ao_test(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + + else + + provide int2_grad1_u12_ao_test + + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) + b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) + b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + ac_mat = 0.d0 + do m = 1, 3 + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & + , 1.d0, ac_mat, ao_num*ao_num) + + enddo + deallocate(b_mat) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l) & + !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(ac_mat) - deallocate(ac_mat) + endif + + if(write_tc_integ) then + open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write") + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + write(11) tc_grad_and_lapl_ao_test(l,k,j,i) + enddo + enddo + enddo + enddo + close(11) + endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 81747553..c1e010c7 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -1,6 +1,44 @@ ! --- +BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, ao_num)] + + implicit none + integer :: i, j, k, l + double precision :: wall1, wall0 + + print *, ' providing ao_vartc_int_chemist ...' + call wall_time(wall0) + + if(test_cycle_tc) then + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_vartc_int_chemist(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + else + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_vartc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + endif + + call wall_time(wall1) + print *, ' wall time for ao_vartc_int_chemist ', wall1 - wall0 + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao_num)] implicit none @@ -11,17 +49,17 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao call wall_time(wall0) if(test_cycle_tc)then - ao_tc_int_chemist = ao_tc_int_chemist_test + ao_tc_int_chemist = ao_tc_int_chemist_test else - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) - enddo - enddo - enddo - enddo + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo endif call wall_time(wall1)