diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index d515b0c1..19cfe4cd 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -22,15 +22,15 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp) do m = 1, 3 gauss_ints(m) += gauss_ints_tmp(m) * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) + * ao_coef_normalized_ordered_transp(k,j) enddo - enddo + enddo enddo end @@ -61,13 +61,13 @@ double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,m power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx) overlap_gauss_xyz_r12_ao_specific = gauss_int * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo end @@ -90,13 +90,13 @@ subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) aos_ints(j,i) += analytical_j * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo enddo enddo @@ -114,7 +114,7 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) implicit none integer, intent(in) :: i, j double precision, intent(in) :: D_center(3), delta - + integer :: power_A(3), power_B(3), l, k double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j @@ -133,19 +133,19 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha = ao_expo_ordered_transp (l,i) - coef1 = ao_coef_normalized_ordered_transp(l,i) + alpha = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) beta = ao_expo_ordered_transp(k,j) - coef = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta) overlap_gauss_r12_ao += coef * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao @@ -163,14 +163,13 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, implicit none integer, intent(in) :: i, j double precision, intent(in) :: B_center(3), beta, D_center(3), delta - + integer :: power_A1(3), power_A2(3), l, k double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j double precision :: G_center(3), gama, fact_g, gama_inv double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao - ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j) return @@ -204,22 +203,120 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = fact_g * ao_coef_normalized_ordered_transp(l,i) if(dabs(coef1) .lt. 1d-12) cycle do k = 1, ao_prim_num(j) alpha2 = ao_expo_ordered_transp (k,j) - coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2) overlap_gauss_r12_ao_with1s += coef12 * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao_with1s ! --- +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) + BEGIN_DOC + ! + ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} + ! using an array of D_centers. + ! + END_DOC + + implicit none + integer, intent(in) :: i, j, n_points + double precision, intent(in) :: B_center(3), beta, D_center(3,n_points), delta + double precision, intent(out) :: resv(n_points) + + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg + + double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao + + double precision, external :: overlap_gauss_r12_ao_with1s + integer :: ipoint + + double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) + + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + return + endif + + ASSERT(beta .gt. 0.d0) + + if(beta .lt. 1d-10) then + do ipoint=1,n_points + resv(ipoint) = overlap_gauss_r12_ao(D_center(1,ipoint), delta, i, j) + enddo + return + endif + + resv(:) = 0.d0 + + ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} + + gama = beta + delta + gama_inv = 1.d0 / gama + + power_A1(1:3) = ao_power(i,1:3) + power_A2(1:3) = ao_power(j,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) + + allocate (fact_g(n_points), G_center(3,n_points), analytical_j(n_points) ) + + bg = beta * gama_inv + dg = delta * gama_inv + bdg = bg * delta + do ipoint=1,n_points + G_center(1,ipoint) = bg * B_center(1) + dg * D_center(1,ipoint) + G_center(2,ipoint) = bg * B_center(2) + dg * D_center(2,ipoint) + G_center(3,ipoint) = bg * B_center(3) + dg * D_center(3,ipoint) + fact_g(ipoint) = bdg * ( & + (B_center(1) - D_center(1,ipoint)) * (B_center(1) - D_center(1,ipoint)) & + + (B_center(2) - D_center(2,ipoint)) * (B_center(2) - D_center(2,ipoint)) & + + (B_center(3) - D_center(3,ipoint)) * (B_center(3) - D_center(3,ipoint)) ) + + if(fact_g(ipoint) < 10d0) then + fact_g(ipoint) = dexp(-fact_g(ipoint)) + else + fact_g(ipoint) = 0.d0 + endif + + enddo + + ! --- + + do l = 1, ao_prim_num(i) + alpha1 = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) + + do k = 1, ao_prim_num(j) + alpha2 = ao_expo_ordered_transp (k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + if(dabs(coef12) .lt. 1d-12) cycle + + call overlap_gauss_r12_v(G_center, gama, A1_center,& + A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + + do ipoint=1,n_points + coef12f = coef12 * fact_g(ipoint) + resv(ipoint) += coef12f * analytical_j(ipoint) + end do + + enddo + enddo + + +end diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 50b4bf96..6875bc7a 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -16,54 +16,55 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n double precision :: tmp double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2 = 0.d0 + allocate(int_fit_v(n_points_final_grid)) + int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) - !$OMP DO - !do ipoint = 1, 10 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points, n_max_fit_slat, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& + !$OMP ao_overlap_abs) + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num - do i = 1, ao_num - do j = i, ao_num + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + cycle + endif - tmp = 0.d0 - do i_1s = 1, List_all_comb_b3_size + do i_1s = 1, List_all_comb_b3_size - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - do i_fit = 1, n_max_fit_slat + do i_fit = 1, n_max_fit_slat - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = coef_gauss_1_erf_x_2(i_fit) - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - tmp += -0.25d0 * coef * coef_fit * int_fit - enddo - enddo + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points, expo_fit, i, j, int_fit_v, n_points_final_grid) - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp - enddo - enddo - enddo + do ipoint = 1, n_points_final_grid + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + enddo + enddo + + enddo + enddo + enddo !$OMP END DO !$OMP END PARALLEL @@ -78,7 +79,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n call wall_time(wall1) print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -105,11 +106,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u2_j1b2) !$OMP DO !do ipoint = 1, 10 @@ -158,7 +159,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -186,12 +187,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -214,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p B_center(3) = List_all_comb_b3_cent(3,i_1s) dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + + (B_center(3) - r(3)) * (B_center(3) - r(3)) do i_fit = 1, n_max_fit_slat @@ -222,17 +223,17 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist + expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) !if(dabs(coef_tmp) .lt. 1d-10) cycle - + call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) tmp_x += coef_tmp * int_fit(1) @@ -263,7 +264,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p call wall_time(wall1) print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -291,11 +292,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -323,7 +324,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) @@ -332,7 +333,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) !if(dabs(coef_tmp) .lt. 1d-10) cycle - + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) tmp += coef_tmp * int_fit @@ -357,7 +358,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 42d37069..ced6f4e9 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -63,7 +63,6 @@ END_PROVIDER tmp_cent_z += tmp_alphaj * nucl_coord(j,3) enddo - ASSERT(List_all_comb_b2_expo(i) .gt. 0d0) if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 749227ea..0da39561 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -1,67 +1,139 @@ - double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) BEGIN_DOC ! Computes the following integral : ! - ! .. math:: - ! + ! .. math :: + ! ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! END_DOC - implicit none + implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) - double precision :: overlap_x,overlap_y,overlap_z,overlap - ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom - double precision :: A_center_new(3) ! new center - integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A - double precision :: alpha_new ! new exponent - double precision :: fact_a_new ! constant factor - double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 - dim1=100 - thr = 1.d-10 - d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 + double precision :: overlap_x,overlap_y,overlap_z,overlap + ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) + double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_center_new(3) ! new center + integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A + double precision :: alpha_new ! new exponent + double precision :: fact_a_new ! constant factor + double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 + dim1=100 + thr = 1.d-10 + d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & - delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) - ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 - accu = 0.d0 - do lx = 0, iorder_a_new(1) - coefx = A_new(lx,1) - if(dabs(coefx).lt.thr)cycle - iorder_tmp(1) = lx - do ly = 0, iorder_a_new(2) - coefy = A_new(ly,2) - coefxy = coefx * coefy - if(dabs(coefxy).lt.thr)cycle - iorder_tmp(2) = ly - do lz = 0, iorder_a_new(3) - coefz = A_new(lz,3) - coefxyz = coefxy * coefz - if(dabs(coefxyz).lt.thr)cycle - iorder_tmp(3) = lz - call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - accu += coefxyz * overlap - enddo + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,& + delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) + ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 + accu = 0.d0 + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1) + if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2) + coefxy = coefx * coefy + if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3) + coefxyz = coefxy * coefz + if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz + call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + accu += coefxyz * overlap + enddo + enddo enddo - enddo - overlap_gauss_r12 = fact_a_new * accu + overlap_gauss_r12 = fact_a_new * accu end +!--- + +subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) + BEGIN_DOC + ! Computes the following integral : + ! + ! .. math :: + ! + ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! using an array of D_centers + ! + END_DOC + + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points + double precision, intent(in) :: D_center(3,n_points), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(out) :: rvec(n_points) + + double precision :: overlap_x,overlap_y,overlap_z,overlap + + integer :: maxab + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) + double precision :: alpha_new + double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + + dim1=100 + thr = 1.d-10 + d(:) = 0 + +! maxab = maxval(d(1:3)) + maxab = max_dim + allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & + fact_a_new(n_points), iorder_a_new(3)) + + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & + D_center, A_center, n_points) + + do ipoint=1,n_points + + ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 + accu = 0.d0 + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1,ipoint) + if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2,ipoint) + coefxy = coefx * coefy + if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3,ipoint) + coefxyz = coefxy * coefz + if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz + call overlap_gaussian_xyz(A_center_new(1,ipoint),B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + accu += coefxyz * overlap + enddo + enddo + enddo + rvec(ipoint) = fact_a_new(ipoint) * accu + end do +end + +!--- +!--- subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) BEGIN_DOC ! Computes the following integral : ! ! .. math:: - ! + ! ! gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z @@ -69,14 +141,14 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: gauss_ints(3) double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -88,8 +160,8 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 gauss_ints = 0.d0 @@ -99,12 +171,12 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz do m = 1, 3 @@ -129,7 +201,7 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z @@ -137,13 +209,13 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3),mx double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -155,8 +227,8 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 overlap_gauss_xyz_r12_specific = 0.d0 @@ -166,12 +238,12 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz m = mx diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index c2bff2e8..fd9e233b 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,13 +86,11 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 p = 1.d+15 - P_new = 0.d0 - iorder = 0 fact_k = 0.d0 return endif @@ -129,6 +127,89 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, end +!--- + +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING :: : IF fact_k is too smal then: + ! returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points, ldp + integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(3,n_points) ! A center + double precision, intent(in) :: B_center (3) ! B center + double precision, intent(out) :: P_center(3,n_points) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(0:ldp,3,n_points)! polynomial + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + + integer :: n_new,i,j, ipoint, lda, ldb, xyz + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) + + lda = maxval(a) + ldb = 0 + allocate(P_a(0:lda,3,n_points),P_b(0:0,3,n_points)) + + call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + + iorder(1:3) = a(1:3) + do ipoint=1,n_points + do xyz=1,3 + P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + do i=1,a(xyz) + P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + enddo + enddo + enddo + return + + endif + + lda = maxval(a) + ldb = maxval(b) + allocate(P_a(0:lda,3,n_points),P_b(0:ldb,3,n_points)) + call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + + iorder(1:3) = a(1:3) + b(1:3) + do xyz=1,3 + if (b(xyz) == 0) then + do ipoint=1,n_points + P_new(0,xyz,ipoint) = P_b(0,xyz,ipoint) * P_a(0,xyz,ipoint) + do i=1,a(xyz) + P_new(i,xyz,ipoint) = P_new(i,xyz,ipoint) + P_b(0,xyz,ipoint) * P_a(i,xyz,ipoint) + enddo + enddo + else + do ipoint=1,n_points + do i=0,iorder(xyz) + P_new(i,xyz,ipoint) = 0.d0 + enddo + n_new=0 + call multiply_poly(P_a(0,xyz,ipoint),a(xyz),P_b(0,xyz,ipoint),b(xyz),P_new(0,xyz,ipoint),n_new) + enddo + endif + enddo + +end + +!- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -231,6 +312,59 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(3) = (a*xa(3)+b*xb(3))*p_inv end subroutine +!--- +subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! Using multiple A centers + END_DOC + + integer, intent(in) :: n_points + double precision, intent(in) :: a,b ! Exponents + double precision, intent(in) :: xa(3,n_points),xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(3,n_points) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + + integer :: ipoint + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + ap = a*p_inv + bp = b*p_inv + bpxb(1) = bp*xb(1) + bpxb(2) = bp*xb(2) + bpxb(3) = bp*xb(3) + + do ipoint=1,n_points + xab(1) = xa(1,ipoint)-xb(1) + xab(2) = xa(2,ipoint)-xb(2) + xab(3) = xa(3,ipoint)-xb(3) + k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3)) + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + xp(1,ipoint) = 0.d0 + xp(2,ipoint) = 0.d0 + xp(3,ipoint) = 0.d0 + else + k(ipoint) = dexp(-k(ipoint)) + xp(1,ipoint) = ap*xa(1,ipoint)+bpxb(1) + xp(2,ipoint) = ap*xa(2,ipoint)+bpxb(2) + xp(3,ipoint) = ap*xa(3,ipoint)+bpxb(3) + endif + enddo +end subroutine + @@ -404,22 +538,152 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = minab+1,min(b,20) P_new2(i) = binom_transp(b-i,b) * pows_b(b-i) enddo - do i = 101,a + do i = 21,a P_new(i) = binom_func(a,a-i) * pows_a(a-i) enddo - do i = 101,b + do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo end -subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) +!- +subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials + END_DOC + integer, intent(in) :: a(3),b(3), n_points, lda, ldb + double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) + double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(0:ldb,3,n_points) + double precision :: binom_func + integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa, fb + + maxab(1:3) = max(a(1:3),b(1:3)) + minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + + do xyz=1,3 + if ((a(xyz)<0).or.(b(xyz)<0) ) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) + P_new2(0,xyz,ipoint) = pows_b(ipoint,b(xyz)) + enddo + do i = 1,min(minab(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(b(xyz),20) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,b(xyz) + fb = binom_func(b(xyz),b(xyz)-i) + do ipoint=1,n_points + P_new2(i,xyz,ipoint) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + enddo +end + + +subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials. Special case for b=(0,0,0) + END_DOC + integer, intent(in) :: a(3), n_points, lda + double precision, intent(in) :: x_A(3,n_points),x_P(3,n_points),x_B(3),x_Q(3,n_points) + double precision, intent(out) :: P_new(0:lda,3,n_points),P_new2(3,n_points) + double precision :: binom_func + integer :: i,j,k,l, xyz, ipoint, maxab(3) + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa + + maxab(1:3) = max(a(1:3),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + do xyz=1,3 + if (a(xyz)<0) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(xyz,ipoint) - x_A(xyz,ipoint)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(xyz,ipoint) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (0,xyz,ipoint) = pows_a(ipoint,a(xyz)) + P_new2(xyz,ipoint) = pows_b(ipoint,0) + enddo + do i = 1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (i,xyz,ipoint) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + + enddo !xyz + + deallocate(pows_a, pows_b) +end + +!-- +!-- + +subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] + ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] + ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] ! END_DOC @@ -437,7 +701,7 @@ subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) do i = 1, 3 Lmax = iorder(i) - call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) + call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) enddo return @@ -445,14 +709,14 @@ end subroutine pol_modif_center -subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) +subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] + ! [ \sum_i ax_i (x-x_A)^i ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] + ! [ \sum_i bx_i (x-x_B)^i ] ! ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ] ! = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i) @@ -591,7 +855,7 @@ double precision function rint_sum(n_pt_out,rho,d1) u_inv=1.d0/dsqrt(rho) u=rho*u_inv rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0) -! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) +! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) endif do i=2,n_pt_out,2 diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90 index 98e73470..ceaec874 100644 --- a/src/utils/map_module.f90 +++ b/src/utils/map_module.f90 @@ -238,11 +238,11 @@ subroutine cache_map_sort(map) iorder(i) = i enddo if (cache_key_kind == 2) then - call i2radix_sort(map%key,iorder,map%n_elements,-1) + call i2sort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 4) then - call iradix_sort(map%key,iorder,map%n_elements,-1) + call isort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 8) then - call i8radix_sort(map%key,iorder,map%n_elements,-1) + call i8sort(map%key,iorder,map%n_elements,-1) endif if (integral_kind == 4) then call set_order(map%value,iorder,map%n_elements) diff --git a/src/utils/qsort.c b/src/utils/qsort.c new file mode 100644 index 00000000..c011b35a --- /dev/null +++ b/src/utils/qsort.c @@ -0,0 +1,373 @@ +/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ +#include +#include + +struct int16_t_comp { + int16_t x; + int32_t i; +}; + +int compare_int16_t( const void * l, const void * r ) +{ + const int16_t * restrict _l= l; + const int16_t * restrict _r= r; + if( *_l > *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct double_comp* A = malloc(isize * sizeof(struct double_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct float_comp* A = malloc(isize * sizeof(struct float_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i> +""" +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("TYPE", typ).replace("_big", "") ) + print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f2 +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +* Generated C file + +#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes +#include +#include +<> +#+END_SRC + +* Generated Fortran file + +#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes +module qsort_module + use iso_c_binding + + interface + <> + end interface + +end module qsort_module + +<> + +#+END_SRC + diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 new file mode 100644 index 00000000..a72a4f9e --- /dev/null +++ b/src/utils/qsort_module.f90 @@ -0,0 +1,347 @@ +module qsort_module + use iso_c_binding + + interface + + subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_c + + subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_c + + + + subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_big_c + + subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_big_c + + + + subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_c + + subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_c + + + + subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_big_c + + subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_big_c + + + + subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_c + + subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_c + + + + subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_big_c + + subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_big_c + + + + subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_c + + subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_c + + + + subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_big_c + + subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_big_c + + + + subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_c + + subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_c + + + + subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_big_c + + subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_big_c + + + + end interface + +end module qsort_module + + +subroutine i2sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_c(A, iorder, isize) +end subroutine i2sort + +subroutine i2sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_c(A, isize) +end subroutine i2sort_noidx + + + +subroutine i2sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_big_c(A, iorder, isize) +end subroutine i2sort_big + +subroutine i2sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_big_c(A, isize) +end subroutine i2sort_noidx_big + + + +subroutine isort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_c(A, iorder, isize) +end subroutine isort + +subroutine isort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_c(A, isize) +end subroutine isort_noidx + + + +subroutine isort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_big_c(A, iorder, isize) +end subroutine isort_big + +subroutine isort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_big_c(A, isize) +end subroutine isort_noidx_big + + + +subroutine i8sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_c(A, iorder, isize) +end subroutine i8sort + +subroutine i8sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_c(A, isize) +end subroutine i8sort_noidx + + + +subroutine i8sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_big_c(A, iorder, isize) +end subroutine i8sort_big + +subroutine i8sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_big_c(A, isize) +end subroutine i8sort_noidx_big + + + +subroutine dsort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_c(A, iorder, isize) +end subroutine dsort + +subroutine dsort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_c(A, isize) +end subroutine dsort_noidx + + + +subroutine dsort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_big_c(A, iorder, isize) +end subroutine dsort_big + +subroutine dsort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_big_c(A, isize) +end subroutine dsort_noidx_big + + + +subroutine sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_c(A, iorder, isize) +end subroutine sort + +subroutine sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_c(A, isize) +end subroutine sort_noidx + + + +subroutine sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_big_c(A, iorder, isize) +end subroutine sort_big + +subroutine sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_big_c(A, isize) +end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index a63eb4a3..089c3871 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,222 +1,4 @@ BEGIN_TEMPLATE - subroutine insertion_$Xsort (x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the insertion sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - $type :: xtmp - integer :: i, i0, j, jmax - - do i=2,isize - xtmp = x(i) - i0 = iorder(i) - j=i-1 - do while (j>0) - if ((x(j) <= xtmp)) exit - x(j+1) = x(j) - iorder(j+1) = iorder(j) - j=j-1 - enddo - x(j+1) = xtmp - iorder(j+1) = i0 - enddo - end subroutine insertion_$Xsort - - subroutine quick_$Xsort(x, iorder, isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the quicksort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer, external :: omp_get_num_threads - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - end - - recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) - implicit none - integer, intent(in) :: isize, first, last, level - integer,intent(inout) :: iorder(isize) - $type, intent(inout) :: x(isize) - $type :: c, tmp - integer :: itmp - integer :: i, j - - if(isize<2)return - - c = x( shiftr(first+last,1) ) - i = first - j = last - do - do while (x(i) < c) - i=i+1 - end do - do while (c < x(j)) - j=j-1 - end do - if (i >= j) exit - tmp = x(i) - x(i) = x(j) - x(j) = tmp - itmp = iorder(i) - iorder(i) = iorder(j) - iorder(j) = itmp - i=i+1 - j=j-1 - enddo - if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - else - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - endif - end - - subroutine heap_$Xsort(x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the heap sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - - integer :: i, k, j, l, i0 - $type :: xtemp - - l = isize/2+1 - k = isize - do while (.True.) - if (l>1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j0_8) - if (x(j)<=xtmp) exit - x(j+1_8) = x(j) - iorder(j+1_8) = iorder(j) - j = j-1_8 - enddo - x(j+1_8) = xtmp - iorder(j+1_8) = i0 - enddo - - end subroutine insertion_$Xsort_big - subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -563,223 +90,3 @@ SUBST [ X, type ] END_TEMPLATE -BEGIN_TEMPLATE - -recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) - implicit none - - BEGIN_DOC - ! Sort integer array x(isize) using the radix sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - ! iradix should be -1 in input. - END_DOC - integer*$int_type, intent(in) :: isize - integer*$int_type, intent(inout) :: iorder(isize) - integer*$type, intent(inout) :: x(isize) - integer, intent(in) :: iradix - integer :: iradix_new - integer*$type, allocatable :: x2(:), x1(:) - integer*$type :: i4 ! data type - integer*$int_type, allocatable :: iorder1(:),iorder2(:) - integer*$int_type :: i0, i1, i2, i3, i ! index type - integer*$type :: mask - integer :: err - !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 - - if (isize < 2) then - return - endif - - if (iradix == -1) then ! Sort Positive and negative - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - do i=1_$int_type,isize - if (x(i) < 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = -x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i2 - iorder(i1+i) = iorder2(i) - x(i1+i) = x2(i) - enddo - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i1 > 1_$int_type) then - call $Xradix_sort$big(x1,iorder1,i1,-2) - do i=1_$int_type,i1 - x(i) = -x1(1_$int_type+i1-i) - iorder(i) = iorder1(1_$int_type+i1-i) - enddo - endif - - if (i2>1_$int_type) then - call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) - endif - - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - return - - else if (iradix == -2) then ! Positive - - ! Find most significant bit - - i0 = 0_$int_type - i4 = maxval(x) - - iradix_new = max($integer_size-1-leadz(i4),1) - mask = ibset(0_$type,iradix_new) - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder1(i) - x(i0+i) = x1(i) - enddo - i0 = i0+i1 - i3 = i0 - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - - - do i=1_$int_type,i2 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - i0 = i0+i2 - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i3>1_$int_type) then - call $Xradix_sort$big(x,iorder,i3,iradix_new-1) - endif - - if (isize-i3>1_$int_type) then - call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) - endif - - return - endif - - ASSERT (iradix >= 0) - - if (isize < 48) then - call insertion_$Xsort$big(x,iorder,isize) - return - endif - - - allocate(x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x1, iorder1' - stop - endif - - - mask = ibset(0_$type,iradix) - i0=1_$int_type - i1=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder(i0) = iorder(i) - x(i0) = x(i) - i0 = i0+1_$int_type - else - iorder2(i1) = iorder(i) - x2(i1) = x(i) - i1 = i1+1_$int_type - endif - enddo - i0=i0-1_$int_type - i1=i1-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x2, iorder2' - stop - endif - - - if (iradix == 0) then - return - endif - - - if (i1>1_$int_type) then - call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - endif - if (i0>1) then - call $Xradix_sort$big(x,iorder,i0,iradix-1) - endif - - end - -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; -END_TEMPLATE - - -