diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index eb98994c..134cbcd2 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -233,9 +233,6 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) double precision :: NAI_pol_mult_erf ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -275,7 +272,7 @@ end subroutine NAI_pol_x_mult_erf_ao ! --- -subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) +subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! @@ -293,20 +290,16 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points + double precision, intent(in) :: mu_in, C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in integer :: power_xA(3), m, ipoint double precision :: A_center(3), B_center(3), alpha, beta, coef double precision, allocatable :: integral(:) - double precision :: NAI_pol_mult_erf - ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -318,13 +311,15 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do m = 1, 3 - power_xA = power_A ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A power_xA(m) += 1 do j = 1, ao_prim_num(j_ao) @@ -332,20 +327,101 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += A_center(m) * integral(ipoint) * coef enddo enddo enddo enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_v0 + +! --- + +subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: mu_in, C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, LD_integral + integer :: power_xA(3), m, ipoint, n_points_m + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision, allocatable :: integral(:) + + ints(1:LD_ints,1:3) = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_v( A_center, B_center, power_xA, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_v( A_center, B_center, power_A, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += A_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) end subroutine NAI_pol_x_mult_erf_ao_v @@ -775,9 +851,6 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen endif ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -802,13 +875,11 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += integral * coef ! Second term = Ax * (x-Ax)**(ax) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += Ai_center(m) * integral * coef enddo @@ -817,9 +888,9 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen end subroutine NAI_pol_x_mult_erf_ao_with1s -!-- +! --- -subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points) +subroutine NAI_pol_x_mult_erf_ao_with1s_v0(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! @@ -837,25 +908,23 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3), C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) - integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m - double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi - integer :: ipoint + integer :: ipoint double precision, allocatable :: integral(:) if(beta .lt. 1d-10) then - call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + call NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) return endif - ints(:,:) = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -866,6 +935,8 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alphai = ao_expo_ordered_transp (i,i_ao) coefi = ao_coef_normalized_ordered_transp(i,i_ao) @@ -881,15 +952,17 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) do ipoint = 1, n_points ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef enddo @@ -897,10 +970,100 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c enddo enddo enddo + deallocate(integral) -end subroutine NAI_pol_x_mult_erf_ao_with1s +end subroutine NAI_pol_x_mult_erf_ao_with1s_v0 +! --- + +subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3,3), C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + + integer :: ipoint, n_points_m, LD_integral + double precision, allocatable :: integral(:) + + if(beta .lt. 1d-10) then + print *, 'small beta', i_ao, j_ao + call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + return + endif + + ints(1:LD_ints,1:3) = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_Ai + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_with1s_v ! --- diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index facb6264..213a63e4 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -1,4 +1,7 @@ +! --- + subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) + implicit none BEGIN_DOC ! gauss_ints(m) = \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2} @@ -32,6 +35,7 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) enddo enddo enddo + end @@ -152,24 +156,26 @@ end function overlap_gauss_r12_ao ! -- -subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) BEGIN_DOC + ! ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} + ! + ! n_points: nb of integrals <= min(LD_D, LD_resv) + ! END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) + integer, intent(in) :: i, j, LD_D, LD_resv, n_points + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) - integer :: power_A(3), power_B(3), l, k - double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 + integer :: ipoint + integer :: power_A(3), power_B(3), l, k + double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 double precision, allocatable :: analytical_j(:) - double precision, external :: overlap_gauss_r12 - integer :: ipoint - resv(:) = 0.d0 if(ao_overlap_abs(j,i).lt.1.d-12) then return @@ -182,6 +188,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) allocate(analytical_j(n_points)) + do l = 1, ao_prim_num(i) alpha = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) @@ -192,15 +199,18 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) if(dabs(coef) .lt. 1d-12) cycle - call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points) - do ipoint=1, n_points - resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint) + call overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points, n_points) + + do ipoint = 1, n_points + resv(ipoint) = resv(ipoint) + coef * analytical_j(ipoint) enddo + enddo enddo + deallocate(analytical_j) -end +end subroutine overlap_gauss_r12_ao_v ! --- @@ -274,7 +284,8 @@ end function overlap_gauss_r12_ao_with1s ! --- -subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + BEGIN_DOC ! ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} @@ -283,18 +294,16 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) - - integer :: power_A1(3), power_A2(3), l, k - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 - double precision :: coef12, coef12f - double precision :: gama, gama_inv - double precision :: bg, dg, bdg - - integer :: ipoint + integer, intent(in) :: i, j, n_points, LD_D, LD_resv + double precision, intent(in) :: B_center(3), beta, D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) + integer :: ipoint + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) if(ao_overlap_abs(j,i) .lt. 1.d-12) then @@ -304,7 +313,9 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then - call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + + call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + return endif @@ -312,8 +323,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} - gama = beta + delta - gama_inv = 1.d0 / gama + gama = beta + delta + gama_inv = 1.d0 / gama power_A1(1:3) = ao_power(i,1:3) power_A2(1:3) = ao_power(j,1:3) @@ -323,8 +334,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) - bg = beta * gama_inv - dg = delta * gama_inv + bg = beta * gama_inv + dg = delta * gama_inv bdg = bg * delta do ipoint=1,n_points G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) @@ -343,10 +354,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, enddo - ! --- - do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) @@ -354,19 +363,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle - call overlap_gauss_r12_v(G_center, gama, A1_center,& - A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + call overlap_gauss_r12_v(G_center, n_points, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points, n_points) - do ipoint=1,n_points + do ipoint = 1, n_points coef12f = coef12 * fact_g(ipoint) resv(ipoint) += coef12f * analytical_j(ipoint) end do enddo enddo - deallocate (fact_g, G_center, analytical_j ) + deallocate(fact_g, G_center, analytical_j) -end +end subroutine overlap_gauss_r12_ao_with1s_v +! --- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 20796512..b568bf1e 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_v0, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! @@ -11,83 +11,96 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n implicit none integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit + double precision :: r(3), int_fit, expo_fit, coef_fit double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 + int2_grad1u2_grad2u2_j1b2_v0 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& - !$OMP ao_overlap_abs) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, n_max_fit_slat, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2_v0) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + do i = 1, ao_num + do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then - cycle - endif + tmp = 0.d0 + do i_fit = 1, n_max_fit_slat - do i_1s = 1, List_all_comb_b3_size + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + ! --- - do i_fit = 1, n_max_fit_slat + coef = List_all_comb_b3_coef (1) + beta = List_all_comb_b3_expo (1) + B_center(1) = List_all_comb_b3_cent(1,1) + B_center(2) = List_all_comb_b3_cent(2,1) + B_center(3) = List_all_comb_b3_cent(3,1) - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + if(dabs(int_fit) .lt. 1d-10) cycle - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + tmp += -0.25d0 * coef * coef_fit * int_fit - do ipoint = 1, n_points_final_grid - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) - enddo + ! --- - enddo + do i_1s = 2, List_all_comb_b3_size - enddo - enddo - enddo + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + + tmp += -0.25d0 * coef * coef_fit * int_fit + enddo + + ! --- + + enddo + + int2_grad1u2_grad2u2_j1b2_v0(j,i,ipoint) = tmp + enddo + enddo + enddo !$OMP END DO - deallocate(int_fit_v) !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint) + int2_grad1u2_grad2u2_j1b2_v0(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_v0(i,j,ipoint) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 + print*, ' wall time for int2_grad1u2_grad2u2_j1b2_v0', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_u2_j1b2_v0, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! @@ -96,77 +109,96 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3), tmp - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit, expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 - double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_u2_j1b2(:,:,:) = 0.d0 + int2_u2_j1b2_v0 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2_v0) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - do i_1s = 1, List_all_comb_b3_size - - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + do i = 1, ao_num + do j = i, ao_num + tmp = 0.d0 do i_fit = 1, n_max_fit_slat expo_fit = expo_gauss_j_mu_x_2(i_fit) - coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef + coef_fit = coef_gauss_j_mu_x_2(i_fit) - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + ! --- - do ipoint = 1, n_points_final_grid - int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + coef = List_all_comb_b3_coef (1) + beta = List_all_comb_b3_expo (1) + B_center(1) = List_all_comb_b3_cent(1,1) + B_center(2) = List_all_comb_b3_cent(2,1) + B_center(3) = List_all_comb_b3_cent(3,1) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + if(dabs(int_fit) .lt. 1d-10) cycle + + tmp += coef * coef_fit * int_fit + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + + tmp += coef * coef_fit * int_fit enddo + ! --- + enddo + int2_u2_j1b2_v0(j,i,ipoint) = tmp enddo enddo enddo - !$OMP END DO - deallocate(int_fit_v) - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint) + int2_u2_j1b2_v0(j,i,ipoint) = int2_u2_j1b2_v0(i,j,ipoint) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for int2_u2_j1b2', wall1 - wall0 + print*, ' wall time for int2_u2_j1b2_v0', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_v0, (3, ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! @@ -175,113 +207,118 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3) - double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp - double precision :: tmp_x, tmp_y, tmp_z - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), dist + double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp + double precision :: tmp_x, tmp_y, tmp_z + double precision :: wall0, wall1 - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3)) + int2_u_grad1u_x_j1b2_v0 = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2_v0) + !$OMP DO + do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) - enddo + do i = 1, ao_num + do j = i, ao_num - int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 + tmp_x = 0.d0 + tmp_y = 0.d0 + tmp_z = 0.d0 + do i_fit = 1, n_max_fit_slat - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, dist, & - !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) - allocate(int_fit_v(n_points_final_grid,3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) - do i_1s = 1, List_all_comb_b3_size + ! --- - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit) + print*, ' integralll = ', int_fit(1) + print*, ' integralll = ', int_fit(2) + print*, ' integralll = ', int_fit(3) + - do i_fit = 1, n_max_fit_slat + tmp_x += coef_fit * int_fit(1) + tmp_y += coef_fit * int_fit(2) + tmp_z += coef_fit * int_fit(3) - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef + if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + ! --- - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) - centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - enddo + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s - expo_coef_1s = beta * expo_fit * alpha_1s_inv - !$OMP BARRIER - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num - call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,& - 1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid) + centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) + centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) + centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - do ipoint = 1, n_points_final_grid - coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint)) - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3) + expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist + coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) + if(dabs(coef_tmp) .lt. 1d-10) cycle + + call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) + + tmp_x += coef_tmp * int_fit(1) + tmp_y += coef_tmp * int_fit(2) + tmp_z += coef_tmp * int_fit(3) enddo ! --- enddo - enddo - !$OMP END DO NOWAIT + int2_u_grad1u_x_j1b2_v0(1,j,i,ipoint) = tmp_x + int2_u_grad1u_x_j1b2_v0(2,j,i,ipoint) = tmp_y + int2_u_grad1u_x_j1b2_v0(3,j,i,ipoint) = tmp_z + enddo enddo enddo - deallocate(int_fit_v) - !$OMP END PARALLEL - - deallocate(dist) + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) + int2_u_grad1u_x_j1b2_v0(1,j,i,ipoint) = int2_u_grad1u_x_j1b2_v0(1,i,j,ipoint) + int2_u_grad1u_x_j1b2_v0(2,j,i,ipoint) = int2_u_grad1u_x_j1b2_v0(2,i,j,ipoint) + int2_u_grad1u_x_j1b2_v0(3,j,i,ipoint) = int2_u_grad1u_x_j1b2_v0(3,i,j,ipoint) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 + print*, ' wall time for int2_u_grad1u_x_j1b2_v0', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -309,11 +346,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -369,13 +406,15 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points + (B_center(3) - r(3)) * (B_center(3) - r(3)) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) + if(dabs(coef_tmp) .lt. 1d-10) cycle + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) tmp += coef_tmp * int_fit @@ -403,7 +442,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f new file mode 100644 index 00000000..01118b24 --- /dev/null +++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f @@ -0,0 +1,562 @@ + +! --- + +BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! -\frac{1}{4} int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2 + ! + END_DOC + + implicit none + integer :: i, j, ipoint, i_1s, i_fit + integer :: i_mask_grid + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3) + double precision :: wall0, wall1 + + integer, allocatable :: n_mask_grid(:) + double precision, allocatable :: r_mask_grid(:,:) + double precision, allocatable :: int_fit_v(:) + + print*, ' providing int2_grad1u2_grad2u2_j1b2' + + provide mu_erf final_grid_points_transp j1b_pen + call wall_time(wall0) + + int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, n_mask_grid, & + !$OMP i_mask_grid, r_mask_grid) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2, & + !$OMP ao_overlap_abs) + + allocate(int_fit_v(n_points_final_grid)) + allocate(n_mask_grid(n_points_final_grid)) + allocate(r_mask_grid(n_points_final_grid,3)) + + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num + + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + cycle + endif + + do i_fit = 1, n_max_fit_slat + + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) * (-0.25d0) + + ! --- + + call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) + + i_mask_grid = 0 ! dim + n_mask_grid = 0 ! ind + r_mask_grid = 0.d0 ! val + do ipoint = 1, n_points_final_grid + + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + + if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then + i_mask_grid += 1 + n_mask_grid(i_mask_grid ) = ipoint + r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) + r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) + r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) + endif + + enddo + + if(i_mask_grid .eq. 0) cycle + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) * coef_fit + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) + + do ipoint = 1, i_mask_grid + int2_grad1u2_grad2u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) + enddo + + enddo + + ! --- + + enddo + enddo + enddo + !$OMP END DO + + deallocate(n_mask_grid) + deallocate(r_mask_grid) + deallocate(int_fit_v) + + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2 + ! + END_DOC + + implicit none + integer :: i, j, ipoint, i_1s, i_fit + integer :: i_mask_grid + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 + + integer, allocatable :: n_mask_grid(:) + double precision, allocatable :: r_mask_grid(:,:) + double precision, allocatable :: int_fit_v(:) + + print*, ' providing int2_u2_j1b2' + + provide mu_erf final_grid_points_transp j1b_pen + call wall_time(wall0) + + int2_u2_j1b2(:,:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit_v, & + !$OMP i_mask_grid, n_mask_grid, r_mask_grid ) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + + allocate(n_mask_grid(n_points_final_grid)) + allocate(r_mask_grid(n_points_final_grid,3)) + allocate(int_fit_v(n_points_final_grid)) + + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num + + do i_fit = 1, n_max_fit_slat + + expo_fit = expo_gauss_j_mu_x_2(i_fit) + coef_fit = coef_gauss_j_mu_x_2(i_fit) + + ! --- + + call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) + + i_mask_grid = 0 ! dim + n_mask_grid = 0 ! ind + r_mask_grid = 0.d0 ! val + + do ipoint = 1, n_points_final_grid + int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + + if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then + i_mask_grid += 1 + n_mask_grid(i_mask_grid ) = ipoint + r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) + r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) + r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) + endif + enddo + + if(i_mask_grid .eq. 0) cycle + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) * coef_fit + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) + + do ipoint = 1, i_mask_grid + int2_u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) + enddo + + enddo + + ! --- + + enddo + enddo + enddo + !$OMP END DO + + deallocate(n_mask_grid) + deallocate(r_mask_grid) + deallocate(int_fit_v) + + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for int2_u2_j1b2', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2 + ! + END_DOC + + implicit none + + integer :: i, j, ipoint, i_1s, i_fit + integer :: i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid(3) + double precision :: x, y, z, expo_fit, coef_fit + double precision :: coef, beta, B_center(3) + double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s + double precision :: wall0, wall1 + + integer, allocatable :: n_mask_grid(:,:) + double precision, allocatable :: r_mask_grid(:,:,:) + double precision, allocatable :: int_fit_v(:,:), dist(:,:), centr_1s(:,:,:) + + print*, ' providing int2_u_grad1u_x_j1b2' + + provide mu_erf final_grid_points_transp j1b_pen + call wall_time(wall0) + + int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, x, y, z, coef, beta, & + !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, dist, B_center,& + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, & + !$OMP i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid, & + !$OMP n_mask_grid, r_mask_grid) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) + + allocate(dist(n_points_final_grid,3)) + allocate(centr_1s(n_points_final_grid,3,3)) + allocate(n_mask_grid(n_points_final_grid,3)) + allocate(r_mask_grid(n_points_final_grid,3,3)) + allocate(int_fit_v(n_points_final_grid,3)) + + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num + do i_fit = 1, n_max_fit_slat + + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) + + ! --- + + call NAI_pol_x_mult_erf_ao_with1s_v0(i, j, expo_fit, final_grid_points_transp, n_points_final_grid, 1.d+9, final_grid_points_transp, n_points_final_grid, int_fit_v, n_points_final_grid, n_points_final_grid) + + i_mask_grid1 = 0 ! dim + i_mask_grid2 = 0 ! dim + i_mask_grid3 = 0 ! dim + n_mask_grid = 0 ! ind + r_mask_grid = 0.d0 ! val + do ipoint = 1, n_points_final_grid + + ! --- + + int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1) + + if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then + i_mask_grid1 += 1 + n_mask_grid(i_mask_grid1, 1) = ipoint + r_mask_grid(i_mask_grid1,1,1) = final_grid_points_transp(ipoint,1) + r_mask_grid(i_mask_grid1,2,1) = final_grid_points_transp(ipoint,2) + r_mask_grid(i_mask_grid1,3,1) = final_grid_points_transp(ipoint,3) + endif + + ! --- + + int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2) + + if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then + i_mask_grid2 += 1 + n_mask_grid(i_mask_grid2, 2) = ipoint + r_mask_grid(i_mask_grid2,1,2) = final_grid_points_transp(ipoint,1) + r_mask_grid(i_mask_grid2,2,2) = final_grid_points_transp(ipoint,2) + r_mask_grid(i_mask_grid2,3,2) = final_grid_points_transp(ipoint,3) + endif + + ! --- + + int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3) + + if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then + i_mask_grid3 += 1 + n_mask_grid(i_mask_grid3, 3) = ipoint + r_mask_grid(i_mask_grid3,1,3) = final_grid_points_transp(ipoint,1) + r_mask_grid(i_mask_grid3,2,3) = final_grid_points_transp(ipoint,2) + r_mask_grid(i_mask_grid3,3,3) = final_grid_points_transp(ipoint,3) + endif + + ! --- + + enddo + + if((i_mask_grid1+i_mask_grid2+i_mask_grid3) .eq. 0) cycle + + i_mask_grid(1) = i_mask_grid1 + i_mask_grid(2) = i_mask_grid2 + i_mask_grid(3) = i_mask_grid3 + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) * coef_fit + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s + expo_coef_1s = beta * expo_fit * alpha_1s_inv + + do ipoint = 1, i_mask_grid1 + + x = r_mask_grid(ipoint,1,1) + y = r_mask_grid(ipoint,2,1) + z = r_mask_grid(ipoint,3,1) + + centr_1s(ipoint,1,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) + centr_1s(ipoint,2,1) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) + centr_1s(ipoint,3,1) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) + + dist(ipoint,1) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) + enddo + + do ipoint = 1, i_mask_grid2 + + x = r_mask_grid(ipoint,1,2) + y = r_mask_grid(ipoint,2,2) + z = r_mask_grid(ipoint,3,2) + + centr_1s(ipoint,1,2) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) + centr_1s(ipoint,2,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) + centr_1s(ipoint,3,2) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) + + dist(ipoint,2) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) + enddo + + do ipoint = 1, i_mask_grid3 + + x = r_mask_grid(ipoint,1,3) + y = r_mask_grid(ipoint,2,3) + z = r_mask_grid(ipoint,3,3) + + centr_1s(ipoint,1,3) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) + centr_1s(ipoint,2,3) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) + centr_1s(ipoint,3,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) + + dist(ipoint,3) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) + enddo + + call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid) + + do ipoint = 1, i_mask_grid1 + int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1) + enddo + + do ipoint = 1, i_mask_grid2 + int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2) + enddo + + do ipoint = 1, i_mask_grid3 + int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3) + enddo + + enddo + + ! --- + + enddo + enddo + enddo + !$OMP END DO + + deallocate(dist) + deallocate(centr_1s) + deallocate(n_mask_grid) + deallocate(r_mask_grid) + deallocate(int_fit_v) + + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) + int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) + int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 + +END_PROVIDER + +! --- +! +!BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] +! ! +! END_DOC +! +! implicit none +! integer :: i, j, ipoint, i_1s, i_fit +! double precision :: r(3), int_fit, expo_fit, coef_fit, coef_tmp +! double precision :: coef, beta, B_center(3), dist +! double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp +! double precision :: wall0, wall1 +! double precision, external :: NAI_pol_mult_erf_ao_with1s +! +! provide mu_erf final_grid_points j1b_pen +! call wall_time(wall0) +! +! int2_u_grad1u_j1b2 = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & +! !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & +! !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & +! !$OMP final_grid_points, n_max_fit_slat, & +! !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) +! !$OMP DO +! do ipoint = 1, n_points_final_grid +! +! r(1) = final_grid_points(1,ipoint) +! r(2) = final_grid_points(2,ipoint) +! r(3) = final_grid_points(3,ipoint) +! +! do i = 1, ao_num +! do j = i, ao_num +! +! tmp = 0.d0 +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_j_mu_1_erf(i_fit) +! coef_fit = coef_gauss_j_mu_1_erf(i_fit) +! +! ! --- +! +! if(dabs(coef_fit) .lt. 1d-10) cycle +! +! int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) +! if(dabs(int_fit) .lt. 1d-10) cycle +! +! tmp += coef_tmp * int_fit +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & +! + (B_center(2) - r(2)) * (B_center(2) - r(2)) & +! + (B_center(3) - r(3)) * (B_center(3) - r(3)) +! +! alpha_1s = beta + expo_fit +! alpha_1s_inv = 1.d0 / alpha_1s +! centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) +! centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) +! centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) +! +! expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist +! coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) +! int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) +! +! tmp += coef_tmp * int_fit +! enddo +! +! ! --- +! +! enddo +! +! int2_u_grad1u_j1b2(j,i,ipoint) = tmp +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_u_grad1u_j1b2(j,i,ipoint) = int2_u_grad1u_j1b2(i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 +! +!END_PROVIDER +! +!! --- +! diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 1178cc31..0b40170c 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -168,7 +168,7 @@ END_PROVIDER do j = 1, nucl_num tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - print*,List_all_comb_b3(j,i),j1b_pen(j) + !print*, List_all_comb_b3(j,i), j1b_pen(j) List_all_comb_b3_expo(i) += tmp_alphaj List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) @@ -220,6 +220,10 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo + print *, ' 1st coeff & expo of lists' + print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1) + print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1) + END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 1638aa9e..cfdaf95f 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,79 +56,83 @@ end !--- -subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +! TODO apply Gaussian product three times first +subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, rvec, LD_rvec, n_points) + BEGIN_DOC + ! ! Computes the following integral : ! - ! .. math :: - ! - ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \int dr exp(-delta (r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2) ! using an array of D_centers ! + ! n_points: nb of integrals + ! END_DOC implicit none + include 'constants.include.F' - integer, intent(in) :: n_points - double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) - double precision, intent(out) :: rvec(n_points) - double precision, allocatable :: overlap(:) - double precision :: overlap_x, overlap_y, overlap_z + integer, intent(in) :: LD_D, LD_rvec, n_points + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta + double precision, intent(out) :: rvec(LD_rvec) - integer :: maxab - integer, allocatable :: iorder_a_new(:) - double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) - double precision, allocatable :: fact_a_new(:) - double precision :: alpha_new - double precision :: accu,thr, coefxy - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + integer :: maxab + integer :: d(3), i, lx, ly, lz, iorder_tmp(3), ipoint + double precision :: overlap_x, overlap_y, overlap_z + double precision :: alpha_new + double precision :: accu, thr, coefxy + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: overlap(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) - dim1=100 - thr = 1.d-10 + thr = 1.d-10 d(:) = 0 maxab = maxval(power_A(1:3)) - allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & - fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) + allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points)) - call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & - alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & - D_center, A_center, n_points) + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, & + D_center, LD_D, A_center, n_points) - do ipoint=1,n_points - rvec(ipoint) = 0.d0 - enddo + rvec(:) = 0.d0 do lx = 0, iorder_a_new(1) iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) iorder_tmp(3) = lz - call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) - do ipoint=1,n_points - rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & - A_new(ipoint,ly,2) * & - A_new(ipoint,lz,3) * overlap(ipoint) + + call overlap_gaussian_xyz_v(A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B, overlap, n_points) + + do ipoint = 1, n_points + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * A_new(ipoint,ly,2) * A_new(ipoint,lz,3) * overlap(ipoint) enddo enddo enddo enddo - do ipoint=1,n_points + do ipoint = 1, n_points rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) -end -!--- +end subroutine overlap_gauss_r12_v + !--- -subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) +subroutine overlap_gauss_xyz_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, gauss_ints) + BEGIN_DOC ! Computes the following integral : ! diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index 96625df5..5218f040 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -180,8 +180,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, enddo ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf = 0.d0 @@ -198,7 +197,8 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, end function NAI_pol_mult_erf ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points) + +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! @@ -214,74 +214,90 @@ subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(out) :: res_v(n_points) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision :: rint + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) + double precision :: rint - dist = 0.d0 + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) enddo - do ipoint=1,n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const_factor = dist * rho - if(const_factor > 80.d0) then - res_V(ipoint) = 0.d0 - cycle - endif + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif + else - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - enddo -end + endif + +end subroutine NAI_pol_mult_erf_v ! --- @@ -380,9 +396,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A d(i) = 0.d0 enddo p_new = p_new * p_new - - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf_with1s = 0.d0 @@ -398,10 +412,9 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A end function NAI_pol_mult_erf_with1s -!-- +! --- -subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2& - , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points) +subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! @@ -420,23 +433,26 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - double precision, intent(out) :: res_v(n_points) + integer, intent(in) :: n_pt_in, LD_B, LD_C, LD_resv, n_points + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: A1_center(3), A2_center(3) + double precision, intent(in) :: C_center(LD_C,3), B_center(LD_B,3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + double precision, intent(out) :: res_v(LD_resv) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new, p_new2 + integer :: i, n_pt, n_pt_out, ipoint + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new, p_new2, coef_tmp, cons_tmp - double precision :: rint + double precision :: rint + res_V(1:LD_resv) = 0.d0 + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} alpha12 = alpha1 + alpha2 alpha12_inv = 1.d0 / alpha12 @@ -446,87 +462,92 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))& - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - res_v(:) = 0.d0 return endif - ! --- - ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) & - + power_A1(3) + power_A2(3) ) + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = dtwo_pi * p_inv * p_new + cons_tmp = p * p_new2 + n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) + power_A1(3) + power_A2(3) ) - do ipoint=1,n_points + if(n_pt == 0) then - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv - dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& - + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& - + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + do ipoint = 1, n_points - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - res_v(ipoint) = 0.d0 - cycle - endif + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) - dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& - + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& - + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral - ! --- - - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - const = p * dist_integral * p_new2 - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - !TODO: VECTORIZE HERE - call give_polynomial_mult_center_one_e_erf_opt( & - A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1) + else - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) + + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + !TODO: VECTORIZE HERE + call give_polynomial_mult_center_one_e_erf_opt(A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - end do -end + endif + +end subroutine NAI_pol_mult_erf_with1s_v -! --- ! --- -subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) +subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) BEGIN_DOC ! Returns the explicit polynomial in terms of the $t$ variable of the diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 4bfb2da3..a56a432f 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -66,7 +66,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aab = ',accu + !print*,'aab = ',accu ! beta/beta/alpha three-body accu = 0.d0 @@ -83,7 +83,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'abb = ',accu + !print*,'abb = ',accu ! alpha/alpha/alpha three-body accu = 0.d0 @@ -99,7 +99,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aaa = ',accu + !print*,'aaa = ',accu ! beta/beta/beta three-body accu = 0.d0 @@ -115,7 +115,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'bbb = ',accu + !print*,'bbb = ',accu endif end diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 49ea6f05..44e27e7c 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -19,9 +19,9 @@ ! < J | Htilde | I > call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) - print *, ' hmono = ', hmono - print *, ' htwoe = ', htwoe - print *, ' hthree = ', hthree + !print *, ' hmono = ', hmono + !print *, ' htwoe = ', htwoe + !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index f68465c7..f593cefb 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -47,7 +47,9 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph end +! TODO remove dim subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,dim) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -60,6 +62,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC + implicit none include 'constants.include.F' integer, intent(in) :: dim @@ -129,7 +132,8 @@ end !--- -subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, LD_A, B_center, n_points) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -142,24 +146,26 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC - implicit none + include 'constants.include.F' - integer, intent(in) :: n_points, ldp - integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) - double precision, intent(in) :: alpha, beta ! exponents - double precision, intent(in) :: A_center(n_points,3) ! A center - double precision, intent(in) :: B_center (3) ! B center - double precision, intent(out) :: P_center(n_points,3) ! new center - double precision, intent(out) :: p ! new exponent - double precision, intent(out) :: fact_k(n_points) ! constant factor - double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial - integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials - double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + implicit none + integer, intent(in) :: n_points, ldp, LD_A + integer, intent(in) :: a(3), b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(LD_A,3) ! A center + double precision, intent(in) :: B_center(3) ! B center + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + double precision, intent(out) :: P_center(n_points,3) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(n_points,0:ldp,3) ! polynomial - integer :: n_new,i,j, ipoint, lda, ldb, xyz + integer :: n_new, i, j, ipoint, lda, ldb, xyz + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) - call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points) if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) @@ -167,13 +173,13 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = 0 allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) - call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points) iorder(1:3) = a(1:3) - do ipoint=1,n_points - do xyz=1,3 + do ipoint = 1, n_points + do xyz = 1, 3 P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo @@ -187,31 +193,31 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = maxval(b) allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) - call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + call recentered_poly2_v(P_a, lda, A_center, LD_A, P_center, a, P_b, ldb, B_center, P_center, b, n_points) iorder(1:3) = a(1:3) + b(1:3) - do xyz=1,3 + do xyz = 1, 3 if (b(xyz) == 0) then - do ipoint=1,n_points + do ipoint = 1, n_points P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo else - do i=0,iorder(xyz) - do ipoint=1,n_points + do i = 0, iorder(xyz) + do ipoint = 1, n_points P_new(ipoint,i,xyz) = 0.d0 enddo enddo - call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) + call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points) endif enddo -end +end subroutine give_explicit_poly_and_gaussian_v -!- +! --- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -273,15 +279,16 @@ subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder end +! --- +subroutine gaussian_product(a, xa, b, xb, k, p, xp) -subroutine gaussian_product(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision, intent(in) :: a,b ! Exponents double precision, intent(in) :: xa(3),xb(3) ! Centers double precision, intent(out) :: p ! New exponent @@ -312,33 +319,39 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(1) = (a*xa(1)+b*xb(1))*p_inv xp(2) = (a*xa(2)+b*xb(2))*p_inv xp(3) = (a*xa(3)+b*xb(3))*p_inv + end subroutine !--- -subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) - implicit none + +subroutine gaussian_product_v(a, xa, LD_xa, b, xb, k, p, xp, n_points) + BEGIN_DOC + ! ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! ! Using multiple A centers + ! END_DOC - integer, intent(in) :: n_points - double precision, intent(in) :: a,b ! Exponents - double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers - double precision, intent(out) :: p ! New exponent - double precision, intent(out) :: xp(n_points,3) ! New center - double precision, intent(out) :: k(n_points) ! Constant + implicit none - double precision :: p_inv + integer, intent(in) :: LD_xa, n_points + double precision, intent(in) :: a, b ! Exponents + double precision, intent(in) :: xa(LD_xa,3), xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(n_points,3) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + integer :: ipoint + double precision :: p_inv + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - integer :: ipoint ASSERT (a>0.) ASSERT (b>0.) - double precision :: xab(3), ab, ap, bp, bpxb(3) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - p = a+b p_inv = 1.d0/(a+b) ab = a*b*p_inv @@ -348,7 +361,7 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) bpxb(2) = bp*xb(2) bpxb(3) = bp*xb(3) - do ipoint=1,n_points + do ipoint = 1, n_points xab(1) = xa(ipoint,1)-xb(1) xab(2) = xa(ipoint,2)-xb(2) xab(3) = xa(ipoint,3)-xb(3) @@ -365,18 +378,19 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) endif enddo -end subroutine +end subroutine gaussian_product_v +! --- +subroutine gaussian_product_x(a, xa, b, xb, k, p, xp) -subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision , intent(in) :: a,b ! Exponents double precision , intent(in) :: xa,xb ! Centers double precision , intent(out) :: p ! New exponent @@ -625,16 +639,20 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo + end -!- -subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) - implicit none +! --- + +subroutine recentered_poly2_v(P_new, lda, x_A, LD_xA, x_P, a, P_new2, ldb, x_B, x_Q, b, n_points) + BEGIN_DOC ! Recenter two polynomials END_DOC - integer, intent(in) :: a(3),b(3), n_points, lda, ldb - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + + implicit none + integer, intent(in) :: a(3), b(3), n_points, lda, ldb, LD_xA + double precision, intent(in) :: x_A(LD_xA,3), x_P(n_points,3), x_B(3), x_Q(n_points,3) double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3) double precision :: binom_func integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz @@ -646,7 +664,6 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 if ((a(xyz)<0).or.(b(xyz)<0) ) cycle do ipoint=1,n_points @@ -698,27 +715,34 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) enddo enddo enddo -end +end subroutine recentered_poly2_v + +! --- + +subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q, n_points) -subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) - implicit none BEGIN_DOC ! Recenter two polynomials. Special case for b=(0,0,0) END_DOC - integer, intent(in) :: a(3), n_points, lda - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) - double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,3) - double precision :: binom_func - integer :: i,j,k,l, xyz, ipoint, maxab(3) - double precision, allocatable :: pows_a(:,:), pows_b(:,:) - double precision :: fa + + implicit none + integer, intent(in) :: a(3), n_points, lda, LD_xA + double precision, intent(in) :: x_A(LD_xA,3) + double precision, intent(in) :: x_B(3) + double precision, intent(in) :: x_P(n_points,3), x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3), P_new2(n_points,3) + integer :: i, j, k, l, xyz, ipoint, maxab(3) + double precision :: fa + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + + double precision :: binom_func maxab(1:3) = max(a(1:3),(/0,0,0/)) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 + do xyz = 1, 3 if (a(xyz)<0) cycle do ipoint=1,n_points pows_a(ipoint,0) = 1.d0 @@ -736,25 +760,25 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) P_new2(ipoint,xyz) = pows_b(ipoint,0) enddo - do i = 1,min(a(xyz),20) - fa = binom_transp(a(xyz)-i,a(xyz)) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 1, min(a(xyz), 20) + fa = binom_transp(a(xyz)-i, a(xyz)) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo - do i = 21,a(xyz) - fa = binom_func(a(xyz),a(xyz)-i) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 21, a(xyz) + fa = binom_func(a(xyz), a(xyz)-i) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo enddo !xyz deallocate(pows_a, pows_b) -end -!-- +end subroutine recentered_poly2_v0 + !-- subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index a62c657e..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -150,72 +153,74 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over overlap_x = factor * dx * overlap_x end - ! --- -subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& - power_B,overlap,dim, n_points) - implicit none +subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) + BEGIN_DOC !.. math:: ! - ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' - integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions - double precision, intent(in) :: alpha,beta - integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions - double precision, intent(out) :: overlap(n_points) - double precision :: F_integral_tab(0:max_dim) - double precision :: p, overlap_x, overlap_y, overlap_z - double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) - integer :: iorder_p(3), ipoint, ldp - integer :: nmax - double precision :: F_integral + + implicit none + + integer, intent(in) :: n_points + integer, intent(in) :: power_A(3), power_B(3) ! power of the x1 functions + double precision, intent(in) :: A_center(n_points,3), B_center(3) ! center of the x1 functions + double precision, intent(in) :: alpha, beta + double precision, intent(out) :: overlap(n_points) + + integer :: i + integer :: iorder_p(3), ipoint, ldp + integer :: nmax + double precision :: F_integral_tab(0:max_dim) + double precision :: p, overlap_x, overlap_y, overlap_z + double precision :: F_integral + double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) ldp = maxval( power_A(1:3) + power_B(1:3) ) - allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & - fact_pp(n_points), pp(n_points)) - call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, n_points, B_center, n_points) nmax = maxval(iorder_p) - do i=0, nmax + do i = 0, nmax F_integral_tab(i) = F_integral(i,p) enddo - integer :: i + do ipoint = 1, n_points - call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) - - do ipoint=1,n_points - if(fact_p(ipoint).lt.1d-20)then + if(fact_p(ipoint) .lt. 1d-20) then overlap(ipoint) = 1.d-10 cycle endif overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) - do i = 1,iorder_p(1) + do i = 1, iorder_p(1) overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) enddo overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) - do i = 1,iorder_p(2) + do i = 1, iorder_p(2) overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) enddo overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) - do i = 1,iorder_p(3) + do i = 1, iorder_p(3) overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) enddo - overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_p(ipoint) enddo - deallocate(P_new, P_center, fact_p, pp, fact_pp) -end + deallocate(P_new, P_center, fact_p) + +end subroutine overlap_gaussian_xyz_v ! --- diff --git a/src/utils/qsort.c b/src/utils/qsort.c deleted file mode 100644 index c011b35a..00000000 --- a/src/utils/qsort.c +++ /dev/null @@ -1,373 +0,0 @@ -/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ -#include -#include - -struct int16_t_comp { - int16_t x; - int32_t i; -}; - -int compare_int16_t( const void * l, const void * r ) -{ - const int16_t * restrict _l= l; - const int16_t * restrict _r= r; - if( *_l > *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct double_comp* A = malloc(isize * sizeof(struct double_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct float_comp* A = malloc(isize * sizeof(struct float_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i> -""" -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("TYPE", typ).replace("_big", "") ) - print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f2 -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -* Generated C file - -#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes -#include -#include -<> -#+END_SRC - -* Generated Fortran file - -#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes -module qsort_module - use iso_c_binding - - interface - <> - end interface - -end module qsort_module - -<> - -#+END_SRC - diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 deleted file mode 100644 index a72a4f9e..00000000 --- a/src/utils/qsort_module.f90 +++ /dev/null @@ -1,347 +0,0 @@ -module qsort_module - use iso_c_binding - - interface - - subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_c - - subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_c - - - - subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_big_c - - subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_big_c - - - - subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_c - - subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_c - - - - subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_big_c - - subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_big_c - - - - subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_c - - subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_c - - - - subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_big_c - - subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_big_c - - - - subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_c - - subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_c - - - - subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_big_c - - subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_big_c - - - - subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_c - - subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_c - - - - subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_big_c - - subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_big_c - - - - end interface - -end module qsort_module - - -subroutine i2sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_c(A, iorder, isize) -end subroutine i2sort - -subroutine i2sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int16_t) :: A(isize) - call i2sort_noidx_c(A, isize) -end subroutine i2sort_noidx - - - -subroutine i2sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_big_c(A, iorder, isize) -end subroutine i2sort_big - -subroutine i2sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int16_t) :: A(isize) - call i2sort_noidx_big_c(A, isize) -end subroutine i2sort_noidx_big - - - -subroutine isort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_c(A, iorder, isize) -end subroutine isort - -subroutine isort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_c(A, isize) -end subroutine isort_noidx - - - -subroutine isort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_big_c(A, iorder, isize) -end subroutine isort_big - -subroutine isort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_big_c(A, isize) -end subroutine isort_noidx_big - - - -subroutine i8sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_c(A, iorder, isize) -end subroutine i8sort - -subroutine i8sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_c(A, isize) -end subroutine i8sort_noidx - - - -subroutine i8sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_big_c(A, iorder, isize) -end subroutine i8sort_big - -subroutine i8sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_big_c(A, isize) -end subroutine i8sort_noidx_big - - - -subroutine dsort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_c(A, iorder, isize) -end subroutine dsort - -subroutine dsort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_c(A, isize) -end subroutine dsort_noidx - - - -subroutine dsort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_big_c(A, iorder, isize) -end subroutine dsort_big - -subroutine dsort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_big_c(A, isize) -end subroutine dsort_noidx_big - - - -subroutine sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_c(A, iorder, isize) -end subroutine sort - -subroutine sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_float) :: A(isize) - call sort_noidx_c(A, isize) -end subroutine sort_noidx - - - -subroutine sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_big_c(A, iorder, isize) -end subroutine sort_big - -subroutine sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_float) :: A(isize) - call sort_noidx_big_c(A, isize) -end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index 089c3871..ff40263c 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,4 +1,222 @@ BEGIN_TEMPLATE + subroutine insertion_$Xsort (x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the insertion sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + $type :: xtmp + integer :: i, i0, j, jmax + + do i=2,isize + xtmp = x(i) + i0 = iorder(i) + j=i-1 + do while (j>0) + if ((x(j) <= xtmp)) exit + x(j+1) = x(j) + iorder(j+1) = iorder(j) + j=j-1 + enddo + x(j+1) = xtmp + iorder(j+1) = i0 + enddo + end subroutine insertion_$Xsort + + subroutine quick_$Xsort(x, iorder, isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the quicksort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer, external :: omp_get_num_threads + call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) + end + + recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) + implicit none + integer, intent(in) :: isize, first, last, level + integer,intent(inout) :: iorder(isize) + $type, intent(inout) :: x(isize) + $type :: c, tmp + integer :: itmp + integer :: i, j + + if(isize<2)return + + c = x( shiftr(first+last,1) ) + i = first + j = last + do + do while (x(i) < c) + i=i+1 + end do + do while (c < x(j)) + j=j-1 + end do + if (i >= j) exit + tmp = x(i) + x(i) = x(j) + x(j) = tmp + itmp = iorder(i) + iorder(i) = iorder(j) + iorder(j) = itmp + i=i+1 + j=j-1 + enddo + if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + else + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + endif + end + + subroutine heap_$Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the heap sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + + integer :: i, k, j, l, i0 + $type :: xtemp + + l = isize/2+1 + k = isize + do while (.True.) + if (l>1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j0_8) + if (x(j)<=xtmp) exit + x(j+1_8) = x(j) + iorder(j+1_8) = iorder(j) + j = j-1_8 + enddo + x(j+1_8) = xtmp + iorder(j+1_8) = i0 + enddo + + end subroutine insertion_$Xsort_big + subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -90,3 +565,223 @@ SUBST [ X, type ] END_TEMPLATE +BEGIN_TEMPLATE + +recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) + implicit none + + BEGIN_DOC + ! Sort integer array x(isize) using the radix sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + ! iradix should be -1 in input. + END_DOC + integer*$int_type, intent(in) :: isize + integer*$int_type, intent(inout) :: iorder(isize) + integer*$type, intent(inout) :: x(isize) + integer, intent(in) :: iradix + integer :: iradix_new + integer*$type, allocatable :: x2(:), x1(:) + integer*$type :: i4 ! data type + integer*$int_type, allocatable :: iorder1(:),iorder2(:) + integer*$int_type :: i0, i1, i2, i3, i ! index type + integer*$type :: mask + integer :: err + !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 + + if (isize < 2) then + return + endif + + if (iradix == -1) then ! Sort Positive and negative + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + do i=1_$int_type,isize + if (x(i) < 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = -x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i2 + iorder(i1+i) = iorder2(i) + x(i1+i) = x2(i) + enddo + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i1 > 1_$int_type) then + call $Xradix_sort$big(x1,iorder1,i1,-2) + do i=1_$int_type,i1 + x(i) = -x1(1_$int_type+i1-i) + iorder(i) = iorder1(1_$int_type+i1-i) + enddo + endif + + if (i2>1_$int_type) then + call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) + endif + + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + return + + else if (iradix == -2) then ! Positive + + ! Find most significant bit + + i0 = 0_$int_type + i4 = maxval(x) + + iradix_new = max($integer_size-1-leadz(i4),1) + mask = ibset(0_$type,iradix_new) + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder1(i) + x(i0+i) = x1(i) + enddo + i0 = i0+i1 + i3 = i0 + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + + + do i=1_$int_type,i2 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + i0 = i0+i2 + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i3>1_$int_type) then + call $Xradix_sort$big(x,iorder,i3,iradix_new-1) + endif + + if (isize-i3>1_$int_type) then + call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) + endif + + return + endif + + ASSERT (iradix >= 0) + + if (isize < 48) then + call insertion_$Xsort$big(x,iorder,isize) + return + endif + + + allocate(x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x1, iorder1' + stop + endif + + + mask = ibset(0_$type,iradix) + i0=1_$int_type + i1=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder(i0) = iorder(i) + x(i0) = x(i) + i0 = i0+1_$int_type + else + iorder2(i1) = iorder(i) + x2(i1) = x(i) + i1 = i1+1_$int_type + endif + enddo + i0=i0-1_$int_type + i1=i1-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x2, iorder2' + stop + endif + + + if (iradix == 0) then + return + endif + + + if (i1>1_$int_type) then + call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) + endif + if (i0>1) then + call $Xradix_sort$big(x,iorder,i0,iradix-1) + endif + + end + +SUBST [ X, type, integer_size, is_big, big, int_type ] + i ; 4 ; 32 ; .False. ; ; 4 ;; + i8 ; 8 ; 64 ; .False. ; ; 4 ;; + i2 ; 2 ; 16 ; .False. ; ; 4 ;; + i ; 4 ; 32 ; .True. ; _big ; 8 ;; + i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; +END_TEMPLATE + + +