diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index 4fc9ad6b..3d7fbe50 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -233,9 +233,6 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) double precision :: NAI_pol_mult_erf ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -260,11 +257,11 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) ! First term = (x-Ax)**(ax+1) - integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) ints(m) += integral * coef ! Second term = Ax * (x-Ax)**(ax) - integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) ints(m) += A_center(m) * integral * coef enddo @@ -274,7 +271,8 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) end subroutine NAI_pol_x_mult_erf_ao ! --- -subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + +subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! @@ -292,20 +290,16 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points + double precision, intent(in) :: mu_in, C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in integer :: power_xA(3), m, ipoint double precision :: A_center(3), B_center(3), alpha, beta, coef double precision, allocatable :: integral(:) - double precision :: NAI_pol_mult_erf - ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -317,13 +311,15 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do m = 1, 3 - power_xA = power_A ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A power_xA(m) += 1 do j = 1, ao_prim_num(j_ao) @@ -331,25 +327,494 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += A_center(m) * integral(ipoint) * coef enddo enddo enddo enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_v0 + +! --- + +subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: mu_in, C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, LD_integral + integer :: power_xA(3), m, ipoint, n_points_m + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision, allocatable :: integral(:) + + ints(1:LD_ints,1:3) = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_v( A_center, B_center, power_xA, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_v( A_center, B_center, power_A, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += A_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) end subroutine NAI_pol_x_mult_erf_ao_v ! --- + +double precision function NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_x = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(1) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_x += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_x += A_center(1) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_x + +! --- + +double precision function NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_y = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(2) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_y += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_y += A_center(2) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_y + +! --- + +double precision function NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_z = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(3) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_z += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_z += A_center(3) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_z + +! --- + +double precision function NAI_pol_x_mult_erf_ao_with1s_x(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_x + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_x = NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_x = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(1) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_x += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_x += Ai_center(1) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_x + +! --- + +double precision function NAI_pol_x_mult_erf_ao_with1s_y(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_y + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_y = NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_y = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(2) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_y += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_y += Ai_center(2) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_y + +! --- + +double precision function NAI_pol_x_mult_erf_ao_with1s_z(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_z + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_z = NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_z = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(3) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_z += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_z += Ai_center(3) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_z + +! --- + subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) BEGIN_DOC @@ -384,9 +849,6 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen endif ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -411,13 +873,11 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += integral * coef ! Second term = Ax * (x-Ax)**(ax) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += Ai_center(m) * integral * coef enddo @@ -426,9 +886,9 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen end subroutine NAI_pol_x_mult_erf_ao_with1s -!-- +! --- -subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points) +subroutine NAI_pol_x_mult_erf_ao_with1s_v0(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! @@ -446,25 +906,23 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3), C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) - integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m - double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi - integer :: ipoint + integer :: ipoint double precision, allocatable :: integral(:) if(beta .lt. 1d-10) then - call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + call NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) return endif - ints(:,:) = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -475,6 +933,8 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alphai = ao_expo_ordered_transp (i,i_ao) coefi = ao_coef_normalized_ordered_transp(i,i_ao) @@ -490,15 +950,17 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) do ipoint = 1, n_points ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef enddo @@ -506,10 +968,100 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c enddo enddo enddo + deallocate(integral) -end subroutine NAI_pol_x_mult_erf_ao_with1s +end subroutine NAI_pol_x_mult_erf_ao_with1s_v0 +! --- + +subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3,3), C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + + integer :: ipoint, n_points_m, LD_integral + double precision, allocatable :: integral(:) + + if(beta .lt. 1d-10) then + print *, 'small beta', i_ao, j_ao + call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + return + endif + + ints(1:LD_ints,1:3) = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_Ai + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_with1s_v ! --- diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index facb6264..213a63e4 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -1,4 +1,7 @@ +! --- + subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) + implicit none BEGIN_DOC ! gauss_ints(m) = \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2} @@ -32,6 +35,7 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) enddo enddo enddo + end @@ -152,24 +156,26 @@ end function overlap_gauss_r12_ao ! -- -subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) BEGIN_DOC + ! ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} + ! + ! n_points: nb of integrals <= min(LD_D, LD_resv) + ! END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) + integer, intent(in) :: i, j, LD_D, LD_resv, n_points + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) - integer :: power_A(3), power_B(3), l, k - double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 + integer :: ipoint + integer :: power_A(3), power_B(3), l, k + double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 double precision, allocatable :: analytical_j(:) - double precision, external :: overlap_gauss_r12 - integer :: ipoint - resv(:) = 0.d0 if(ao_overlap_abs(j,i).lt.1.d-12) then return @@ -182,6 +188,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) allocate(analytical_j(n_points)) + do l = 1, ao_prim_num(i) alpha = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) @@ -192,15 +199,18 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) if(dabs(coef) .lt. 1d-12) cycle - call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points) - do ipoint=1, n_points - resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint) + call overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points, n_points) + + do ipoint = 1, n_points + resv(ipoint) = resv(ipoint) + coef * analytical_j(ipoint) enddo + enddo enddo + deallocate(analytical_j) -end +end subroutine overlap_gauss_r12_ao_v ! --- @@ -274,7 +284,8 @@ end function overlap_gauss_r12_ao_with1s ! --- -subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + BEGIN_DOC ! ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} @@ -283,18 +294,16 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) - - integer :: power_A1(3), power_A2(3), l, k - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 - double precision :: coef12, coef12f - double precision :: gama, gama_inv - double precision :: bg, dg, bdg - - integer :: ipoint + integer, intent(in) :: i, j, n_points, LD_D, LD_resv + double precision, intent(in) :: B_center(3), beta, D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) + integer :: ipoint + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) if(ao_overlap_abs(j,i) .lt. 1.d-12) then @@ -304,7 +313,9 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then - call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + + call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + return endif @@ -312,8 +323,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} - gama = beta + delta - gama_inv = 1.d0 / gama + gama = beta + delta + gama_inv = 1.d0 / gama power_A1(1:3) = ao_power(i,1:3) power_A2(1:3) = ao_power(j,1:3) @@ -323,8 +334,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) - bg = beta * gama_inv - dg = delta * gama_inv + bg = beta * gama_inv + dg = delta * gama_inv bdg = bg * delta do ipoint=1,n_points G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) @@ -343,10 +354,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, enddo - ! --- - do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) @@ -354,19 +363,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle - call overlap_gauss_r12_v(G_center, gama, A1_center,& - A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + call overlap_gauss_r12_v(G_center, n_points, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points, n_points) - do ipoint=1,n_points + do ipoint = 1, n_points coef12f = coef12 * fact_g(ipoint) resv(ipoint) += coef12f * analytical_j(ipoint) end do enddo enddo - deallocate (fact_g, G_center, analytical_j ) + deallocate(fact_g, G_center, analytical_j) -end +end subroutine overlap_gauss_r12_ao_with1s_v +! --- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index c9c3b259..39249e0a 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -11,65 +11,72 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n implicit none integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit + double precision :: r(3), int_fit, expo_fit, coef_fit double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) + double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 + int2_grad1u2_grad2u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& - !$OMP ao_overlap_abs) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + do i = 1, ao_num + do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then - cycle - endif + tmp = 0.d0 + do i_fit = 1, ng_fit_jast - do i_1s = 1, List_all_comb_b3_size + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + ! --- - do i_fit = 1, n_max_fit_slat + int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) + tmp += -0.25d0 * coef_fit * int_fit + if(dabs(int_fit) .lt. 1d-10) cycle - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef + ! --- - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + do i_1s = 2, List_all_comb_b3_size - do ipoint = 1, n_points_final_grid - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) - enddo + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - enddo + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - enddo - enddo - enddo + tmp += -0.25d0 * coef * coef_fit * int_fit + enddo + + ! --- + + enddo + + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp + enddo + enddo + enddo !$OMP END DO - deallocate(int_fit_v) !$OMP END PARALLEL do ipoint = 1, n_points_final_grid @@ -83,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n call wall_time(wall1) print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -96,60 +103,73 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3), tmp - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit, expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 - double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: overlap_gauss_r12_ao + double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_u2_j1b2(:,:,:) = 0.d0 + int2_u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - do i_1s = 1, List_all_comb_b3_size + do i = 1, ao_num + do j = i, ao_num - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - - do i_fit = 1, n_max_fit_slat + tmp = 0.d0 + do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) - coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef + coef_fit = coef_gauss_j_mu_x_2(i_fit) - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + ! --- - do ipoint = 1, n_points_final_grid - int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) + tmp += coef_fit * int_fit + if(dabs(int_fit) .lt. 1d-10) cycle + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + + tmp += coef * coef_fit * int_fit enddo + ! --- + enddo + int2_u2_j1b2(j,i,ipoint) = tmp enddo enddo enddo - !$OMP END DO - deallocate(int_fit_v) - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -162,7 +182,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -175,95 +195,97 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3) - double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp - double precision :: tmp_x, tmp_y, tmp_z - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), dist + double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp + double precision :: tmp_x, tmp_y, tmp_z + double precision :: wall0, wall1 - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3)) + int2_u_grad1u_x_j1b2 = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) - enddo + do i = 1, ao_num + do j = i, ao_num - int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 + tmp_x = 0.d0 + tmp_y = 0.d0 + tmp_z = 0.d0 + do i_fit = 1, ng_fit_jast - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, dist, & - !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) - allocate(int_fit_v(n_points_final_grid,3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) - do i_1s = 1, List_all_comb_b3_size + ! --- - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit) + tmp_x += coef_fit * int_fit(1) + tmp_y += coef_fit * int_fit(2) + tmp_z += coef_fit * int_fit(3) + if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) + centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) + centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - enddo + expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist + coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) + if(dabs(coef_tmp) .lt. 1d-10) cycle + + call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) - expo_coef_1s = beta * expo_fit * alpha_1s_inv - !$OMP BARRIER - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num - call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,& - 1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid) - - do ipoint = 1, n_points_final_grid - coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint)) - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3) + tmp_x += coef_tmp * int_fit(1) + tmp_y += coef_tmp * int_fit(2) + tmp_z += coef_tmp * int_fit(3) enddo - enddo - enddo - !$OMP END DO NOWAIT + ! --- + + enddo + + int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x + int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y + int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z + enddo enddo enddo - deallocate(int_fit_v) - !$OMP END PARALLEL - - deallocate(dist) + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -278,7 +300,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p call wall_time(wall1) print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -306,11 +328,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -321,24 +343,33 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points r(3) = final_grid_points(3,ipoint) tmp = 0.d0 - do i_1s = 1, List_all_comb_b3_size + do i_fit = 1, ng_fit_jast - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) + if(dabs(int_fit) .lt. 1d-10) cycle + + tmp += coef_fit * int_fit + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) @@ -352,6 +383,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points tmp += coef_tmp * int_fit enddo + + ! --- + enddo int2_u_grad1u_j1b2(j,i,ipoint) = tmp @@ -372,7 +406,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f new file mode 100644 index 00000000..6d3931f5 --- /dev/null +++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f @@ -0,0 +1,453 @@ +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! -\frac{1}{4} int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2 +! ! +! END_DOC +! +! implicit none +! integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid +! double precision :: r(3), expo_fit, coef_fit +! double precision :: coef, beta, B_center(3) +! double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:) +! double precision, allocatable :: r_mask_grid(:,:) +! double precision, allocatable :: int_fit_v(:) +! +! print*, ' providing int2_grad1u2_grad2u2_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& +! !$OMP coef_fit, expo_fit, int_fit_v, n_mask_grid, & +! !$OMP i_mask_grid, r_mask_grid) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2, & +! !$OMP ao_overlap_abs) +! +! allocate(int_fit_v(n_points_final_grid)) +! allocate(n_mask_grid(n_points_final_grid)) +! allocate(r_mask_grid(n_points_final_grid,3)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! +! if(ao_overlap_abs(j,i) .lt. 1.d-12) then +! cycle +! endif +! +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_1_erf_x_2(i_fit) +! coef_fit = coef_gauss_1_erf_x_2(i_fit) * (-0.25d0) +! +! ! --- +! +! call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! do ipoint = 1, n_points_final_grid +! +! int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) +! +! if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then +! i_mask_grid += 1 +! n_mask_grid(i_mask_grid ) = ipoint +! r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) +! endif +! +! enddo +! +! if(i_mask_grid .eq. 0) cycle +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid +! int2_grad1u2_grad2u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 +! +!END_PROVIDER +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2 +! ! +! END_DOC +! +! implicit none +! integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid +! double precision :: r(3), expo_fit, coef_fit +! double precision :: coef, beta, B_center(3), tmp +! double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:) +! double precision, allocatable :: r_mask_grid(:,:) +! double precision, allocatable :: int_fit_v(:) +! +! print*, ' providing int2_u2_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_u2_j1b2(:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & +! !$OMP coef_fit, expo_fit, int_fit_v, & +! !$OMP i_mask_grid, n_mask_grid, r_mask_grid ) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_u2_j1b2) +! +! allocate(n_mask_grid(n_points_final_grid)) +! allocate(r_mask_grid(n_points_final_grid,3)) +! allocate(int_fit_v(n_points_final_grid)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_j_mu_x_2(i_fit) +! coef_fit = coef_gauss_j_mu_x_2(i_fit) +! +! ! --- +! +! call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! +! do ipoint = 1, n_points_final_grid +! int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) +! +! if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then +! i_mask_grid += 1 +! n_mask_grid(i_mask_grid ) = ipoint +! r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) +! endif +! enddo +! +! if(i_mask_grid .eq. 0) cycle +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid +! int2_u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_u2_j1b2', wall1 - wall0 +! +!END_PROVIDER +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2 +! ! +! END_DOC +! +! implicit none +! +! integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid(3) +! double precision :: x, y, z, expo_fit, coef_fit +! double precision :: coef, beta, B_center(3) +! double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s +! double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:,:) +! double precision, allocatable :: r_mask_grid(:,:,:) +! double precision, allocatable :: int_fit_v(:,:), dist(:,:), centr_1s(:,:,:) +! +! print*, ' providing int2_u_grad1u_x_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, x, y, z, coef, beta, & +! !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, dist, B_center,& +! !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, & +! !$OMP i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid, & +! !$OMP n_mask_grid, r_mask_grid) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) +! +! allocate(dist(n_points_final_grid,3)) +! allocate(centr_1s(n_points_final_grid,3,3)) +! allocate(n_mask_grid(n_points_final_grid,3)) +! allocate(r_mask_grid(n_points_final_grid,3,3)) +! allocate(int_fit_v(n_points_final_grid,3)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_j_mu_1_erf(i_fit) +! coef_fit = coef_gauss_j_mu_1_erf(i_fit) +! +! ! --- +! +! call NAI_pol_x_mult_erf_ao_with1s_v0(i, j, expo_fit, final_grid_points_transp, n_points_final_grid, 1.d+9, final_grid_points_transp, n_points_final_grid, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid1 = 0 ! dim +! i_mask_grid2 = 0 ! dim +! i_mask_grid3 = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! do ipoint = 1, n_points_final_grid +! +! ! --- +! +! int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1) +! +! if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then +! i_mask_grid1 += 1 +! n_mask_grid(i_mask_grid1, 1) = ipoint +! r_mask_grid(i_mask_grid1,1,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid1,2,1) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid1,3,1) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2) +! +! if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then +! i_mask_grid2 += 1 +! n_mask_grid(i_mask_grid2, 2) = ipoint +! r_mask_grid(i_mask_grid2,1,2) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid2,2,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid2,3,2) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3) +! +! if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then +! i_mask_grid3 += 1 +! n_mask_grid(i_mask_grid3, 3) = ipoint +! r_mask_grid(i_mask_grid3,1,3) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid3,2,3) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid3,3,3) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! enddo +! +! if((i_mask_grid1+i_mask_grid2+i_mask_grid3) .eq. 0) cycle +! +! i_mask_grid(1) = i_mask_grid1 +! i_mask_grid(2) = i_mask_grid2 +! i_mask_grid(3) = i_mask_grid3 +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! alpha_1s = beta + expo_fit +! alpha_1s_inv = 1.d0 / alpha_1s +! expo_coef_1s = beta * expo_fit * alpha_1s_inv +! +! do ipoint = 1, i_mask_grid1 +! +! x = r_mask_grid(ipoint,1,1) +! y = r_mask_grid(ipoint,2,1) +! z = r_mask_grid(ipoint,3,1) +! +! centr_1s(ipoint,1,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,1) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,1) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,1) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! do ipoint = 1, i_mask_grid2 +! +! x = r_mask_grid(ipoint,1,2) +! y = r_mask_grid(ipoint,2,2) +! z = r_mask_grid(ipoint,3,2) +! +! centr_1s(ipoint,1,2) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,2) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,2) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! do ipoint = 1, i_mask_grid3 +! +! x = r_mask_grid(ipoint,1,3) +! y = r_mask_grid(ipoint,2,3) +! z = r_mask_grid(ipoint,3,3) +! +! centr_1s(ipoint,1,3) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,3) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,3) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid1 +! int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1) +! enddo +! +! do ipoint = 1, i_mask_grid2 +! int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2) +! enddo +! +! do ipoint = 1, i_mask_grid3 +! int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(dist) +! deallocate(centr_1s) +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) +! int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) +! int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 +! +!END_PROVIDER +! diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 552e7069..6a662533 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -38,7 +38,24 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po do j = i, ao_num tmp = 0.d0 - do i_1s = 1, List_all_comb_b2_size + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + int_mu = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) + int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) + if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle + + tmp += coef * (int_mu - int_coulomb) + + ! --- + + do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) beta = List_all_comb_b2_expo (i_1s) @@ -52,6 +69,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po tmp += coef * (int_mu - int_coulomb) enddo + ! --- + v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = tmp enddo enddo @@ -138,7 +157,27 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ tmp_x = 0.d0 tmp_y = 0.d0 tmp_z = 0.d0 - do i_1s = 1, List_all_comb_b2_size + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) + + if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle + + tmp_x += coef * (ints(1) - ints_coulomb(1)) + tmp_y += coef * (ints(2) - ints_coulomb(2)) + tmp_z += coef * (ints(3) - ints_coulomb(3)) + + ! --- + + do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) beta = List_all_comb_b2_expo (i_1s) @@ -154,6 +193,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ tmp_z += coef * (ints(3) - ints_coulomb(3)) enddo + ! --- + x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z @@ -207,7 +248,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) @@ -222,22 +263,41 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ do j = i, ao_num tmp = 0.d0 - do i_1s = 1, List_all_comb_b2_size + do i_fit = 1, ng_fit_jast - coef = List_all_comb_b2_coef (i_1s) - beta = List_all_comb_b2_expo (i_1s) - B_center(1) = List_all_comb_b2_cent(1,i_1s) - B_center(2) = List_all_comb_b2_cent(2,i_1s) - B_center(3) = List_all_comb_b2_cent(3,i_1s) + expo_fit = expo_gauss_j_mu_x(i_fit) + coef_fit = coef_gauss_j_mu_x(i_fit) - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_x(i_fit) - coef_fit = coef_gauss_j_mu_x(i_fit) - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + if(dabs(int_fit) .lt. 1d-10) cycle + + tmp += coef * coef_fit * int_fit + + ! --- + + do i_1s = 2, List_all_comb_b2_size + + coef = List_all_comb_b2_coef (i_1s) + beta = List_all_comb_b2_expo (i_1s) + B_center(1) = List_all_comb_b2_cent(1,i_1s) + B_center(2) = List_all_comb_b2_cent(2,i_1s) + B_center(3) = List_all_comb_b2_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo + + ! --- + enddo v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 32308f59..c41b312d 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -168,7 +168,6 @@ END_PROVIDER do j = 1, nucl_num tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) -! print*,List_all_comb_b3(j,i),j1b_pen(j) List_all_comb_b3_expo(i) += tmp_alphaj List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) @@ -220,6 +219,10 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo + print *, ' 1st coeff & expo of lists' + print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1) + print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1) + END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 1638aa9e..cfdaf95f 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,79 +56,83 @@ end !--- -subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +! TODO apply Gaussian product three times first +subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, rvec, LD_rvec, n_points) + BEGIN_DOC + ! ! Computes the following integral : ! - ! .. math :: - ! - ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \int dr exp(-delta (r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2) ! using an array of D_centers ! + ! n_points: nb of integrals + ! END_DOC implicit none + include 'constants.include.F' - integer, intent(in) :: n_points - double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) - double precision, intent(out) :: rvec(n_points) - double precision, allocatable :: overlap(:) - double precision :: overlap_x, overlap_y, overlap_z + integer, intent(in) :: LD_D, LD_rvec, n_points + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta + double precision, intent(out) :: rvec(LD_rvec) - integer :: maxab - integer, allocatable :: iorder_a_new(:) - double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) - double precision, allocatable :: fact_a_new(:) - double precision :: alpha_new - double precision :: accu,thr, coefxy - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + integer :: maxab + integer :: d(3), i, lx, ly, lz, iorder_tmp(3), ipoint + double precision :: overlap_x, overlap_y, overlap_z + double precision :: alpha_new + double precision :: accu, thr, coefxy + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: overlap(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) - dim1=100 - thr = 1.d-10 + thr = 1.d-10 d(:) = 0 maxab = maxval(power_A(1:3)) - allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & - fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) + allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points)) - call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & - alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & - D_center, A_center, n_points) + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, & + D_center, LD_D, A_center, n_points) - do ipoint=1,n_points - rvec(ipoint) = 0.d0 - enddo + rvec(:) = 0.d0 do lx = 0, iorder_a_new(1) iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) iorder_tmp(3) = lz - call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) - do ipoint=1,n_points - rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & - A_new(ipoint,ly,2) * & - A_new(ipoint,lz,3) * overlap(ipoint) + + call overlap_gaussian_xyz_v(A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B, overlap, n_points) + + do ipoint = 1, n_points + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * A_new(ipoint,ly,2) * A_new(ipoint,lz,3) * overlap(ipoint) enddo enddo enddo enddo - do ipoint=1,n_points + do ipoint = 1, n_points rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) -end -!--- +end subroutine overlap_gauss_r12_v + !--- -subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) +subroutine overlap_gauss_xyz_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, gauss_ints) + BEGIN_DOC ! Computes the following integral : ! diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index 96625df5..dddf98d4 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -180,8 +180,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, enddo ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf = 0.d0 @@ -198,7 +197,8 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, end function NAI_pol_mult_erf ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points) + +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! @@ -214,74 +214,90 @@ subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(out) :: res_v(n_points) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision :: rint + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) + double precision :: rint - dist = 0.d0 + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) enddo - do ipoint=1,n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const_factor = dist * rho - if(const_factor > 80.d0) then - res_V(ipoint) = 0.d0 - cycle - endif + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif + else - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - enddo -end + endif + +end subroutine NAI_pol_mult_erf_v ! --- @@ -380,9 +396,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A d(i) = 0.d0 enddo p_new = p_new * p_new - - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf_with1s = 0.d0 @@ -398,10 +412,9 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A end function NAI_pol_mult_erf_with1s -!-- +! --- -subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2& - , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points) +subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! @@ -420,23 +433,26 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - double precision, intent(out) :: res_v(n_points) + integer, intent(in) :: n_pt_in, LD_B, LD_C, LD_resv, n_points + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: A1_center(3), A2_center(3) + double precision, intent(in) :: C_center(LD_C,3), B_center(LD_B,3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + double precision, intent(out) :: res_v(LD_resv) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new, p_new2 + integer :: i, n_pt, n_pt_out, ipoint + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new, p_new2, coef_tmp, cons_tmp - double precision :: rint + double precision :: rint + res_V(1:LD_resv) = 0.d0 + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} alpha12 = alpha1 + alpha2 alpha12_inv = 1.d0 / alpha12 @@ -446,87 +462,92 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))& - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - res_v(:) = 0.d0 return endif - ! --- - ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) & - + power_A1(3) + power_A2(3) ) + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = dtwo_pi * p_inv * p_new + cons_tmp = p * p_new2 + n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) + power_A1(3) + power_A2(3) ) - do ipoint=1,n_points + if(n_pt == 0) then - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv - dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& - + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& - + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + do ipoint = 1, n_points - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - res_v(ipoint) = 0.d0 - cycle - endif + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) - dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& - + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& - + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral - ! --- - - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - const = p * dist_integral * p_new2 - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - !TODO: VECTORIZE HERE - call give_polynomial_mult_center_one_e_erf_opt( & - A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1) + else - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) + + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + !TODO: VECTORIZE HERE + call give_polynomial_mult_center_one_e_erf_opt(A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - end do -end + endif + +end subroutine NAI_pol_mult_erf_with1s_v -! --- ! --- -subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) +subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) BEGIN_DOC ! Returns the explicit polynomial in terms of the $t$ variable of the @@ -554,17 +575,17 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ accu = 0.d0 ASSERT (n_pt_in > 1) - R1x(0) = (P_center(1) - A_center(1)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(1) - C_center(1))* p_new + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(1) - C_center(1))* p_new ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(1) - B_center(1)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(1) - C_center(1))* p_new + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(1) - C_center(1))* p_new !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R2x(0) = p_inv_2 - R2x(1) = 0.d0 - R2x(2) = -p_inv_2 * p_new + R2x(0) = p_inv_2 + R2x(1) = 0.d0 + R2x(2) = -p_inv_2 * p_new !R2x = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2 do i = 0, n_pt_in @@ -588,13 +609,13 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ return endif - R1x(0) = (P_center(2) - A_center(2)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(2) - C_center(2))* p_new + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(2) - C_center(2))* p_new ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(2) - B_center(2)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(2) - C_center(2))* p_new + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(2) - C_center(2))* p_new !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 a_y = power_A(2) b_y = power_B(2) @@ -607,13 +628,13 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ return endif - R1x(0) = (P_center(3) - A_center(3)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(3) - C_center(3)) * p_new + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(3) - C_center(3)) * p_new ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(3) - B_center(3)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(3) - C_center(3)) * p_new + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(3) - C_center(3)) * p_new !R2x = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2 a_z = power_A(3) b_z = power_B(3) @@ -642,16 +663,15 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- +subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) - -subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,& - power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) BEGIN_DOC ! Returns the explicit polynomial in terms of the $t$ variable of the ! following polynomial: ! ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. END_DOC + implicit none integer, intent(in) :: n_pt_in integer,intent(out) :: n_pt_out diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f index d53209d0..8fad9079 100644 --- a/src/ao_tc_eff_map/fit_j.irp.f +++ b/src/ao_tc_eff_map/fit_j.irp.f @@ -1,3 +1,5 @@ +! --- + BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ] implicit none BEGIN_DOC @@ -14,8 +16,8 @@ END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (ng_fit_jast)] BEGIN_DOC ! @@ -34,31 +36,96 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta - tmp = -0.5d0 / (mu_erf * sqrt(dacos(-1.d0))) + if(ng_fit_jast .eq. 1) then - alpha = expo_j_xmu(1) * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = expo_j_xmu(2) * mu_erf * mu_erf + coef_gauss_j_mu_x = (/ -0.47947881d0 /) + expo_gauss_j_mu_x = (/ 3.4987848d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_x = (/ -0.18390742d0, -0.35512656d0 /) + expo_gauss_j_mu_x = (/ 31.9279947d0 , 2.11428789d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_x = (/ -0.07501725d0, -0.28499012d0, -0.1953932d0 /) + expo_gauss_j_mu_x = (/ 206.74058566d0, 1.72974157d0, 11.18735164d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 5) then + + coef_gauss_j_mu_x = (/ -0.01832955d0 , -0.10188952d0 , -0.20710858d0 , -0.18975032d0 , -0.04641657d0 /) + expo_gauss_j_mu_x = (/ 4.33116687d+03, 2.61292842d+01, 1.43447161d+00, 4.92767426d+00, 2.10654699d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_x = (/ -0.08783664d0 , -0.16088711d0 , -0.18464486d0 , -0.0368509d0 , -0.08130028d0 , -0.0126972d0 /) + expo_gauss_j_mu_x = (/ 4.09729729d+01, 7.11620618d+00, 2.03692338d+00, 4.10831731d+02, 1.12480198d+00, 1.00000000d+04 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + alpha = expo_j_xmu(1) * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = expo_j_xmu(2) * mu_erf * mu_erf + + tmp = -1.0d0 / sqrt(dacos(-1.d0)) + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = expos(i) + beta + coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop - do i = 1, n_max_fit_slat - expo_gauss_j_mu_x(i) = expos(i) + beta - coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) + endif + + tmp = 0.5d0 / mu_erf + do i = 1, ng_fit_jast + coef_gauss_j_mu_x(i) = tmp * coef_gauss_j_mu_x(i) enddo END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (ng_fit_jast)] BEGIN_DOC ! ! J(mu,r12)^2 = 0.25/mu^2 F(r12*mu)^2 ! - ! F(x)^2 = 1 /pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) + ! F(x)^2 = 1/pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) ! ! The slater function exp(-2 * alpha * x) is fitted with n_max_fit_slat gaussians ! @@ -69,33 +136,98 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta double precision :: alpha_opt, beta_opt - !alpha_opt = 2.d0 * expo_j_xmu(1) - !beta_opt = 2.d0 * expo_j_xmu(2) - - ! direct opt - alpha_opt = 3.52751759d0 - beta_opt = 1.26214809d0 + if(ng_fit_jast .eq. 1) then - tmp = 0.25d0 / (mu_erf * mu_erf * dacos(-1.d0)) + coef_gauss_j_mu_x_2 = (/ 0.26699573d0 /) + expo_gauss_j_mu_x_2 = (/ 11.71029824d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo - alpha = alpha_opt * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = beta_opt * mu_erf * mu_erf + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_x_2 = (/ 0.11627934d0 , 0.18708824d0 /) + expo_gauss_j_mu_x_2 = (/ 102.41386863d0, 6.36239771d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_x_2 = (/ 0.04947216d0 , 0.14116238d0, 0.12276501d0 /) + expo_gauss_j_mu_x_2 = (/ 635.29701766d0, 4.87696954d0, 33.36745891d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 5) then + + coef_gauss_j_mu_x_2 = (/ 0.01461527d0 , 0.03257147d0 , 0.08831354d0 , 0.11411794d0 , 0.06858783d0 /) + expo_gauss_j_mu_x_2 = (/ 8.76554470d+03, 4.90224577d+02, 3.68267125d+00, 1.29663940d+01, 6.58240931d+01 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_x_2 = (/ 0.01347632d0 , 0.03929124d0 , 0.06289468d0 , 0.10702493d0 , 0.06999865d0 , 0.02558191d0 /) + expo_gauss_j_mu_x_2 = (/ 1.00000000d+04, 1.20900717d+02, 3.20346191d+00, 8.92157196d+00, 3.28119120d+01, 6.49045808d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + !alpha_opt = 2.d0 * expo_j_xmu(1) + !beta_opt = 2.d0 * expo_j_xmu(2) + + ! direct opt + alpha_opt = 3.52751759d0 + beta_opt = 1.26214809d0 - do i = 1, n_max_fit_slat - expo_gauss_j_mu_x_2(i) = expos(i) + beta - coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) + alpha = alpha_opt * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = beta_opt * mu_erf * mu_erf + + tmp = 1.d0 / dacos(-1.d0) + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = expos(i) + beta + coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop + + endif + + tmp = 0.25d0 / (mu_erf * mu_erf) + do i = 1, ng_fit_jast + coef_gauss_j_mu_x_2(i) = tmp * coef_gauss_j_mu_x_2(i) enddo END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (ng_fit_jast)] BEGIN_DOC ! @@ -108,25 +240,90 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta double precision :: alpha_opt, beta_opt - !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1) - !beta_opt = expo_j_xmu(2) + expo_gauss_1_erf_x(2) - - ! direct opt - alpha_opt = 2.87875632d0 - beta_opt = 1.34801003d0 + if(ng_fit_jast .eq. 1) then - tmp = -0.25d0 / (mu_erf * dsqrt(dacos(-1.d0))) + coef_gauss_j_mu_1_erf = (/ -0.47742461d0 /) + expo_gauss_j_mu_1_erf = (/ 8.72255696d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo - alpha = alpha_opt * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = beta_opt * mu_erf * mu_erf + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_1_erf = (/ -0.19342649d0, -0.34563835d0 /) + expo_gauss_j_mu_1_erf = (/ 78.66099999d0, 5.04324363d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_1_erf = (/ -0.0802541d0 , -0.27019258d0, -0.20546681d0 /) + expo_gauss_j_mu_1_erf = (/ 504.53350764d0, 4.01408169d0, 26.5758329d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 5) then + + coef_gauss_j_mu_1_erf = (/ -0.02330531d0 , -0.11888176d0 , -0.16476192d0 , -0.19874713d0 , -0.05889174d0 /) + expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 4.66067922d+01, 3.04359857d+00, 9.54726649d+00, 3.59796835d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_1_erf = (/ -0.01865654d0 , -0.18319251d0 , -0.06543196d0 , -0.11522778d0 , -0.14825793d0 , -0.03327101d0 /) + expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 8.05593848d+00, 1.27986190d+02, 2.92674319d+01, 2.93583623d+00, 7.65609148d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1) + !beta_opt = expo_j_xmu(2) + expo_gauss_1_erf_x(2) + + ! direct opt + alpha_opt = 2.87875632d0 + beta_opt = 1.34801003d0 + + alpha = alpha_opt * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = beta_opt * mu_erf * mu_erf + + tmp = -1.d0 / dsqrt(dacos(-1.d0)) + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = expos(i) + beta + coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop - do i = 1, n_max_fit_slat - expo_gauss_j_mu_1_erf(i) = expos(i) + beta - coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) + endif + + tmp = 0.25d0 / mu_erf + do i = 1, ng_fit_jast + coef_gauss_j_mu_1_erf(i) = tmp * coef_gauss_j_mu_1_erf(i) enddo END_PROVIDER diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f index d8b3c442..67d572e5 100644 --- a/src/ao_tc_eff_map/potential.irp.f +++ b/src/ao_tc_eff_map/potential.irp.f @@ -142,27 +142,96 @@ double precision function fit_1_erf_x(x) end - BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (n_max_fit_slat)] - implicit none - BEGIN_DOC -! (1 - erf(mu*x))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2) -! -! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2) -! -! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians - END_DOC - integer :: i - double precision :: expos(n_max_fit_slat),alpha,beta - alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf - call expo_fit_slater_gam(alpha,expos) - beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf**2.d0 - do i = 1, n_max_fit_slat - expo_gauss_1_erf_x_2(i) = expos(i) + beta - coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i) - enddo +! --- + + BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (ng_fit_jast)] + + BEGIN_DOC + ! (1 - erf(mu*x))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2) + ! + ! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2) + ! + ! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians + END_DOC + + implicit none + integer :: i + double precision :: expos(ng_fit_jast), alpha, beta, tmp + + if(ng_fit_jast .eq. 1) then + + coef_gauss_1_erf_x_2 = (/ 0.85345277d0 /) + expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_1_erf_x_2 = (/ 0.31030624d0 , 0.64364964d0 /) + expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_1_erf_x_2 = (/ 0.33206082d0 , 0.52347449d0, 0.12605012d0 /) + expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 5) then + + coef_gauss_1_erf_x_2 = (/ 0.02956716d0, 0.17025555d0, 0.32774114d0, 0.39034764d0, 0.07822781d0 /) + expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_1_erf_x_2 = (/ 0.18331042d0 , 0.10971118d0 , 0.29949169d0 , 0.34853132d0 , 0.0394275d0 , 0.01874444d0 /) + expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = expos(i) + beta + coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop + + endif + END_PROVIDER +! --- + double precision function fit_1_erf_x_2(x) implicit none double precision, intent(in) :: x diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 33f512cf..4762c25e 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -122,35 +122,40 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none integer :: ipoint + double precision :: wall0, wall1 - print*,'providing int2_grad1_u12_bimo_transp' - double precision :: wall0, wall1 - call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) - !$OMP DO SCHEDULE (dynamic) - do ipoint = 1, n_points_final_grid - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & - , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - call wall_time(wall1) - print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !print *, ' providing int2_grad1_u12_bimo_transp' + + call wall_time(wall0) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp) + !$OMP DO SCHEDULE (dynamic) + do ipoint = 1, n_points_final_grid + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,1,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,2,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp (1,1,3,ipoint), size(int2_grad1_u12_ao_transp , 1) & + , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 END_PROVIDER ! --- + BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )] implicit none integer :: i, j, ipoint diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 89f46a05..31cf0624 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -45,6 +45,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) + !print *, ' sym integ = ', integral_sym + !print *, ' non-sym integ = ', integral_nsym + ao_two_e_tc_tot(k,i,l,j) = integral_sym + integral_nsym !write(111,*) ao_two_e_tc_tot(k,i,l,j) enddo diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f index 947be870..0de8ce69 100644 --- a/src/bi_ortho_mos/bi_density.irp.f +++ b/src/bi_ortho_mos/bi_density.irp.f @@ -22,7 +22,7 @@ BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ] ! ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0> END_DOC - call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & + call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) ) END_PROVIDER diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f index 034a436e..d51999fc 100644 --- a/src/bi_ortho_mos/mos_rl.irp.f +++ b/src/bi_ortho_mos/mos_rl.irp.f @@ -37,6 +37,52 @@ end subroutine ao_to_mo_bi_ortho ! --- +subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao) + + BEGIN_DOC + ! + ! mo_l_coef.T x A_ao x mo_r_coef = A_mo + ! mo_l_coef.T x ao_overlap x mo_r_coef = I + ! + ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T + ! + END_DOC + + implicit none + integer, intent(in) :: LDA_ao, LDA_mo + double precision, intent(in) :: A_mo(LDA_mo,mo_num) + double precision, intent(out) :: A_ao(LDA_ao,ao_num) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:) + + ! ao_overlap x mo_r_coef + allocate( tmp_1(ao_num,mo_num) ) + call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0 & + , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) & + , 0.d0, tmp_1, size(tmp_1, 1) ) + + ! (ao_overlap x mo_r_coef) x A_mo + allocate( tmp_2(ao_num,mo_num) ) + call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 & + , tmp_1, size(tmp_1, 1), A_mo, LDA_mo & + , 0.d0, tmp_2, size(tmp_2, 1) ) + + ! ao_overlap x mo_l_coef + tmp_1 = 0.d0 + call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0 & + , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) & + , 0.d0, tmp_1, size(tmp_1, 1) ) + + ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T + call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0 & + , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) & + , 0.d0, A_ao, LDA_ao ) + + deallocate(tmp_1, tmp_2) + +end subroutine mo_to_ao_bi_ortho + +! --- + BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ] BEGIN_DOC @@ -175,3 +221,4 @@ END_PROVIDER ! --- + diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f index 09cec15f..d7f45c94 100644 --- a/src/bi_ortho_mos/overlap.irp.f +++ b/src/bi_ortho_mos/overlap.irp.f @@ -58,7 +58,7 @@ accu_nd = accu_nd/dble(mo_num**2-mo_num) if(dabs(accu_d-1.d0).gt.1.d-10.or.dabs(accu_nd).gt.1.d-10)then print*,'Warning !!!' - print*,'Average trace of overlap_bi_ortho is different from 1 by ', accu_d + print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0) print*,'And bi orthogonality is off by an average of ',accu_nd print*,'****************' print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' @@ -76,67 +76,85 @@ END_PROVIDER ! --- - BEGIN_PROVIDER [ double precision, overlap_mo_r, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, overlap_mo_l, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! overlap_mo_r_mo(j,i) = - END_DOC - integer :: i,j,p,q - overlap_mo_r= 0.d0 - overlap_mo_l= 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) - overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) - enddo + + BEGIN_DOC + ! overlap_mo_r_mo(j,i) = + END_DOC + + implicit none + integer :: i, j, p, q + + overlap_mo_r = 0.d0 + overlap_mo_l = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do p = 1, ao_num + do q = 1, ao_num + overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) + overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) + enddo + enddo enddo - enddo - enddo + enddo + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, overlap_mo_r_mo, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, overlap_mo_l_mo, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! overlap_mo_r_mo(j,i) = - END_DOC - integer :: i,j,p,q - overlap_mo_r_mo = 0.d0 - overlap_mo_l_mo = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p) - overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p) - enddo + + BEGIN_DOC + ! overlap_mo_r_mo(j,i) = + END_DOC + + implicit none + integer :: i, j, p, q + + overlap_mo_r_mo = 0.d0 + overlap_mo_l_mo = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do p = 1, ao_num + do q = 1, ao_num + overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p) + overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p) + enddo + enddo enddo - enddo - enddo + enddo + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, angle_left_right, (mo_num)] &BEGIN_PROVIDER [ double precision, max_angle_left_right] - implicit none - BEGIN_DOC + + BEGIN_DOC ! angle_left_right(i) = angle between the left-eigenvector chi_i and the right-eigenvector phi_i - END_DOC - integer :: i,j - double precision :: left,right,arg - do i = 1, mo_num - left = overlap_mo_l(i,i) - right = overlap_mo_r(i,i) - arg = min(overlap_bi_ortho(i,i)/(left*right),1.d0) - arg = max(arg,-1.d0) - angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0) - enddo - double precision :: angle(mo_num) - angle(1:mo_num) = dabs(angle_left_right(1:mo_num)) - max_angle_left_right = maxval(angle) + END_DOC + + implicit none + integer :: i, j + double precision :: left, right, arg + double precision :: angle(mo_num) + + do i = 1, mo_num + left = overlap_mo_l(i,i) + right = overlap_mo_r(i,i) + arg = min(overlap_bi_ortho(i,i)/(left*right),1.d0) + arg = max(arg, -1.d0) + angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0) + enddo + + angle(1:mo_num) = dabs(angle_left_right(1:mo_num)) + max_angle_left_right = maxval(angle) + END_PROVIDER +! --- + diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 4a9ed5f3..62d7c52c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -750,7 +750,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if (delta_E < 0.d0) then tmp = -tmp endif + + !e_pert(istate) = alpha_h_psi * alpha_h_psi / (E0(istate) - Hii) e_pert(istate) = 0.5d0 * (tmp - delta_E) + if (dabs(alpha_h_psi) > 1.d-4) then coef(istate) = e_pert(istate) / alpha_h_psi else diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f index 36d0120d..b621206a 100644 --- a/src/non_hermit_dav/biorthog.irp.f +++ b/src/non_hermit_dav/biorthog.irp.f @@ -252,7 +252,7 @@ end subroutine non_hrmt_real_diag_new ! --- -subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) +subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, eigval) BEGIN_DOC ! @@ -266,13 +266,14 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) implicit none integer, intent(in) :: n double precision, intent(in) :: A(n,n) + double precision, intent(in) :: thr_d, thr_nd integer, intent(out) :: n_real_eigv double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) integer :: i, j integer :: n_good double precision :: thr, thr_cut, thr_diag, thr_norm - double precision :: accu_d, accu_nd, thr_d, thr_nd + double precision :: accu_d, accu_nd integer, allocatable :: list_good(:), iorder(:) double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) @@ -282,16 +283,16 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) ! ------------------------------------------------------------------------------------- ! - print *, ' ' - print *, ' Computing the left/right eigenvectors ...' - print *, ' ' + !print *, ' ' + !print *, ' Computing the left/right eigenvectors ...' + !print *, ' ' - allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) + allocate(WR(n), WI(n), VL(n,n), VR(n,n)) - print *, ' fock matrix' - do i = 1, n - write(*, '(1000(F16.10,X))') A(i,:) - enddo + !print *, ' fock matrix' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') A(i,:) + !enddo !thr_cut = 1.d-15 !call cancel_small_elmts(A, n, thr_cut) @@ -300,11 +301,11 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) call lapack_diag_non_sym(n, A, WR, WI, VL, VR) !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) - print *, ' ' - print *, ' eigenvalues' - do i = 1, n - write(*, '(1000(F16.10,X))') WR(i), WI(i) - enddo + !print *, ' ' + !print *, ' eigenvalues' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') WR(i), WI(i) + !enddo !print *, ' right eigenvect bef' !do i = 1, n ! write(*, '(1000(F16.10,X))') VR(:,i) @@ -327,9 +328,10 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) ! track & sort the real eigenvalues n_good = 0 - thr = 1.d-5 + !thr = 100d0 + thr = Im_thresh_tcscf do i = 1, n - print*, 'Re(i) + Im(i)', WR(i), WI(i) + !print*, 'Re(i) + Im(i)', WR(i), WI(i) if(dabs(WI(i)) .lt. thr) then n_good += 1 else @@ -395,31 +397,32 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) ! ------------------------------------------------------------------------------------- ! check bi-orthogonality - thr_d = 1d-10 ! -7 - thr_nd = 1d-10 ! -7 + thr_diag = 10.d0 + thr_norm = 1d+10 allocate( S(n_real_eigv,n_real_eigv) ) - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.) - if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .lt. thr_d) ) then + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then - print *, ' lapack vectors are normalized and bi-orthogonalized' + !print *, ' lapack vectors are normalized and bi-orthogonalized' deallocate(S) return - elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then + ! accu_nd is modified after adding the normalization + !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then - print *, ' lapack vectors are not normalized but bi-orthogonalized' - call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + ! print *, ' lapack vectors are not normalized but bi-orthogonalized' + ! call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.) - call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) + ! call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) - deallocate(S) - return + ! deallocate(S) + ! return else - print *, ' lapack vectors are not normalized neither bi-orthogonalized' + !print *, ' lapack vectors are not normalized neither bi-orthogonalized' ! --- @@ -429,17 +432,17 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) - !call impose_orthog_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) + !call impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, eigval, leigvec, reigvec) !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec) ! --- - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.) if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then - call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.) endif - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .true.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.) !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec) diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f index d79e25ba..0d652af4 100644 --- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -356,6 +356,7 @@ subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) ! Eigvalue(n) = WR(n) + i * WI(n) allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) Aw = A + !print *, ' matrix to diagonalize', Aw call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) ! --- @@ -573,21 +574,22 @@ end subroutine non_hrmt_general_real_diag ! --- -subroutine impose_biorthog_qr(m, n, Vl, Vr) +subroutine impose_biorthog_qr(m, n, thr_d, thr_nd, Vl, Vr) implicit none - integer, intent(in) :: m, n + integer, intent(in) :: m, n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(inout) :: Vl(m,n), Vr(m,n) integer :: i, j integer :: LWORK, INFO - double precision :: accu_nd, accu_d, thr_nd, thr_d + double precision :: accu_nd, accu_d double precision, allocatable :: TAU(:), WORK(:) double precision, allocatable :: S(:,:), R(:,:), tmp(:,:) ! --- - call check_biorthog_binormalize(m, n, Vl, Vr, .false.) + call check_biorthog_binormalize(m, n, Vl, Vr, thr_d, thr_nd, .false.) ! --- @@ -609,9 +611,7 @@ subroutine impose_biorthog_qr(m, n, Vl, Vr) enddo accu_nd = dsqrt(accu_nd) - thr_d = 1d-10 - thr_nd = 1d-08 - if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n)) .lt. thr_d)) then + if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then print *, ' bi-orthogonal vectors without QR !' deallocate(S) return @@ -930,7 +930,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s tmp_abs = tmp_abs + tmp V_nrm = V_nrm + U_nrm - write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm + !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm enddo @@ -973,7 +973,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s tmp_abs = tmp_abs + tmp V_nrm = V_nrm + U_nrm - write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm + !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm enddo @@ -1011,7 +1011,7 @@ subroutine check_degen(n, m, eigval, leigvec, reigvec) double precision :: ei, ej, de, de_thr, accu_nd double precision, allocatable :: S(:,:) - de_thr = 1d-7 + de_thr = 1d-6 do i = 1, m-1 ei = eigval(i) @@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) double precision, allocatable :: S(:,:), tmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) - print *, ' apply SVD to orthogonalize vectors' + !print *, ' apply SVD to orthogonalize & normalize weighted vectors' ! --- @@ -1097,10 +1097,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) , 0.d0, S, size(S, 1) ) deallocate(tmp) - print *, ' eigenvec overlap bef SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' overlap bef SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo ! --- @@ -1160,10 +1160,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) , 0.d0, S, size(S, 1) ) deallocate(tmp) - print *, ' eigenvec overlap aft SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' overlap aft SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo deallocate(S) @@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C) double precision, allocatable :: S(:,:), tmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) - print *, ' apply SVD to orthogonalize vectors' + !print *, ' apply SVD to orthogonalize & normalize vectors' ! --- @@ -1196,10 +1196,10 @@ subroutine impose_orthog_svd(n, m, C) , C, size(C, 1), C, size(C, 1) & , 0.d0, S, size(S, 1) ) - print *, ' eigenvec overlap bef SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' eigenvec overlap bef SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo ! --- @@ -1213,6 +1213,7 @@ subroutine impose_orthog_svd(n, m, C) num_linear_dependencies = 0 do i = 1, m if(abs(D(i)) <= threshold) then + write(*,*) ' D(i) = ', D(i) D(i) = 0.d0 num_linear_dependencies += 1 else @@ -1223,6 +1224,7 @@ subroutine impose_orthog_svd(n, m, C) if(num_linear_dependencies > 0) then write(*,*) ' linear dependencies = ', num_linear_dependencies write(*,*) ' m = ', m + write(*,*) ' try with Graham-Schmidt' stop endif @@ -1255,10 +1257,10 @@ subroutine impose_orthog_svd(n, m, C) , C, size(C, 1), C, size(C, 1) & , 0.d0, S, size(S, 1) ) - print *, ' eigenvec overlap aft SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' eigenvec overlap aft SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo deallocate(S) @@ -1268,7 +1270,7 @@ end subroutine impose_orthog_svd ! --- -subroutine impose_orthog_svd_overlap(n, m, C,overlap) +subroutine impose_orthog_svd_overlap(n, m, C, overlap) implicit none @@ -1278,27 +1280,27 @@ subroutine impose_orthog_svd_overlap(n, m, C,overlap) integer :: i, j, num_linear_dependencies double precision :: threshold - double precision, allocatable :: S(:,:), tmp(:,:),Stmp(:,:) + double precision, allocatable :: S(:,:), tmp(:,:), Stmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) print *, ' apply SVD to orthogonalize vectors' ! --- - allocate(S(m,m),Stmp(n,m)) - ! S = C.T x overlap x C - call dgemm( 'N', 'N', n, m, n, 1.d0 & + allocate(S(m,m), Stmp(n,m)) + call dgemm( 'N', 'N', n, m, n, 1.d0 & , overlap, size(overlap, 1), C, size(C, 1) & , 0.d0, Stmp, size(Stmp, 1) ) - call dgemm( 'T', 'N', m, m, n, 1.d0 & + call dgemm( 'T', 'N', m, m, n, 1.d0 & , C, size(C, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) -! print *, ' eigenvec overlap bef SVD: ' -! do i = 1, m -! write(*, '(1000(F16.10,X))') S(i,:) -! enddo + !print *, ' eigenvec overlap bef SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo ! --- @@ -1347,23 +1349,23 @@ subroutine impose_orthog_svd_overlap(n, m, C,overlap) ! --- - allocate(S(m,m)) - - ! S = C.T x C - call dgemm( 'T', 'N', m, m, n, 1.d0 & - , C, size(C, 1), C, size(C, 1) & + ! S = C.T x overlap x C + allocate(S(m,m), Stmp(n,m)) + call dgemm( 'N', 'N', n, m, n, 1.d0 & + , overlap, size(overlap, 1), C, size(C, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) -! print *, ' eigenvec overlap aft SVD: ' -! do i = 1, m -! write(*, '(1000(F16.10,X))') S(i,:) -! enddo - + !print *, ' eigenvec overlap aft SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo deallocate(S) - ! --- - -end subroutine impose_orthog_svd +end subroutine impose_orthog_svd_overlap ! --- @@ -1379,7 +1381,7 @@ subroutine impose_orthog_GramSchmidt(n, m, C) double precision, allocatable :: S(:,:) print *, '' - print *, ' apply Gram-Schmidt to orthogonalize vectors' + print *, ' apply Gram-Schmidt to orthogonalize & normalize vectors' print *, '' ! --- @@ -1527,11 +1529,11 @@ subroutine impose_orthog_degen_eigvec(n, e0, C0) enddo - do i = 1, n - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - endif - enddo + !do i = 1, n + ! if(deg_num(i) .gt. 1) then + ! print *, ' degen on', i, deg_num(i) + ! endif + !enddo ! --- @@ -1663,23 +1665,20 @@ end subroutine get_halfinv_svd ! --- -subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) +subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot) implicit none integer, intent(in) :: n, m logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd double precision, intent(inout) :: Vl(n,m), Vr(n,m) integer :: i, j - double precision :: thr_d, thr_nd double precision :: accu_d, accu_nd, s_tmp double precision, allocatable :: S(:,:) - thr_d = 1d-6 - thr_nd = 1d-7 - - print *, ' check bi-orthonormality' + !print *, ' check bi-orthonormality' ! --- @@ -1694,11 +1693,13 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) ! S(i,i) = -1 do i = 1, m - if( (S(i,i) + 1.d0) .lt. thr_d ) then + if(S(i,i) .lt. 0.d0) then + !if( (S(i,i) + 1.d0) .lt. thr_d ) then do j = 1, n Vl(j,i) = -1.d0 * Vl(j,i) enddo - S(i,i) = 1.d0 + !S(i,i) = 1.d0 + S(i,i) = -S(i,i) endif enddo @@ -1713,16 +1714,20 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) - print*, ' diag acc: ', accu_d - print*, ' nondiag acc: ', accu_nd + accu_nd = dsqrt(accu_nd) / dble(m) + !print*, ' diag acc bef = ', accu_d + !print*, ' nondiag acc bef = ', accu_nd ! --- if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then do i = 1, m - print *, i, S(i,i) + if(S(i,i) <= 0.d0) then + print *, ' overap negative' + print *, i, S(i,i) + exit + endif if(dabs(S(i,i) - 1.d0) .gt. thr_d) then s_tmp = 1.d0 / dsqrt(S(i,i)) do j = 1, n @@ -1730,6 +1735,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) Vr(j,i) = Vr(j,i) * s_tmp enddo endif + enddo endif @@ -1755,9 +1761,9 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) - print *, ' diag acc: ', accu_d - print *, ' nondiag acc: ', accu_nd + accu_nd = dsqrt(accu_nd) / dble(m) + !print *, ' diag acc aft = ', accu_d + !print *, ' nondiag acc aft = ', accu_nd deallocate(S) @@ -1774,22 +1780,19 @@ end subroutine check_biorthog_binormalize ! --- -subroutine check_weighted_biorthog(n, m, W, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) +subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot) implicit none integer, intent(in) :: n, m double precision, intent(in) :: Vl(n,m), Vr(n,m), W(n,n) + double precision, intent(in) :: thr_d, thr_nd logical, intent(in) :: stop_ifnot double precision, intent(out) :: accu_d, accu_nd, S(m,m) integer :: i, j - double precision :: thr_d, thr_nd double precision, allocatable :: SS(:,:), tmp(:,:) - thr_d = 1d-6 - thr_nd = 1d-08 - print *, ' check weighted bi-orthogonality' ! --- @@ -1803,10 +1806,10 @@ subroutine check_weighted_biorthog(n, m, W, Vl, Vr, accu_d, accu_nd, S, stop_ifn , 0.d0, S, size(S, 1) ) deallocate(tmp) - print *, ' overlap matrix:' - do i = 1, m - write(*,'(1000(F16.10,X))') S(i,:) - enddo + !print *, ' overlap matrix:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo accu_d = 0.d0 accu_nd = 0.d0 @@ -1841,33 +1844,31 @@ end subroutine check_weighted_biorthog ! --- -subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) +subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot) implicit none integer, intent(in) :: n, m double precision, intent(in) :: Vl(n,m), Vr(n,m) logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd double precision, intent(out) :: accu_d, accu_nd, S(m,m) integer :: i, j - double precision :: thr_d, thr_nd double precision, allocatable :: SS(:,:) - thr_d = 1d-6 - thr_nd = 1d-08 - - print *, ' check bi-orthogonality' + !print *, ' check bi-orthogonality' ! --- call dgemm( 'T', 'N', m, m, n, 1.d0 & , Vl, size(Vl, 1), Vr, size(Vr, 1) & , 0.d0, S, size(S, 1) ) - print *, ' overlap matrix:' - do i = 1, m - write(*,'(1000(F16.10,X))') S(i,:) - enddo + + !print *, ' overlap matrix:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo accu_d = 0.d0 accu_nd = 0.d0 @@ -1880,14 +1881,14 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) + accu_nd = dsqrt(accu_nd) / dble(m) - print *, ' accu_nd = ', accu_nd - print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + !print *, ' accu_nd = ', accu_nd + !print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) ! --- - if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then + if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then print *, ' non bi-orthogonal vectors !' print *, ' accu_nd = ', accu_nd print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) @@ -1917,12 +1918,12 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S) , V, size(V, 1), V, size(V, 1) & , 0.d0, S, size(S, 1) ) - print *, '' - print *, ' overlap matrix:' - do i = 1, m - write(*,'(1000(F16.10,X))') S(i,:) - enddo - print *, '' + !print *, '' + !print *, ' overlap matrix:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + !print *, '' accu_d = 0.d0 accu_nd = 0.d0 @@ -1986,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) enddo enddo - do i = 1, n - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - endif - enddo + !do i = 1, n + ! if(deg_num(i) .gt. 1) then + ! print *, ' degen on', i, deg_num(i), e0(i) + ! endif + !enddo ! --- @@ -2011,6 +2012,8 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) call impose_orthog_svd(n, m, L) call impose_orthog_svd(n, m, R) + !call impose_orthog_GramSchmidt(n, m, L) + !call impose_orthog_GramSchmidt(n, m, R) ! --- @@ -2029,7 +2032,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) call impose_biorthog_svd(n, m, L, R) - !call impose_biorthog_qr(n, m, L, R) + !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) ! --- @@ -2047,11 +2050,12 @@ end subroutine impose_biorthog_degen_eigvec ! --- -subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0) +subroutine impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, L0, R0) implicit none integer, intent(in) :: n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(in) :: e0(n) double precision, intent(inout) :: L0(n,n), R0(n,n) @@ -2116,12 +2120,12 @@ subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0) ! --- - call impose_biorthog_qr(n, m, L, R) + call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) allocate(S(m,m)) - call check_biorthog(n, m, L, R, accu_d, accu_nd, S, .true.) - !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, .true.) - !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, .false.) + call check_biorthog(n, m, L, R, accu_d, accu_nd, S, thr_d, thr_nd, .true.) + !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, thr_d, thr_nd, .true.) + !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, thr_d, thr_nd, .false.) deallocate(S) ! --- @@ -2140,11 +2144,12 @@ end subroutine impose_orthog_biorthog_degen_eigvec ! --- -subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) +subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0, R0) implicit none integer, intent(in) :: n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(in) :: e0(n), W0(n,n), C0(n,n) double precision, intent(inout) :: L0(n,n), R0(n,n) @@ -2182,11 +2187,11 @@ subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) enddo enddo - do i = 1, n - if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) - endif - enddo + !do i = 1, n + ! if(deg_num(i) .gt. 1) then + ! print *, ' degen on', i, deg_num(i) + ! endif + !enddo ! --- @@ -2255,7 +2260,7 @@ subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root) if(complex_root)then call impose_biorthog_svd(n, m, L, R) - !call impose_biorthog_qr(n, m, L, R) + !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) else call bi_ortho_s_inv_half(m, L, R, S_inv_half) endif @@ -2415,10 +2420,10 @@ subroutine impose_biorthog_svd(n, m, L, R) , L, size(L, 1), R, size(R, 1) & , 0.d0, S, size(S, 1) ) - print *, ' overlap bef SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' overlap bef SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo ! --- @@ -2490,10 +2495,11 @@ subroutine impose_biorthog_svd(n, m, L, R) , L, size(L, 1), R, size(R, 1) & , 0.d0, S, size(S, 1) ) - print *, ' overlap aft SVD: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo + !print *, ' overlap aft SVD: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo + deallocate(S) ! --- @@ -2502,8 +2508,286 @@ end subroutine impose_biorthog_svd ! --- +subroutine impose_weighted_biorthog_qr(m, n, thr_d, thr_nd, Vl, W, Vr) -subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: thr_d, thr_nd + double precision, intent(inout) :: Vl(m,n), W(m,m), Vr(m,n) + + integer :: i, j + integer :: LWORK, INFO + double precision :: accu_nd, accu_d + double precision, allocatable :: TAU(:), WORK(:) + double precision, allocatable :: S(:,:), R(:,:), tmp(:,:), Stmp(:,:) + + + call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.) + + ! --- + + allocate(Stmp(n,m), S(n,n)) + call dgemm( 'T', 'N', n, m, m, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', n, n, m, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) then + accu_d += S(j,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then + print *, ' bi-orthogonal vectors without QR !' + deallocate(S) + return + endif + + ! ------------------------------------------------------------------------------------- + ! QR factorization of S: S = Q x R + + + print *, ' apply QR decomposition ...' + + allocate( TAU(n), WORK(1) ) + + LWORK = -1 + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + ! save the upper triangular R + allocate( R(n,n) ) + R(:,:) = S(:,:) + + ! get Q + LWORK = -1 + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + deallocate( WORK, TAU ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! get bi-orhtog left & right vectors: + ! Vr' = Vr x inv(R) + ! Vl' = inv(Q) x Vl = Q.T x Vl + + ! Q.T x Vl, where Q = S + + allocate( tmp(n,m) ) + call dgemm( 'T', 'T', n, m, n, 1.d0 & + , S, size(S, 1), Vl, size(Vl, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + do i = 1, n + do j = 1, m + Vl(j,i) = tmp(i,j) + enddo + enddo + deallocate(tmp) + + ! --- + + ! inv(R) + !print *, ' inversing upper triangular matrix ...' + call dtrtri("U", "N", n, R, n, INFO) + if(INFO .ne. 0) then + print*,'dtrtri failed !!', INFO + stop + endif + !print *, ' inversing upper triangular matrix OK' + + do i = 1, n-1 + do j = i+1, n + R(j,i) = 0.d0 + enddo + enddo + + !print *, ' inv(R):' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') R(i,:) + !enddo + + ! Vr x inv(R) + allocate( tmp(m,n) ) + call dgemm( 'N', 'N', m, n, n, 1.d0 & + , Vr, size(Vr, 1), R, size(R, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate( R ) + + do i = 1, n + do j = 1, m + Vr(j,i) = tmp(j,i) + enddo + enddo + deallocate(tmp) + + call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.) + + return +end subroutine impose_weighted_biorthog_qr + +! --- + +subroutine check_weighted_biorthog_binormalize(n, m, Vl, W, Vr, thr_d, thr_nd, stop_ifnot) + + implicit none + + integer, intent(in) :: n, m + logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd + double precision, intent(inout) :: Vl(n,m), W(n,n), Vr(n,m) + + integer :: i, j + double precision :: accu_d, accu_nd, s_tmp + double precision, allocatable :: S(:,:), Stmp(:,:) + + print *, ' check weighted bi-orthonormality' + + ! --- + + allocate(Stmp(m,n), S(m,m)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + !print *, ' overlap matrix before:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + ! S(i,i) = -1 + do i = 1, m + if( (S(i,i) + 1.d0) .lt. thr_d ) then + do j = 1, n + Vl(j,i) = -1.d0 * Vl(j,i) + enddo + S(i,i) = 1.d0 + endif + enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) / dble(m) + print*, ' diag acc: ', accu_d + print*, ' nondiag acc: ', accu_nd + + ! --- + + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then + + do i = 1, m + print *, i, S(i,i) + if(dabs(S(i,i) - 1.d0) .gt. thr_d) then + s_tmp = 1.d0 / dsqrt(S(i,i)) + do j = 1, n + Vl(j,i) = Vl(j,i) * s_tmp + Vr(j,i) = Vr(j,i) * s_tmp + enddo + endif + enddo + + endif + + ! --- + + allocate(Stmp(m,n)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + !print *, ' overlap matrix after:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) / dble(m) + print *, ' diag acc: ', accu_d + print *, ' nondiag acc: ', accu_nd + + deallocate(S) + + ! --- + + if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then + print *, accu_nd, thr_nd + print *, dabs(accu_d-dble(m))/dble(m), thr_d + print *, ' weighted biorthog_binormalize failed !' + stop + endif + +end subroutine check_weighted_biorthog_binormalize + +! --- + +subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R) implicit none @@ -2527,11 +2811,12 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) call dgemm( 'T', 'N', m, m, n, 1.d0 & , L, size(L, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) - print *, ' overlap bef SVD: ' - do i = 1, m - write(*, '(1000(F25.16,X))') S(i,:) - enddo + !print *, ' overlap bef SVD: ' + !do i = 1, m + ! write(*, '(1000(F25.16,X))') S(i,:) + !enddo ! --- @@ -2598,10 +2883,7 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) ! --- - allocate(S(m,m)) -! call dgemm( 'T', 'N', m, m, n, 1.d0 & -! , L, size(L, 1), R, size(R, 1) & -! , 0.d0, S, size(S, 1) ) + allocate(S(m,m),Stmp(n,m)) ! S = C.T x overlap x C call dgemm( 'N', 'N', n, m, n, 1.d0 & , overlap, size(overlap, 1), R, size(R, 1) & @@ -2609,16 +2891,17 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) call dgemm( 'T', 'N', m, m, n, 1.d0 & , L, size(L, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + + !print *, ' overlap aft SVD with overlap: ' + !do i = 1, m + ! write(*, '(1000(F16.10,X))') S(i,:) + !enddo - print *, ' overlap aft SVD with overlap: ' - do i = 1, m - write(*, '(1000(F16.10,X))') S(i,:) - enddo deallocate(S) - ! --- - -end subroutine impose_biorthog_svd + return +end subroutine impose_weighted_biorthog_svd ! --- diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3b9eaeb4..56a1ed8e 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -29,11 +29,11 @@ END_DOC call write_time(6) - print*,'Energy of the guess = ',SCF_energy + print*,'energy of the guess = ',SCF_energy write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & '====','================','================','================','================' write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & - ' N ', 'Energy ', 'Energy diff ', 'DIIS error ', 'Level shift ' + ' N ', 'energy ', 'energy diff ', 'DIIS error ', 'Level shift ' write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & '====','================','================','================','================' @@ -69,9 +69,9 @@ END_DOC if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) ) then ! Store Fock and error matrices at each iteration + index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1 do j=1,ao_num do i=1,ao_num - index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1 Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j) error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j) enddo @@ -106,8 +106,8 @@ END_DOC ! SCF energy energy_SCF = SCF_energy - Delta_Energy_SCF = energy_SCF - energy_SCF_previous - if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then + Delta_energy_SCF = energy_SCF - energy_SCF_previous + if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS) Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0 Fock_matrix_AO_beta = Fock_matrix_AO*0.5d0 @@ -131,15 +131,17 @@ END_DOC call initialize_mo_coef_begin_iteration endif TOUCH mo_coef - Delta_Energy_SCF = SCF_energy - energy_SCF_previous + Delta_energy_SCF = SCF_energy - energy_SCF_previous energy_SCF = SCF_energy if (level_shift-level_shift_save > 40.d0) then level_shift = level_shift_save * 4.d0 SOFT_TOUCH level_shift exit endif + dim_DIIS=0 enddo + level_shift = level_shift * 0.5d0 SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF @@ -175,7 +177,7 @@ END_DOC call save_mos endif - call write_double(6, Energy_SCF, 'SCF energy') + call write_double(6, energy_SCF, 'SCF energy') call write_time(6) diff --git a/src/tc_bi_ortho/print_he_tc_energy.irp.f b/src/tc_bi_ortho/print_he_tc_energy.irp.f new file mode 100644 index 00000000..84d34bcb --- /dev/null +++ b/src/tc_bi_ortho/print_he_tc_energy.irp.f @@ -0,0 +1,142 @@ + +! --- + +program print_he_tc_energy + + implicit none + + call print_overlap() + + call print_energy1() + +end + +! --- + +subroutine print_overlap() + + implicit none + integer :: i, j, k, l + double precision :: S_ij + + print *, ' ao_overlap:' + do i = 1, ao_num + do j = 1, ao_num + print *, j, i, ao_overlap(j,i) + enddo + enddo + + print *, ' mo_overlap:' + do i = 1, mo_num + do j = 1, mo_num + + S_ij = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + S_ij += mo_l_coef(k,i) * ao_overlap(k,l) * mo_r_coef(l,j) + enddo + enddo + + print *, i, j, S_ij + enddo + enddo + +end subroutine print_overlap + +! --- + +subroutine print_energy1() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp, e_ns + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! < phi_1 phi_1 | h1 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1) + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_1 | h2 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + e_ns = 0.d0 + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ao_two_e_tc_tot(i,j,k,l) = + e += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_two_e_tc_tot(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1) + + e_ns += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_non_hermit_term_chemist(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1) + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_1 | phi_1 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + e_ns = e_ns / n + + print *, ' tc energy = ', e + print *, ' non-sym energy = ', e_ns + +end subroutine print_energy1 + +! --- + + diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 4bfb2da3..a56a432f 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -66,7 +66,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aab = ',accu + !print*,'aab = ',accu ! beta/beta/alpha three-body accu = 0.d0 @@ -83,7 +83,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'abb = ',accu + !print*,'abb = ',accu ! alpha/alpha/alpha three-body accu = 0.d0 @@ -99,7 +99,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aaa = ',accu + !print*,'aaa = ',accu ! beta/beta/beta three-body accu = 0.d0 @@ -115,7 +115,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'bbb = ',accu + !print*,'bbb = ',accu endif end diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index bf1388e5..44e27e7c 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -18,6 +18,10 @@ do j = 1, N_det ! < J | Htilde | I > call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + + !print *, ' hmono = ', hmono + !print *, ' htwoe = ', htwoe + !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 4144fcad..b2ccb091 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -1,125 +1,150 @@ - BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)] +! --- + + BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)] + + BEGIN_DOC + ! + ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) + ! natorb_tc_leigvec_mo : LEFT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) + ! natorb_tc_eigval : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNINING :: can be negative !! + ! + END_DOC + implicit none - BEGIN_DOC - ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) - ! natorb_tc_leigvec_mo : LEFT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) - ! natorb_tc_eigval : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNINING :: can be negative !! - END_DOC - double precision, allocatable :: dm_tmp(:,:),fock_diag(:) - double precision :: thr_deg - integer :: i,j,k,n_real - allocate( dm_tmp(mo_num,mo_num),fock_diag(mo_num)) + integer :: i, j, k, n_real + double precision :: thr_d, thr_nd, thr_deg, accu + double precision :: accu_d, accu_nd + double precision, allocatable :: dm_tmp(:,:), fock_diag(:) + + allocate(dm_tmp(mo_num,mo_num), fock_diag(mo_num)) + dm_tmp(:,:) = -tc_transition_matrix(:,:,1,1) - print*,'dm_tmp' + + print *, ' dm_tmp' do i = 1, mo_num - fock_diag(i) = fock_matrix_tc_mo_tot(i,i) - write(*,'(100(F16.10,X))')-dm_tmp(:,i) + fock_diag(i) = fock_matrix_tc_mo_tot(i,i) + write(*, '(100(F16.10,X))') -dm_tmp(:,i) enddo + + thr_d = 1.d-6 + thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen(fock_diag,dm_tmp,mo_num,thr_deg,& - natorb_tc_leigvec_mo,natorb_tc_reigvec_mo,& - natorb_tc_eigval) + call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) ! call non_hrmt_bieig( mo_num, dm_tmp& ! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& ! , n_real, natorb_tc_eigval ) - double precision :: accu + accu = 0.d0 do i = 1, n_real - print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) - accu += -natorb_tc_eigval(i) + print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) + accu += -natorb_tc_eigval(i) enddo - print*,'accu = ',accu + print *, ' accu = ', accu + dm_tmp = 0.d0 do i = 1, n_real - accu = 0.d0 - do k = 1, mo_num - accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i) - enddo - accu = 1.d0/dsqrt(dabs(accu)) - natorb_tc_reigvec_mo(:,i) *= accu - natorb_tc_leigvec_mo(:,i) *= accu - do j = 1, n_real + accu = 0.d0 do k = 1, mo_num - dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j) + accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i) + enddo + accu = 1.d0/dsqrt(dabs(accu)) + natorb_tc_reigvec_mo(:,i) *= accu + natorb_tc_leigvec_mo(:,i) *= accu + do j = 1, n_real + do k = 1, mo_num + dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j) + enddo enddo - enddo enddo - double precision :: accu_d, accu_nd - accu_d = 0.d0 + + accu_d = 0.d0 accu_nd = 0.d0 do i = 1, mo_num - accu_d += dm_tmp(i,i) - ! write(*,'(100(F16.10,X))')dm_tmp(:,i) - do j = 1, mo_num - if(i==j)cycle - accu_nd += dabs(dm_tmp(j,i)) - enddo + accu_d += dm_tmp(i,i) + !write(*,'(100(F16.10,X))')dm_tmp(:,i) + do j = 1, mo_num + if(i==j)cycle + accu_nd += dabs(dm_tmp(j,i)) + enddo enddo - print*,'Trace of the overlap between TC natural orbitals ',accu_d - print*,'L1 norm of extra diagonal elements of overlap matrix ',accu_nd + print *, ' Trace of the overlap between TC natural orbitals ', accu_d + print *, ' L1 norm of extra diagonal elements of overlap matrix ', accu_nd + + deallocate(dm_tmp, fock_diag) +END_PROVIDER + +! --- - END_PROVIDER - - BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)] + BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)] + implicit none - integer ::i,j,k - print*,'Diagonal elements of the Fock matrix before ' - do i = 1, mo_num - write(*,*)i,Fock_matrix_tc_mo_tot(i,i) - enddo + integer :: i,j,k + integer, allocatable :: iorder(:) double precision, allocatable :: fock_diag(:) + + print *, ' Diagonal elements of the Fock matrix before ' + + do i = 1, mo_num + write(*,*) i, Fock_matrix_tc_mo_tot(i,i) + enddo + allocate(fock_diag(mo_num)) fock_diag = 0.d0 do i = 1, mo_num - fock_diag(i) = 0.d0 - do j = 1, mo_num - do k = 1, mo_num - fock_diag(i) += natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i) + fock_diag(i) = 0.d0 + do j = 1, mo_num + do k = 1, mo_num + fock_diag(i) += natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i) + enddo enddo - enddo enddo - integer, allocatable :: iorder(:) + allocate(iorder(mo_num)) do i = 1, mo_num iorder(i) = i enddo - call dsort(fock_diag,iorder,mo_num) - print*,'Diagonal elements of the Fock matrix after ' + call dsort(fock_diag, iorder, mo_num) + + print *, ' Diagonal elements of the Fock matrix after ' do i = 1, mo_num - write(*,*)i,fock_diag(i) + write(*,*) i, fock_diag(i) enddo + deallocate(fock_diag) + do i = 1, mo_num - fock_diag_sorted_v_natorb(i) = natorb_tc_eigval(iorder(i)) - do j = 1, mo_num - fock_diag_sorted_r_natorb(j,i) = natorb_tc_reigvec_mo(j,iorder(i)) - fock_diag_sorted_l_natorb(j,i) = natorb_tc_leigvec_mo(j,iorder(i)) - enddo + fock_diag_sorted_v_natorb(i) = natorb_tc_eigval(iorder(i)) + do j = 1, mo_num + fock_diag_sorted_r_natorb(j,i) = natorb_tc_reigvec_mo(j,iorder(i)) + fock_diag_sorted_l_natorb(j,i) = natorb_tc_leigvec_mo(j,iorder(i)) + enddo enddo + deallocate(iorder) - END_PROVIDER +END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)] +&BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ] - BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)] - &BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ] + BEGIN_DOC + ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP + ! + ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo + END_DOC - BEGIN_DOC - ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP - ! - ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo - END_DOC - - implicit none - integer :: i, j, k, q, p - double precision :: accu, accu_d - double precision, allocatable :: tmp(:,:) + implicit none + integer :: i, j, k, q, p + double precision :: accu, accu_d + double precision, allocatable :: tmp(:,:) ! ! MO_R x R diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 4c35d88c..a206dfa9 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -86,7 +86,7 @@ default: False type: Threshold doc: Threshold on the convergence of the Hartree Fock energy. interface: ezfio,provider,ocaml -default: 1.e-10 +default: 1.e-12 [n_it_tcscf_max] type: Strictly_positive_int @@ -118,7 +118,6 @@ doc: type of 1-body Jastrow interface: ezfio, provider, ocaml default: 0 - [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue @@ -130,3 +129,40 @@ type: logical doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False + +[ng_fit_jast] +type: integer +doc: nb of Gaussians used to fit Jastrow fcts +interface: ezfio,provider,ocaml +default: 6 + +[max_dim_diis_tcscf] +type: integer +doc: Maximum size of the DIIS extrapolation procedure +interface: ezfio,provider,ocaml +default: 15 + +[threshold_diis_tcscf] +type: Threshold +doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used. +interface: ezfio,provider,ocaml +default: 0. + +[level_shift_tcscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve TCSCF convergence +interface: ezfio,provider,ocaml +default: 0. + +[tcscf_algorithm] +type: character*(32) +doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] +interface: ezfio,provider,ocaml +default: DIIS + +[im_thresh_tcscf] +type: Threshold +doc: Thresholds on the Imag part of energy +interface: ezfio,provider,ocaml +default: 1.e-7 + diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f index 00558de6..9c571f8a 100644 --- a/src/tc_scf/diago_bi_ort_tcfock.irp.f +++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f @@ -1,3 +1,5 @@ +! --- + BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)] @@ -9,28 +11,50 @@ implicit none integer :: n_real_tc - integer :: i, k, l + integer :: i, j, k, l double precision :: accu_d, accu_nd, accu_tmp + double precision :: thr_d, thr_nd double precision :: norm double precision, allocatable :: eigval_right_tmp(:) + double precision, allocatable :: F_tmp(:,:) - allocate( eigval_right_tmp(mo_num) ) + thr_d = 1d-6 + thr_nd = 1d-6 + + allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) ) PROVIDE Fock_matrix_tc_mo_tot - call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot & - , fock_tc_leigvec_mo, fock_tc_reigvec_mo & - , n_real_tc, eigval_right_tmp ) + do i = 1, mo_num + do j = 1, mo_num + F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i) + enddo + enddo + ! insert level shift here + do i = elec_beta_num+1, elec_alpha_num + F_tmp(i,i) += 0.5d0 * level_shift_tcscf + enddo + do i = elec_alpha_num+1, mo_num + F_tmp(i,i) += level_shift_tcscf + enddo + + call non_hrmt_bieig( mo_num, F_tmp, thr_d, thr_nd & + , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + , n_real_tc, eigval_right_tmp ) + !if(max_ov_tc_scf)then -! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot & -! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & -! , n_real_tc, eigval_right_tmp ) + ! call non_hrmt_fock_mat( mo_num, F_tmp, thr_d, thr_nd & + ! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + ! , n_real_tc, eigval_right_tmp ) !else - ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot & + ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp & ! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & ! , n_real_tc, eigval_right_tmp ) !endif + deallocate(F_tmp) + + ! if(n_real_tc .ne. mo_num)then ! print*,'n_real_tc ne mo_num ! ',n_real_tc ! stop @@ -38,9 +62,12 @@ eigval_fock_tc_mo = eigval_right_tmp ! print*,'Eigenvalues of Fock_matrix_tc_mo_tot' -! do i = 1, mo_num +! do i = 1, elec_alpha_num ! print*, i, eigval_fock_tc_mo(i) ! enddo +! do i = elec_alpha_num+1, mo_num +! print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf +! enddo ! deallocate( eigval_right_tmp ) ! L.T x R @@ -49,6 +76,8 @@ , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) & , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) ) + ! --- + accu_d = 0.d0 accu_nd = 0.d0 do i = 1, mo_num @@ -59,45 +88,80 @@ else accu_tmp = overlap_fock_tc_eigvec_mo(k,i) accu_nd += accu_tmp * accu_tmp - if(dabs(overlap_fock_tc_eigvec_mo(k,i)).gt.1.d-10)then - print*,'k,i',k,i,overlap_fock_tc_eigvec_mo(k,i) + if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then + print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i) endif endif enddo enddo - accu_nd = dsqrt(accu_nd)/accu_d - - if( accu_nd .gt. 1d-8 ) then + accu_nd = dsqrt(accu_nd) / accu_d + if(accu_nd .gt. thr_nd) then print *, ' bi-orthog failed' - print*,'accu_nd MO = ', accu_nd - print*,'overlap_fock_tc_eigvec_mo = ' + print *, ' accu_nd MO = ', accu_nd, thr_nd + print *, ' overlap_fock_tc_eigvec_mo = ' do i = 1, mo_num write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:) enddo - stop + stop endif - if( dabs(accu_d - dble(mo_num)) .gt. 1e-7 ) then - print *, 'mo_num = ', mo_num - print *, 'accu_d MO = ', accu_d - print *, 'normalizing vectors ...' + ! --- + + if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d) then + + print *, ' mo_num = ', mo_num + print *, ' accu_d MO = ', accu_d, thr_d + print *, ' normalizing vectors ...' do i = 1, mo_num norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i))) - if( norm.gt.1e-7 ) then + if(norm .gt. thr_d) then do k = 1, mo_num fock_tc_reigvec_mo(k,i) *= 1.d0/norm fock_tc_leigvec_mo(k,i) *= 1.d0/norm enddo endif enddo + call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0 & , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) & , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) & , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) ) + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, mo_num + do k = 1, mo_num + if(i==k) then + accu_tmp = overlap_fock_tc_eigvec_mo(k,i) + accu_d += dabs(accu_tmp) + else + accu_tmp = overlap_fock_tc_eigvec_mo(k,i) + accu_nd += accu_tmp * accu_tmp + if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then + print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i) + endif + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) / accu_d + if(accu_nd .gt. thr_nd) then + print *, ' bi-orthog failed' + print *, ' accu_nd MO = ', accu_nd, thr_nd + print *, ' overlap_fock_tc_eigvec_mo = ' + do i = 1, mo_num + write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:) + enddo + stop + endif + endif + ! --- + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)] &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)] &BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ] diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f new file mode 100644 index 00000000..ff1077f5 --- /dev/null +++ b/src/tc_scf/diis_tcscf.irp.f @@ -0,0 +1,186 @@ +! --- + +BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ] + + implicit none + + if(threshold_DIIS_TCSCF == 0.d0) then + threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf) + else + threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF + endif + ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ] + + BEGIN_DOC + ! + ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T + ! + ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk + ! + END_DOC + + implicit none + + Q_alpha = 0.d0 + call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0 & + , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + , 0.d0, Q_alpha, size(Q_alpha, 1) ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ] + + BEGIN_DOC + ! + ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T + ! + ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk + ! + END_DOC + + implicit none + + Q_beta = 0.d0 + call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & + , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + , 0.d0, Q_beta, size(Q_beta, 1) ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ] + + BEGIN_DOC + ! + ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T + ! + ! with: + ! | 1 if i = j = 1, ..., nb of occ orbitals + ! [eta_occ]_ij = | + ! | 0 otherwise + ! + ! the diis error is defines as: + ! e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao + ! with: + ! mo_l_coef.T x ao_overlap x mo_r_coef = I + ! F_mo = mo_l_coef.T x F_ao x mo_r_coef + ! F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T + ! + ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T + ! + ! at convergence: + ! F_mo x eta_occ - eta_occ x F_mo = 0 + ! ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0 + ! ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir + ! ==> Brillouin conditions + ! + END_DOC + + implicit none + + if(elec_alpha_num == elec_beta_num) then + Q_matrix = Q_alpha + Q_alpha + else + Q_matrix = Q_alpha + Q_beta + endif + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] + + implicit none + double precision, allocatable :: tmp(:,:) + + allocate(tmp(ao_num,ao_num)) + + ! F x Q + call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & + , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), Q_matrix, size(Q_matrix, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + ! F x Q x S + call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & + , tmp, size(tmp, 1), ao_overlap, size(ao_overlap, 1) & + , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) ) + + ! S x Q + tmp = 0.d0 + call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & + , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + ! F x Q x S - S x Q x F + call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0 & + , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) & + , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) ) + + deallocate(tmp) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] + + implicit none + + call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & + , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) + +END_PROVIDER + +! --- + +! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ] +!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ] +! +! BEGIN_DOC +! ! +! ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis +! ! +! ! F' = X.T x F x X where X = ao_overlap^(-1/2) +! ! +! ! F' x Cr' = Cr' x E ==> F Cr = Cr x E with Cr = X x Cr' +! ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl' +! ! +! END_DOC +! +! implicit none +! double precision, allocatable :: tmp1(:,:), tmp2(:,:) +! +! ! --- +! ! Fock matrix in orthogonal basis: F' = X.T x F x X +! +! allocate(tmp1(ao_num,ao_num)) +! call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0 & +! , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) & +! , 0.d0, tmp1, size(tmp1, 1) ) +! +! allocate(tmp2(ao_num,ao_num)) +! call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0 & +! , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) & +! , 0.d0, tmp2, size(tmp2, 1) ) +! +! ! --- +! +! ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues +! ! TODO +! +! ! Back-transform eigenvectors: C =X.C' +! +!END_PROVIDER + +! --- + +~ diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index ca500fbb..c3642a7e 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -74,93 +74,109 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)] + two_e_tc_non_hermit_integral_beta END_PROVIDER -! --- - -!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] -! implicit none -! BEGIN_DOC -! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis -! END_DOC -! Fock_matrix_tc_ao_tot = 0.5d0 * (Fock_matrix_tc_ao_alpha + Fock_matrix_tc_ao_beta) -!END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] - implicit none + BEGIN_DOC - ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis + ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis END_DOC - if(bi_ortho)then - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & - , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - if(three_body_h_tc)then - Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth - endif + + implicit none + + if(bi_ortho) then + + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + if(three_body_h_tc) then + Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth + endif + else - call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + endif + END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] - implicit none + BEGIN_DOC - ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis + ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis END_DOC - if(bi_ortho)then + + implicit none + + if(bi_ortho) then + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) - if(three_body_h_tc)then - Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth - endif + + if(three_body_h_tc) then + Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth + endif + else - call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & + + call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + endif + END_PROVIDER - -!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)] -! implicit none -! BEGIN_DOC -! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis -! END_DOC -! Fock_matrix_tc_mo_tot = 0.5d0 * (Fock_matrix_tc_mo_alpha + Fock_matrix_tc_mo_beta) -! if(three_body_h_tc) then -! Fock_matrix_tc_mo_tot += fock_3_mat -! endif -! !call restore_symmetry(mo_num, mo_num, Fock_matrix_tc_mo_tot, mo_num, 1.d-10) -!END_PROVIDER - ! --- BEGIN_PROVIDER [ double precision, grad_non_hermit_left] &BEGIN_PROVIDER [ double precision, grad_non_hermit_right] &BEGIN_PROVIDER [ double precision, grad_non_hermit] - implicit none + + implicit none integer :: i, k - grad_non_hermit_left = 0.d0 + + grad_non_hermit_left = 0.d0 grad_non_hermit_right = 0.d0 + do i = 1, elec_beta_num ! doc --> SOMO do k = elec_beta_num+1, elec_alpha_num - grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) enddo enddo + do i = 1, elec_beta_num ! doc --> virt do k = elec_alpha_num+1, mo_num - grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) enddo enddo + do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt do k = elec_alpha_num+1, mo_num - grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i)) - grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k)) + grad_non_hermit_left += dabs(Fock_matrix_tc_mo_tot(k,i)) + grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k)) enddo enddo - grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right + + grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right + END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] + + implicit none + + call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & + , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) + +END_PROVIDER + +! --- + + diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f index f73a5049..35b6aac6 100644 --- a/src/tc_scf/fock_three.irp.f +++ b/src/tc_scf/fock_three.irp.f @@ -70,52 +70,72 @@ subroutine give_fock_ia_three_e_total(i,a,contrib) end +! --- + BEGIN_PROVIDER [double precision, diag_three_elem_hf] - implicit none - integer :: i,j,k,ipoint,mm - double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231 - print*,'providing diag_three_elem_hf' - if(.not.three_body_h_tc)then - diag_three_elem_hf = 0.d0 - else - if(.not.bi_ortho)then - one_third = 1.d0/3.d0 - two_third = 2.d0/3.d0 - four_third = 4.d0/3.d0 - diag_three_elem_hf = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call give_integrals_3_body(k,j,i,j,i,k,exchange_int_231) - diag_three_elem_hf += two_third * exchange_int_231 - enddo - enddo - enddo - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & - -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & - -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) - contrib *= four_third - contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & - - four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) - diag_three_elem_hf += weight * contrib - enddo - enddo - diag_three_elem_hf = - diag_three_elem_hf + + implicit none + integer :: i, j, k, ipoint, mm + double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 + double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb + + !print *, ' providing diag_three_elem_hf' + + if(.not. three_body_h_tc) then + + diag_three_elem_hf = 0.d0 + else - double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb - provide mo_l_coef mo_r_coef - call give_aaa_contrib(integral_aaa) - call give_aab_contrib(integral_aab) - call give_abb_contrib(integral_abb) - call give_bbb_contrib(integral_bbb) - diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb + + if(.not. bi_ortho) then + + ! --- + + one_third = 1.d0/3.d0 + two_third = 2.d0/3.d0 + four_third = 4.d0/3.d0 + diag_three_elem_hf = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231) + diag_three_elem_hf += two_third * exchange_int_231 + enddo + enddo + enddo + do mm = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & + - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & + - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) + contrib *= four_third + contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & + -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) + diag_three_elem_hf += weight * contrib + enddo + enddo + + diag_three_elem_hf = - diag_three_elem_hf + + ! --- + + else + + provide mo_l_coef mo_r_coef + call give_aaa_contrib(integral_aaa) + call give_aab_contrib(integral_aab) + call give_abb_contrib(integral_abb) + call give_bbb_contrib(integral_bbb) + diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb + + endif + endif - endif + END_PROVIDER +! --- BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)] implicit none diff --git a/src/tc_scf/print_fit_param.irp.f b/src/tc_scf/print_fit_param.irp.f new file mode 100644 index 00000000..f8bcfa7f --- /dev/null +++ b/src/tc_scf/print_fit_param.irp.f @@ -0,0 +1,60 @@ +program print_fit_param + + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + + implicit none + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 +! my_n_pt_r_grid = 10 ! small grid for quick debug +! my_n_pt_a_grid = 26 ! small grid for quick debug + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + !call create_guess + !call orthonormalize_mos + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i + + mu_erf = 1.d0 + touch mu_erf + + print *, ' fit for (1 - erf(x))^2' + do i = 1, n_max_fit_slat + print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i) + enddo + + return +end subroutine main + +! --- + diff --git a/src/tc_scf/rh_tcscf.irp.f b/src/tc_scf/rh_tcscf.irp.f new file mode 100644 index 00000000..597c3e67 --- /dev/null +++ b/src/tc_scf/rh_tcscf.irp.f @@ -0,0 +1,336 @@ +! --- + +subroutine rh_tcscf() + + BEGIN_DOC + ! + ! Roothaan-Hall algorithm for TC-SCF calculation + ! + END_DOC + + implicit none + + integer :: i, j + integer :: iteration_TCSCF, dim_DIIS, index_dim_DIIS + double precision :: energy_TCSCF, energy_TCSCF_1e, energy_TCSCF_2e, energy_TCSCF_3e, gradie_TCSCF + double precision :: energy_TCSCF_previous, delta_energy_TCSCF + double precision :: gradie_TCSCF_previous, delta_gradie_TCSCF + double precision :: max_error_DIIS_TCSCF + double precision :: level_shift_save + double precision :: delta_energy_tmp, delta_gradie_tmp + double precision, allocatable :: F_DIIS(:,:,:), e_DIIS(:,:,:) + double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:) + + logical, external :: qp_stop + + + !PROVIDE ao_md5 mo_occ + PROVIDE level_shift_TCSCF + + allocate( mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num) & + , F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), e_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF) ) + + F_DIIS = 0.d0 + e_DIIS = 0.d0 + mo_l_coef_save = 0.d0 + mo_r_coef_save = 0.d0 + + call write_time(6) + + ! --- + ! Initialize energies and density matrices + + energy_TCSCF_previous = TC_HF_energy + energy_TCSCF_1e = TC_HF_one_e_energy + energy_TCSCF_2e = TC_HF_two_e_energy + energy_TCSCF_3e = 0.d0 + if(three_body_h_tc) then + energy_TCSCF_3e = diag_three_elem_hf + endif + gradie_TCSCF_previous = grad_non_hermit + delta_energy_TCSCF = 1.d0 + delta_gradie_TCSCF = 1.d0 + iteration_TCSCF = 0 + dim_DIIS = 0 + max_error_DIIS_TCSCF = 1.d0 + + ! --- + + ! Start of main SCF loop + + PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + + do while( (max_error_DIIS_TCSCF > threshold_DIIS_nonzero_TCSCF) .or. & + !(dabs(delta_energy_TCSCF) > thresh_TCSCF) .or. & + (dabs(gradie_TCSCF_previous) > dsqrt(thresh_TCSCF)) ) + + iteration_TCSCF += 1 + if(iteration_TCSCF > n_it_TCSCF_max) then + print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max + exit + endif + + ! current size of the DIIS space + dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF) + + ! --- + + if((tcscf_algorithm == 'DIIS') .and. (dabs(delta_energy_TCSCF) > 1.d-6)) then + + ! store Fock and error matrices at each iteration + index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1 + do j = 1, ao_num + do i = 1, ao_num + F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j) + e_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao(i,j) + enddo + enddo + + ! Compute the extrapolated Fock matrix + call extrapolate_TC_Fock_matrix( e_DIIS, F_DIIS & + , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) & + , iteration_TCSCF, dim_DIIS ) + + Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot + Fock_matrix_tc_ao_beta = 0.5d0 * Fock_matrix_tc_ao_tot + !TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta + + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & + , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta , size(Fock_matrix_tc_ao_beta , 1) & + , Fock_matrix_tc_mo_beta , size(Fock_matrix_tc_mo_beta , 1) ) + TOUCH Fock_matrix_tc_mo_alpha Fock_matrix_tc_mo_beta + + endif + + ! --- + + mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num) + TOUCH mo_l_coef mo_r_coef + + ! --- + + ! calculate error vectors + max_error_DIIS_TCSCF = maxval(abs(FQS_SQF_mo)) + + ! --- + + delta_energy_tmp = TC_HF_energy - energy_TCSCF_previous + delta_gradie_tmp = grad_non_hermit - gradie_TCSCF_previous + + ! --- + + do while((dabs(delta_energy_tmp) > 0.1d0) .and. (iteration_TCSCF > 1)) +! print *, ' very big step : ', delta_energy_tmp +! print *, ' TC level shift = ', level_shift_TCSCF + + mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) + + if(level_shift_TCSCF <= .1d0) then + level_shift_TCSCF = 1.d0 + else + level_shift_TCSCF = level_shift_TCSCF * 3.0d0 + endif + TOUCH mo_l_coef mo_r_coef level_shift_TCSCF + + mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num) + mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num) + TOUCH mo_l_coef mo_r_coef + + delta_energy_tmp = TC_HF_energy - energy_TCSCF_previous + + if(level_shift_TCSCF - level_shift_save > 40.d0) then + level_shift_TCSCF = level_shift_save * 4.d0 + SOFT_TOUCH level_shift_TCSCF + exit + endif + + dim_DIIS = 0 + enddo +! print *, ' very big step : ', delta_energy_tmp +! print *, ' TC level shift = ', level_shift_TCSCF + + ! --- + + level_shift_TCSCF = 0.d0 + !level_shift_TCSCF = level_shift_TCSCF * 0.5d0 + SOFT_TOUCH level_shift_TCSCF + + gradie_TCSCF = grad_non_hermit + energy_TCSCF = TC_HF_energy + energy_TCSCF_1e = TC_HF_one_e_energy + energy_TCSCF_2e = TC_HF_two_e_energy + energy_TCSCF_3e = 0.d0 + if(three_body_h_tc) then + energy_TCSCF_3e = diag_three_elem_hf + endif + delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous + delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous + + energy_TCSCF_previous = energy_TCSCF + gradie_TCSCF_previous = gradie_TCSCF + + + level_shift_save = level_shift_TCSCF + mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num) + mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num) + + + print *, ' iteration = ', iteration_TCSCF + print *, ' total TC energy = ', energy_TCSCF + print *, ' 1-e TC energy = ', energy_TCSCF_1e + print *, ' 2-e TC energy = ', energy_TCSCF_2e + print *, ' 3-e TC energy = ', energy_TCSCF_3e + print *, ' |delta TC energy| = ', delta_energy_TCSCF + print *, ' TC gradient = ', gradie_TCSCF + print *, ' delta TC gradient = ', delta_gradie_TCSCF + print *, ' max TC DIIS error = ', max_error_DIIS_TCSCF + print *, ' TC DIIS dim = ', dim_DIIS + print *, ' TC level shift = ', level_shift_TCSCF + print *, ' ' + + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + + if(qp_stop()) exit + enddo + + ! --- + + call write_time(6) + + deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, e_DIIS) + +end + +! --- + +subroutine extrapolate_TC_Fock_matrix(e_DIIS, F_DIIS, F_ao, size_F_ao, iteration_TCSCF, dim_DIIS) + + BEGIN_DOC + ! + ! Compute the extrapolated Fock matrix using the DIIS procedure + ! + ! e = \sum_i c_i e_i and \sum_i c_i = 1 + ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1) + ! + END_DOC + + implicit none + + integer, intent(in) :: iteration_TCSCF, size_F_ao + integer, intent(inout) :: dim_DIIS + double precision, intent(in) :: F_DIIS(ao_num,ao_num,dim_DIIS) + double precision, intent(in) :: e_DIIS(ao_num,ao_num,dim_DIIS) + double precision, intent(inout) :: F_ao(size_F_ao,ao_num) + + double precision, allocatable :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:) + + integer :: i, j, k, l, i_DIIS, j_DIIS + integer :: lwork + double precision :: rcond, ferr, berr + integer, allocatable :: iwork(:) + double precision, allocatable :: scratch(:,:) + + if(dim_DIIS < 1) then + return + endif + + allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) & + , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) ) + + ! Compute the matrices B and X + B_matrix_DIIS(:,:) = 0.d0 + do j = 1, dim_DIIS + j_DIIS = min(dim_DIIS, mod(iteration_TCSCF-j, max_dim_DIIS_TCSCF)+1) + + do i = 1, dim_DIIS + i_DIIS = min(dim_DIIS, mod(iteration_TCSCF-i, max_dim_DIIS_TCSCF)+1) + + ! Compute product of two errors vectors + do l = 1, ao_num + do k = 1, ao_num + B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + e_DIIS(k,l,i_DIIS) * e_DIIS(k,l,j_DIIS) + enddo + enddo + + enddo + enddo + + ! Pad B matrix and build the X matrix + + C_vector_DIIS(:) = 0.d0 + do i = 1, dim_DIIS + B_matrix_DIIS(i,dim_DIIS+1) = -1.d0 + B_matrix_DIIS(dim_DIIS+1,i) = -1.d0 + enddo + C_vector_DIIS(dim_DIIS+1) = -1.d0 + + deallocate(scratch) + + ! Estimate condition number of B + integer :: info + double precision :: anorm + integer, allocatable :: ipiv(:) + double precision, allocatable :: AF(:,:) + double precision, external :: dlange + + lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5) + allocate(AF(dim_DIIS+1,dim_DIIS+1)) + allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) ) + allocate(scratch(lwork,1)) + scratch(:,1) = 0.d0 + + anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1)) + + AF(:,:) = B_matrix_DIIS(:,:) + call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info) + if(info /= 0) then + dim_DIIS = 0 + return + endif + + call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info) + if(info /= 0) then + dim_DIIS = 0 + return + endif + + if(rcond < 1.d-14) then + dim_DIIS = 0 + return + endif + + ! solve the linear system C = B x X + + X_vector_DIIS = C_vector_DIIS + call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info) + + deallocate(scratch, AF, iwork) + if(info < 0) then + stop ' bug in TC-DIIS' + endif + + ! Compute extrapolated Fock matrix + + !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200) + do j = 1, ao_num + do i = 1, ao_num + F_ao(i,j) = 0.d0 + enddo + do k = 1, dim_DIIS + if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle + do i = 1,ao_num + ! FPE here + F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + +! --- + diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index d32d324d..d53991ed 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -73,7 +73,7 @@ subroutine maximize_overlap() ! --- - call rotate_degen_eigvec(n, m, e, C, W, L, R) + call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R) ! --- @@ -115,7 +115,7 @@ end subroutine maximize_overlap ! --- -subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) +subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0) implicit none @@ -124,7 +124,7 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) double precision, intent(inout) :: L0(n,m), R0(n,m) - integer :: i, j, k, kk, mm, id1 + integer :: i, j, k, kk, mm, id1, tot_deg double precision :: ei, ej, de, de_thr integer, allocatable :: deg_num(:) double precision, allocatable :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:) @@ -162,12 +162,19 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) enddo enddo + tot_deg = 0 do i = 1, m if(deg_num(i).gt.1) then print *, ' degen on', i, deg_num(i) + tot_deg = tot_deg + 1 endif enddo + if(tot_deg .eq. 0) then + print *, ' no degen' + return + endif + ! --- do i = 1, m @@ -243,6 +250,122 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) deallocate(S, Snew, T) -end subroutine rotate_degen_eigvec +end subroutine rotate_degen_eigvec_to_maximize_overlap + +! --- + +subroutine fix_right_to_one() + + implicit none + integer :: i, j, m, n, mm, tot_deg + double precision :: accu_d, accu_nd + double precision :: de_thr, ei, ej, de + double precision :: thr_d, thr_nd + integer, allocatable :: deg_num(:) + double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) + double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) + + thr_d = 1d-7 + thr_nd = 1d-7 + + n = ao_num + m = mo_num + + allocate(L0(n,m), R0(n,m), W(n,n), e0(m)) + L0 = mo_l_coef + R0 = mo_r_coef + W = ao_overlap + + print*, ' fock matrix diag elements' + do i = 1, m + e0(i) = Fock_matrix_tc_mo_tot(i,i) + print*, e0(i) + enddo + + ! --- + + allocate( deg_num(m) ) + do i = 1, m + deg_num(i) = 1 + enddo + + de_thr = 1d-6 + + do i = 1, m-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, m + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + deallocate(e0) + + tot_deg = 0 + do i = 1, m + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + tot_deg = tot_deg + 1 + endif + enddo + + if(tot_deg .eq. 0) then + print *, ' no degen' + return + endif + + ! --- + + do i = 1, m + mm = deg_num(i) + + if(mm .gt. 1) then + + allocate(L(n,mm), R(n,mm)) + do j = 1, mm + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + enddo + + ! --- + + call impose_weighted_orthog_svd(n, mm, W, R) + call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L) + + ! --- + + do j = 1, mm + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + deallocate(L, R) + + endif + enddo + + call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.) + + deallocate(W, deg_num) + + mo_l_coef = L0 + mo_r_coef = R0 + deallocate(L0, R0) + + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + print *, ' orbitals are rotated ' + + return +end subroutine fix_right_to_one ! --- diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 51c218fa..15264768 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -1,261 +1,335 @@ -subroutine minimize_tc_orb_angles - implicit none - double precision :: thr_deg - logical :: good_angles - integer :: i - good_angles = .False. - thr_deg = thr_degen_tc - call print_energy_and_mos - i = 1 - print*,'Minimizing the angles between the TC orbitals' - do while (.not. good_angles) - print*,'iteration = ',i - call routine_save_rotated_mos(thr_deg,good_angles) - thr_deg *= 10.d0 - i+=1 - if(i.gt.100)then - print*,'minimize_tc_orb_angles does not seem to converge ..' - print*,'Something is weird in the tc orbitals ...' - print*,'STOPPING' - endif - enddo - print*,'Converged ANGLES MINIMIZATION !!' - call print_angles_tc - call print_energy_and_mos + +! --- + +subroutine minimize_tc_orb_angles() + + implicit none + logical :: good_angles + integer :: i + double precision :: thr_deg + + good_angles = .False. + thr_deg = thr_degen_tc + + call print_energy_and_mos() + + print *, ' Minimizing the angles between the TC orbitals' + i = 1 + do while (.not. good_angles) + print *, ' iteration = ', i + call routine_save_rotated_mos(thr_deg, good_angles) + thr_deg *= 10.d0 + i += 1 + if(i .gt. 100) then + print *, ' minimize_tc_orb_angles does not seem to converge ..' + print *, ' Something is weird in the tc orbitals ...' + print *, ' STOPPING' + stop + endif + enddo + print *, ' Converged ANGLES MINIMIZATION !!' + + call print_angles_tc() + call print_energy_and_mos() + end -subroutine routine_save_rotated_mos(thr_deg,good_angles) - implicit none - double precision, intent(in) :: thr_deg - logical, intent(out) :: good_angles - good_angles = .False. - integer :: i,j,k,n_degen_list,m,n,n_degen,ilast,ifirst - double precision, allocatable :: mo_r_coef_good(:,:),mo_l_coef_good(:,:) - allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) - double precision, allocatable :: mo_r_coef_new(:,:) - double precision :: norm - print*,'***************************************' - print*,'***************************************' - print*,'THRESHOLD FOR DEGENERACIES ::: ',thr_deg - print*,'***************************************' - print*,'***************************************' - print*,'Starting with the following TC energy gradient :',grad_non_hermit - mo_r_coef_good = mo_r_coef - mo_l_coef_good = mo_l_coef - allocate(mo_r_coef_new(ao_num, mo_num)) - mo_r_coef_new = mo_r_coef - do i = 1, mo_num - norm = 1.d0/dsqrt(overlap_mo_r(i,i)) - do j = 1, ao_num - mo_r_coef_new(j,i) *= norm +! --- + +subroutine routine_save_rotated_mos(thr_deg, good_angles) + + implicit none + + double precision, intent(in) :: thr_deg + logical, intent(out) :: good_angles + + integer :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst + double precision :: max_angle, norm + integer, allocatable :: list_degen(:,:) + double precision, allocatable :: new_angles(:) + double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:) + double precision, allocatable :: mo_r_coef_new(:,:) + double precision, allocatable :: fock_diag(:),s_mat(:,:) + double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) + double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) + + good_angles = .False. + + allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) + + print *, ' ***************************************' + print *, ' ***************************************' + print *, ' THRESHOLD FOR DEGENERACIES ::: ', thr_deg + print *, ' ***************************************' + print *, ' ***************************************' + print *, ' Starting with the following TC energy gradient :', grad_non_hermit + + mo_r_coef_good = mo_r_coef + mo_l_coef_good = mo_l_coef + + allocate(mo_r_coef_new(ao_num, mo_num)) + mo_r_coef_new = mo_r_coef + do i = 1, mo_num + norm = 1.d0/dsqrt(overlap_mo_r(i,i)) + do j = 1, ao_num + mo_r_coef_new(j,i) *= norm + enddo enddo - enddo - double precision, allocatable :: fock_diag(:),s_mat(:,:) - integer, allocatable :: list_degen(:,:) - allocate(list_degen(mo_num,0:mo_num),s_mat(mo_num,mo_num),fock_diag(mo_num)) - do i = 1, mo_num - fock_diag(i) = Fock_matrix_tc_mo_tot(i,i) - enddo + + allocate(list_degen(mo_num,0:mo_num), s_mat(mo_num,mo_num), fock_diag(mo_num)) + do i = 1, mo_num + fock_diag(i) = Fock_matrix_tc_mo_tot(i,i) + enddo + ! compute the overlap between the left and rescaled right - call build_s_matrix(ao_num,mo_num,mo_r_coef_new,mo_r_coef_new,ao_overlap,s_mat) + call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - print*,'fock_matrix_mo' - do i = 1, mo_num - print*,i,fock_diag(i),angle_left_right(i) - enddo + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + print *, ' fock_matrix_mo' + do i = 1, mo_num + print *, i, fock_diag(i), angle_left_right(i) + enddo - do i = 1, n_degen_list + do i = 1, n_degen_list ! ifirst = list_degen(1,i) ! ilast = list_degen(2,i) ! n_degen = ilast - ifirst +1 - n_degen = list_degen(i,0) - double precision, allocatable :: stmp(:,:),T(:,:),Snew(:,:),smat2(:,:) - double precision, allocatable :: mo_l_coef_tmp(:,:),mo_r_coef_tmp(:,:),mo_l_coef_new(:,:) - allocate(stmp(n_degen,n_degen),smat2(n_degen,n_degen)) - allocate(mo_r_coef_tmp(ao_num,n_degen),mo_l_coef_tmp(ao_num,n_degen),mo_l_coef_new(ao_num,n_degen)) - allocate(T(n_degen,n_degen),Snew(n_degen,n_degen)) - do j = 1, n_degen - mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) - mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) - enddo - ! Orthogonalization of right functions - print*,'Orthogonalization of RIGHT functions' - print*,'------------------------------------' - call orthog_functions(ao_num,n_degen,mo_r_coef_tmp,ao_overlap) - ! Orthogonalization of left functions - print*,'Orthogonalization of LEFT functions' - print*,'------------------------------------' - call orthog_functions(ao_num,n_degen,mo_l_coef_tmp,ao_overlap) - print*,'Overlap lef-right ' - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_l_coef_tmp,ao_overlap,stmp) - do j = 1, n_degen - write(*,'(100(F8.4,X))')stmp(:,j) + n_degen = list_degen(i,0) + if(n_degen .eq. 1) cycle + + allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) + allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen)) + allocate(T(n_degen,n_degen), Snew(n_degen,n_degen)) + + do j = 1, n_degen + mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) + mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) + enddo + ! Orthogonalization of right functions + print *, ' Orthogonalization of RIGHT functions' + print *, ' ------------------------------------' + call orthog_functions(ao_num, n_degen, mo_r_coef_tmp, ao_overlap) + + ! Orthogonalization of left functions + print *, ' Orthogonalization of LEFT functions' + print *, ' ------------------------------------' + call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap) + + print *, ' Overlap left-right ' + call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp) + do j = 1, n_degen + write(*,'(100(F8.4,X))') stmp(:,j) + enddo + call build_s_matrix(ao_num, n_degen, mo_l_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp) + + !print*,'LEFT/LEFT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'RIGHT/RIGHT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + + if(maxovl_tc) then + T = 0.d0 + Snew = 0.d0 + call maxovl(n_degen, n_degen, stmp, T, Snew) + !print*,'overlap after' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')Snew(:,j) + !enddo + call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0 & + , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) & + , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) ) + call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'Overlap test' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + else + mo_l_coef_new = mo_l_coef_tmp + endif + + call impose_weighted_biorthog_svd(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp) + + !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'LAST OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_l_coef_new, ao_overlap, stmp) + !print*,'LEFT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'RIGHT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + do j = 1, n_degen +!!! mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j) +!!! mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j) + mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j) + mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j) + enddo + + deallocate(stmp, smat2) + deallocate(mo_r_coef_tmp, mo_l_coef_tmp, mo_l_coef_new) + deallocate(T, Snew) enddo - call build_s_matrix(ao_num,n_degen,mo_l_coef_tmp,mo_l_coef_tmp,ao_overlap,stmp) + + !allocate(stmp(mo_num, mo_num)) + !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_r_coef_good, ao_overlap, stmp) + !print*,'LEFT/RIGHT OVERLAP ' + !do j = 1, mo_num + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_l_coef_good, ao_overlap, stmp) !print*,'LEFT/LEFT OVERLAP ' - !do j = 1, n_degen + !do j = 1, mo_num ! write(*,'(100(F16.10,X))')stmp(:,j) !enddo - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_r_coef_tmp,ao_overlap,stmp) + !call build_s_matrix(ao_num, mo_num, mo_r_coef_good, mo_r_coef_good, ao_overlap, stmp) !print*,'RIGHT/RIGHT OVERLAP ' - !do j = 1, n_degen + !do j = 1, mo_num ! write(*,'(100(F16.10,X))')stmp(:,j) !enddo - if(maxovl_tc)then - T = 0.d0 - Snew = 0.d0 - call maxovl(n_degen, n_degen, stmp, T, Snew) - !print*,'overlap after' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')Snew(:,j) - !enddo - call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0 & - , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) & - , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) ) - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'Overlap test' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - else - mo_l_coef_new = mo_l_coef_tmp - endif - call impose_biorthog_svd_overlap(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp) - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'LAST OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_l_coef_new,ao_overlap,stmp) - !print*,'LEFT OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'RIGHT OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - do j = 1, n_degen -! mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j) -! mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j) - mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j) - mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j) - enddo - deallocate(stmp,smat2) - deallocate(mo_r_coef_tmp,mo_l_coef_tmp,mo_l_coef_new) - deallocate(T,Snew) - enddo - allocate(stmp(mo_num, mo_num)) - call build_s_matrix(ao_num,mo_num,mo_l_coef_good,mo_r_coef_good,ao_overlap,stmp) - !print*,'LEFT/RIGHT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,mo_num,mo_l_coef_good,mo_l_coef_good,ao_overlap,stmp) - !print*,'LEFT/LEFT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,mo_num,mo_r_coef_good,mo_r_coef_good,ao_overlap,stmp) - !print*,'RIGHT/RIGHT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo mo_r_coef = mo_r_coef_good mo_l_coef = mo_l_coef_good call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef - double precision, allocatable :: new_angles(:) + allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) - double precision :: max_angle max_angle = maxval(new_angles) good_angles = max_angle.lt.45.d0 - print*,'max_angle = ',max_angle + print *, ' max_angle = ', max_angle end -subroutine build_s_matrix(m,n,C1,C2,overlap,smat) - implicit none - integer, intent(in) :: m,n - double precision, intent(in) :: C1(m,n),C2(m,n),overlap(m,m) - double precision, intent(out):: smat(n,n) - integer :: i,j,k,l - smat = 0.D0 - do i = 1, n - do j = 1, n - do k = 1, m - do l = 1, m - smat(i,j) += C1(k,i) * overlap(l,k) * C2(l,j) - enddo - enddo - enddo - enddo +! --- + +subroutine build_s_matrix(m, n, C1, C2, overlap, smat) + + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: C1(m,n), C2(m,n), overlap(m,m) + double precision, intent(out) :: smat(n,n) + integer :: i, j, k, l + double precision, allocatable :: S_tmp(:,:) + + smat = 0.d0 + + !do i = 1, n + ! do j = 1, n + ! do k = 1, m + ! do l = 1, m + ! smat(i,j) += C1(k,i) * overlap(l,k) * C2(l,j) + ! enddo + ! enddo + ! enddo + !enddo + + ! C1.T x overlap + allocate(S_tmp(n,m)) + call dgemm( 'T', 'N', n, m, m, 1.d0 & + , C1, size(C1, 1), overlap, size(overlap, 1) & + , 0.d0, S_tmp, size(S_tmp, 1) ) + ! C1.T x overlap x C2 + call dgemm( 'N', 'N', n, n, m, 1.d0 & + , S_tmp, size(S_tmp, 1), C2(1,1), size(C2, 1) & + , 0.d0, smat, size(smat, 1) ) + deallocate(S_tmp) + end -subroutine orthog_functions(m,n,coef,overlap) - implicit none - integer, intent(in) :: m,n - double precision, intent(in) :: overlap(m,m) - double precision, intent(inout) :: coef(m,n) - double precision, allocatable :: stmp(:,:) - integer :: j - allocate(stmp(n,n)) - call build_s_matrix(m,n,coef,coef,overlap,stmp) +! --- + +subroutine orthog_functions(m, n, coef, overlap) + + implicit none + + integer, intent(in) :: m, n + double precision, intent(in) :: overlap(m,m) + double precision, intent(inout) :: coef(m,n) + double precision, allocatable :: stmp(:,:) + integer :: j + + allocate(stmp(n,n)) + call build_s_matrix(m, n, coef, coef, overlap, stmp) ! print*,'overlap before' ! do j = 1, n ! write(*,'(100(F16.10,X))')stmp(:,j) ! enddo - call impose_orthog_svd_overlap(m, n, coef,overlap) - call build_s_matrix(m,n,coef,coef,overlap,stmp) + call impose_orthog_svd_overlap(m, n, coef, overlap) + call build_s_matrix(m, n, coef, coef, overlap, stmp) do j = 1, n - coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) + coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) enddo - call build_s_matrix(m,n,coef,coef,overlap,stmp) + call build_s_matrix(m, n, coef, coef, overlap, stmp) + !print*,'overlap after' !do j = 1, n ! write(*,'(100(F16.10,X))')stmp(:,j) !enddo + + deallocate(stmp) + end -subroutine print_angles_tc - implicit none - integer :: i,j - double precision :: left,right - print*,'product of norms, angle between vectors' - do i = 1, mo_num - left = overlap_mo_l(i,i) - right = overlap_mo_r(i,i) -! print*,Fock_matrix_tc_mo_tot(i,i),left*right,angle_left_right(i) - print*,left*right,angle_left_right(i) - enddo -end +! --- -subroutine print_energy_and_mos - implicit none - integer :: i - print*,'' - print*,'TC energy = ', TC_HF_energy - print*,'TC SCF energy gradient = ',grad_non_hermit - print*,'Max angle Left/right = ',max_angle_left_right - if(max_angle_left_right.lt.45.d0)then - print*,'Maximum angle BELOW 45 degrees, everthing is OK !' - else if(max_angle_left_right.gt.45.d0.and.max_angle_left_right.lt.75.d0)then - print*,'Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' - else if(max_angle_left_right.gt.75.d0)then - print*,'Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' - endif - print*,'Diag Fock elem, product of left/right norm, angle left/right ' +subroutine print_angles_tc() + + implicit none + integer :: i, j + double precision :: left, right + + print *, ' product of norms, angle between vectors' do i = 1, mo_num - write(*,'(I3,X,100(F16.10,X))')i,Fock_matrix_tc_mo_tot(i,i),overlap_mo_l(i,i)*overlap_mo_r(i,i),angle_left_right(i) + left = overlap_mo_l(i,i) + right = overlap_mo_r(i,i) +! print*,Fock_matrix_tc_mo_tot(i,i),left*right,angle_left_right(i) + print *, left*right, angle_left_right(i) enddo + end +! --- + +subroutine print_energy_and_mos() + + implicit none + integer :: i + + print *, ' ' + print *, ' TC energy = ', TC_HF_energy + print *, ' TC SCF energy gradient = ', grad_non_hermit + print *, ' Max angle Left/right = ', max_angle_left_right + + if(max_angle_left_right .lt. 45.d0) then + print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' + else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. 75.d0) then + print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' + endif + + print *, ' Diag Fock elem, product of left/right norm, angle left/right ' + do i = 1, mo_num + write(*, '(I3,X,100(F16.10,X))') i, Fock_matrix_tc_mo_tot(i,i), overlap_mo_l(i,i)*overlap_mo_r(i,i), angle_left_right(i) + enddo + +end + +! --- + subroutine sort_by_tc_fock implicit none integer, allocatable :: iorder(:) @@ -276,3 +350,4 @@ subroutine sort_by_tc_fock touch mo_l_coef mo_r_coef end + diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f new file mode 100644 index 00000000..d3722098 --- /dev/null +++ b/src/tc_scf/tc_petermann_factor.irp.f @@ -0,0 +1,78 @@ + +! --- + +program tc_petermann_factor + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 +! my_n_pt_r_grid = 10 ! small grid for quick debug +! my_n_pt_a_grid = 26 ! small grid for quick debug + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i, j + double precision :: Pf_diag_av + double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:) + + allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) + + call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + , 0.d0, Sl, size(Sl, 1) ) + + print *, '' + print *, ' left-orthog matrix:' + do i = 1, mo_num + write(*,'(100(F8.4,X))') Sl(:,i) + enddo + + call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & + , 0.d0, Sr, size(Sr, 1) ) + + print *, '' + print *, ' right-orthog matrix:' + do i = 1, mo_num + write(*,'(100(F8.4,X))') Sr(:,i) + enddo + + print *, '' + print *, ' Petermann matrix:' + do i = 1, mo_num + do j = 1, mo_num + Pf(j,i) = Sl(j,i) * Sr(j,i) + enddo + write(*,'(100(F8.4,X))') Pf(:,i) + enddo + + Pf_diag_av = 0.d0 + do i = 1, mo_num + Pf_diag_av = Pf_diag_av + Pf(i,i) + enddo + Pf_diag_av = Pf_diag_av / dble(mo_num) + + print *, '' + print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av + + deallocate(Sl, Sr, Pf) + + return +end subroutine + +! --- + diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 4a875b59..283ec2ae 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -15,13 +15,18 @@ program tc_scf ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - !call create_guess - !call orthonormalize_mos + call create_guess() + call orthonormalize_mos() - call routine_scf() - call minimize_tc_orb_angles - call print_energy_and_mos + PROVIDE tcscf_algorithm + if(tcscf_algorithm == 'DIIS') then + call rh_tcscf() + else + call simple_tcscf() + endif + call minimize_tc_orb_angles() + call print_energy_and_mos() end @@ -37,7 +42,8 @@ subroutine create_guess logical :: exists PROVIDE ezfio_filename - call ezfio_has_mo_basis_mo_coef(exists) + !call ezfio_has_mo_basis_mo_coef(exists) + exists = .false. if (.not.exists) then mo_label = 'Guess' @@ -64,7 +70,7 @@ end subroutine create_guess ! --- -subroutine routine_scf() +subroutine simple_tcscf() implicit none integer :: i, j, it @@ -79,9 +85,9 @@ subroutine routine_scf() !print*,'grad_hermit = ', grad_hermit print*,'***' print*,'TC HF total energy = ', TC_HF_energy - print*,'TC HF 1 e energy = ', TC_HF_one_electron_energy + print*,'TC HF 1 e energy = ', TC_HF_one_e_energy print*,'TC HF 2 e energy = ', TC_HF_two_e_energy - if(three_body_h_tc)then + if(three_body_h_tc) then print*,'TC HF 3 body = ', diag_three_elem_hf endif print*,'***' @@ -99,10 +105,9 @@ subroutine routine_scf() call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef - else - print*,'grad_hermit = ',grad_hermit + print *, ' grad_hermit = ', grad_hermit call save_good_hermit_tc_eigvectors TOUCH mo_coef call save_mos @@ -113,58 +118,70 @@ subroutine routine_scf() if(bi_ortho) then - !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. dsqrt(thresh_tcscf)) ) - !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. thresh_tcscf) ) - !do while( it .lt. n_it_tcscf_max .and. (rho_delta .gt. thresh_tcscf) ) - do while( it .lt. n_it_tcscf_max .and. (grad_non_hermit_right.gt. dsqrt(thresh_tcscf)) ) + !do while(e_delta .gt. dsqrt(thresh_tcscf)) ) + !do while(e_delta .gt. thresh_tcscf) ) + !do while(rho_delta .gt. thresh_tcscf) ) + !do while(grad_non_hermit_right .gt. dsqrt(thresh_tcscf)) + do while(grad_non_hermit .gt. dsqrt(thresh_tcscf)) it += 1 - print*,'iteration = ', it - print*,'***' - print*,'TC HF total energy = ', TC_HF_energy - print*,'TC HF 1 e energy = ', TC_HF_one_electron_energy - print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy - if(three_body_h_tc)then - print*,'TC HF 3 body = ', diag_three_elem_hf + if(it > n_it_tcscf_max) then + print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max + exit endif - print*,'***' - e_delta = dabs( TC_HF_energy - e_save ) - print*, 'it, delta E = ', it, e_delta - print*, 'it, gradient= ',grad_non_hermit_right + + + print *, ' ***' + print *, ' iteration = ', it + + print *, ' TC HF total energy = ', TC_HF_energy + print *, ' TC HF 1 e energy = ', TC_HF_one_e_energy + print *, ' TC HF 2 non hermit = ', TC_HF_two_e_energy + if(three_body_h_tc) then + print *, ' TC HF 3 body = ', diag_three_elem_hf + endif + e_delta = dabs(TC_HF_energy - e_save) + + print *, ' delta E = ', e_delta + print *, ' gradient = ', grad_non_hermit + !print *, ' gradient= ', grad_non_hermit_right + + !rho_new = TCSCF_bi_ort_dm_ao + !!print*, rho_new + !rho_delta = 0.d0 + !do i = 1, ao_num + ! do j = 1, ao_num + ! rho_delta += dabs(rho_new(j,i) - rho_old(j,i)) + ! enddo + !enddo + !print *, ' rho_delta =', rho_delta + !rho_old = rho_new + e_save = TC_HF_energy mo_l_coef = fock_tc_leigvec_ao mo_r_coef = fock_tc_reigvec_ao - - rho_new = TCSCF_bi_ort_dm_ao - !print*, rho_new - rho_delta = 0.d0 - do i = 1, ao_num - do j = 1, ao_num - rho_delta += dabs(rho_new(j,i) - rho_old(j,i)) - enddo - enddo - print*, ' rho_delta =', rho_delta - rho_old = rho_new - call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef - call ezfio_set_tc_scf_bitc_energy(TC_HF_energy) + print *, ' ***' + print *, '' + enddo else do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max ) print*,'grad_hermit = ',grad_hermit it += 1 - print*,'iteration = ', it - print*,'***' - print*,'TC HF total energy = ', TC_HF_energy - print*,'TC HF 1 e energy = ', TC_HF_one_electron_energy - print*,'TC HF 2 e energy = ', TC_HF_two_e_energy - print*,'TC HF 3 body = ', diag_three_elem_hf - print*,'***' + print *, 'iteration = ', it + print *, '***' + print *, 'TC HF total energy = ', TC_HF_energy + print *, 'TC HF 1 e energy = ', TC_HF_one_e_energy + print *, 'TC HF 2 e energy = ', TC_HF_two_e_energy + print *, 'TC HF 3 body = ', diag_three_elem_hf + print *, '***' + print *, '' call save_good_hermit_tc_eigvectors TOUCH mo_coef call save_mos @@ -174,11 +191,11 @@ subroutine routine_scf() endif print*,'Energy converged !' - call print_energy_and_mos + call print_energy_and_mos() deallocate(rho_old, rho_new) -end subroutine routine_scf +end subroutine simple_tcscf ! --- diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f index f6ae3e1f..1f054a30 100644 --- a/src/tc_scf/tc_scf_dm.irp.f +++ b/src/tc_scf/tc_scf_dm.irp.f @@ -1,25 +1,31 @@ +! --- + BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ] - implicit none - if(bi_ortho)then - TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta - else - TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta - endif + implicit none + if(bi_ortho) then + TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta + else + TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta + endif END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ] - implicit none - if(bi_ortho)then - TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha - else - TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha - endif + implicit none + if(bi_ortho)then + TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha + else + TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha + endif END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ] - implicit none - TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha + implicit none + TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha END_PROVIDER diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index aa2a16ff..c60ce761 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -1,6 +1,6 @@ BEGIN_PROVIDER [ double precision, TC_HF_energy] -&BEGIN_PROVIDER [ double precision, TC_HF_one_electron_energy] +&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy] &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy] BEGIN_DOC @@ -11,19 +11,19 @@ integer :: i, j TC_HF_energy = nuclear_repulsion - TC_HF_one_electron_energy = 0.d0 + TC_HF_one_e_energy = 0.d0 TC_HF_two_e_energy = 0.d0 do j = 1, ao_num do i = 1, ao_num TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) & - + two_e_tc_non_hermit_integral_beta(i,j) * TCSCF_density_matrix_ao_beta(i,j) ) - TC_HF_one_electron_energy += ao_one_e_integrals_tc_tot(i,j) & - * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) + + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) ) + TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) & + * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) ) enddo enddo - TC_HF_energy += TC_HF_one_electron_energy + TC_HF_two_e_energy + TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy TC_HF_energy += diag_three_elem_hf END_PROVIDER diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f index 09a4a1b9..dde477c4 100644 --- a/src/tc_scf/tc_scf_utils.irp.f +++ b/src/tc_scf/tc_scf_utils.irp.f @@ -40,3 +40,4 @@ subroutine LTxSxR(n, m, L, S, R, C) end subroutine LTxR ! --- + diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f new file mode 100644 index 00000000..87488fba --- /dev/null +++ b/src/tools/print_he_energy.irp.f @@ -0,0 +1,365 @@ + +! --- + +program print_he_energy + + implicit none + + call print_overlap() + + call print_energy1() + call print_energy2() + +end + +! --- + +subroutine print_overlap() + + implicit none + integer :: i, j, k, l + double precision :: S_ij + + print *, ' ao_overlap:' + do i = 1, ao_num + do j = 1, ao_num + print *, j, i, ao_overlap(j,i) + enddo + enddo + + + print *, ' mo_overlap:' + do i = 1, mo_num + do j = 1, mo_num + + S_ij = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + S_ij += mo_coef(k,i) * ao_overlap(k,l) * mo_coef(l,j) + enddo + enddo + + print *, i, j, S_ij + enddo + enddo + +end subroutine print_overlap + +! --- + +subroutine print_energy1() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! < phi_1 phi_1 | h1 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_1 | h2 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ( phi_1 phi_1 | phi_1 phi_1 ) + e += mo_coef(i,1) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,1) + + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_1 | phi_1 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + print *, ' energy = ', e + +end subroutine print_energy1 + +! --- + +subroutine print_energy2() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! < phi_1 phi_2 | h1 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h1 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h1 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h1 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h2 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,2) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h2 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h2 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,2) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h2 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ( phi_1 phi_1 | phi_2 phi_2 ) + e += mo_coef(i,1) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,2) * mo_coef(l,2) + + ! ( phi_1 phi_2 | phi_2 phi_1 ) + e -= mo_coef(i,1) * mo_coef(j,2) * ao_two_e_integral(i,j,k,l) * mo_coef(k,2) * mo_coef(l,1) + + ! ( phi_2 phi_1 | phi_1 phi_2 ) + e -= mo_coef(i,2) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,2) + + ! ( phi_2 phi_2 | phi_1 phi_1 ) + e += mo_coef(i,2) * mo_coef(j,2) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,1) + + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_2 | phi_1 phi_2 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | phi_2 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | phi_1 phi_2 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + n -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | phi_2 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + print *, ' energy = ', e + +end subroutine print_energy2 + +! --- diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f index f6906e23..188bfa58 100644 --- a/src/utils/block_diag_degen.irp.f +++ b/src/utils/block_diag_degen.irp.f @@ -1,177 +1,218 @@ -subroutine diag_mat_per_fock_degen(fock_diag,mat_ref,n,thr_deg,leigvec,reigvec,eigval) - implicit none - integer, intent(in) :: n - double precision, intent(in) :: fock_diag(n),mat_ref(n,n),thr_deg - double precision, intent(out):: leigvec(n,n),reigvec(n,n),eigval(n) - BEGIN_DOC -! subroutine that diagonalizes a matrix mat_ref BY BLOCK -! -! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" -! -! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together -! -! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together -! -! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together -! -! etc... the advantage is to guarentee no spurious mixing because of numerical problems. - END_DOC - double precision, allocatable :: leigvec_unsrtd(:,:),reigvec_unsrtd(:,:),eigval_unsrtd(:) - integer, allocatable :: list_degen(:,:),list_same_degen(:) - integer, allocatable :: iorder(:),list_degen_sorted(:) - integer :: n_degen_list,n_degen,size_mat,i,j,k,icount,m,index_degen - integer :: ii,jj,i_good,j_good,n_real - double precision, allocatable :: mat_tmp(:,:),eigval_tmp(:),leigvec_tmp(:,:),reigvec_tmp(:,:) +subroutine diag_mat_per_fock_degen(fock_diag, mat_ref, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) - allocate(leigvec_unsrtd(n,n),reigvec_unsrtd(n,n),eigval_unsrtd(n)) - leigvec_unsrtd = 0.d0 - reigvec_unsrtd = 0.d0 - eigval_unsrtd = 0.d0 - allocate(list_degen(n,0:n)) + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC - ! obtain degeneracies - call give_degen_full_list(fock_diag,n,thr_deg,list_degen,n_degen_list) - allocate(iorder(n_degen_list),list_degen_sorted(n_degen_list)) - do i =1, n_degen_list - n_degen = list_degen(i,0) - list_degen_sorted(i) = n_degen - iorder(i)=i - enddo - ! sort by number of degeneracies - call isort(list_degen_sorted,iorder,n_degen_list) - integer :: icount_eigval - logical, allocatable :: is_ok(:) - allocate(is_ok(n_degen_list)) - is_ok = .True. - icount_eigval = 0 - ! loop over degeneracies - do i = 1, n_degen_list - if(.not.is_ok(i))cycle - is_ok(i) = .False. - n_degen = list_degen_sorted(i) - print*,'diagonalizing for n_degen = ',n_degen - k = 1 - ! group all the entries having the same degeneracies -! do while (list_degen_sorted(i+k)==n_degen) - do m = i+1,n_degen_list - if(list_degen_sorted(m)==n_degen)then - is_ok(i+k)=.False. - k += 1 - endif + implicit none + integer, intent(in) :: n + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_list(fock_diag, n, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i enddo - print*,'number of identical degeneracies = ',k - size_mat = k*n_degen - print*,'size_mat = ',size_mat - allocate(mat_tmp(size_mat,size_mat),list_same_degen(size_mat)) - allocate(eigval_tmp(size_mat),leigvec_tmp(size_mat,size_mat),reigvec_tmp(size_mat,size_mat)) - ! group all the elements sharing the same degeneracy - icount = 0 - do j = 1, k ! jth set of degeneracy - index_degen = iorder(i+j-1) - do m = 1, n_degen - icount += 1 - list_same_degen(icount) = list_degen(index_degen,m) - enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + print *, ' diagonalizing for n_degen = ', n_degen + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) enddo - print*,'list of elements ' - do icount = 1, size_mat - print*,icount,list_same_degen(icount) - enddo - ! you copy subset of matrix elements having all the same degeneracy in mat_tmp - do ii = 1, size_mat - i_good = list_same_degen(ii) - do jj = 1, size_mat - j_good = list_same_degen(jj) - mat_tmp(jj,ii) = mat_ref(j_good,i_good) - enddo - enddo - call non_hrmt_bieig( size_mat, mat_tmp& - , leigvec_tmp, reigvec_tmp& - , n_real, eigval_tmp) - do ii = 1, size_mat - icount_eigval += 1 - eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues - do jj = 1, size_mat ! copy the eigenvectors - j_good = list_same_degen(jj) - leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) - reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) - enddo - enddo - deallocate(mat_tmp,list_same_degen) - deallocate(eigval_tmp,leigvec_tmp,reigvec_tmp) - enddo - if(icount_eigval.ne.n)then - print*,'pb !! (icount_eigval.ne.n)' - print*,'icount_eigval,n',icount_eigval,n - stop - endif + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif - deallocate(iorder) - allocate(iorder(n)) - do i = 1, n - iorder(i) = i - enddo - call dsort(eigval_unsrtd,iorder,n) - do i = 1, n - print*,'sorted eigenvalues ' - i_good = iorder(i) - eigval(i) = eigval_unsrtd(i) - print*,'i,eigval(i) = ',i,eigval(i) - do j = 1, n - leigvec(j,i) = leigvec_unsrtd(j,i_good) - reigvec(j,i) = reigvec_unsrtd(j,i_good) + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i enddo - enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + end -subroutine give_degen_full_list(A,n,thr,list_degen,n_degen_list) - implicit none - BEGIN_DOC - ! you enter with an array A(n) and spits out all the elements degenerated up to thr - ! - ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL - ! - ! list_degen(i,0) = number of degenerate entries - ! - ! list_degen(i,1) = index of the first degenerate entry - ! - ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries - ! - ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element - END_DOC - double precision,intent(in) :: A(n) - double precision,intent(in) :: thr - integer, intent(in) :: n - integer, intent(out) :: list_degen(n,0:n),n_degen_list - logical, allocatable :: is_ok(:) - allocate(is_ok(n)) - integer :: i,j,icount,icheck - n_degen_list = 0 - is_ok = .True. - do i = 1, n - if(.not.is_ok(i))cycle - n_degen_list +=1 - is_ok(i) = .False. - list_degen(n_degen_list,1) = i - icount = 1 - do j = i+1, n - if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j))then - is_ok(j) = .False. - icount += 1 - list_degen(n_degen_list,icount) = j - endif +! --- + +subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount enddo - list_degen(n_degen_list,0) = icount - enddo - icheck = 0 - do i = 1, n_degen_list - icheck += list_degen(i,0) - enddo - if(icheck.ne.n)then - print*,'pb ! :: icheck.ne.n' - print*,icheck,n - stop - endif + + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + + if(icheck.ne.n)then + print *, ' pb ! :: icheck.ne.n' + print *, icheck, n + stop + endif + end + +! --- + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index f68465c7..f593cefb 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -47,7 +47,9 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph end +! TODO remove dim subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,dim) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -60,6 +62,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC + implicit none include 'constants.include.F' integer, intent(in) :: dim @@ -129,7 +132,8 @@ end !--- -subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, LD_A, B_center, n_points) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -142,24 +146,26 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC - implicit none + include 'constants.include.F' - integer, intent(in) :: n_points, ldp - integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) - double precision, intent(in) :: alpha, beta ! exponents - double precision, intent(in) :: A_center(n_points,3) ! A center - double precision, intent(in) :: B_center (3) ! B center - double precision, intent(out) :: P_center(n_points,3) ! new center - double precision, intent(out) :: p ! new exponent - double precision, intent(out) :: fact_k(n_points) ! constant factor - double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial - integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials - double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + implicit none + integer, intent(in) :: n_points, ldp, LD_A + integer, intent(in) :: a(3), b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(LD_A,3) ! A center + double precision, intent(in) :: B_center(3) ! B center + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + double precision, intent(out) :: P_center(n_points,3) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(n_points,0:ldp,3) ! polynomial - integer :: n_new,i,j, ipoint, lda, ldb, xyz + integer :: n_new, i, j, ipoint, lda, ldb, xyz + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) - call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points) if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) @@ -167,13 +173,13 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = 0 allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) - call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points) iorder(1:3) = a(1:3) - do ipoint=1,n_points - do xyz=1,3 + do ipoint = 1, n_points + do xyz = 1, 3 P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo @@ -187,31 +193,31 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = maxval(b) allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) - call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + call recentered_poly2_v(P_a, lda, A_center, LD_A, P_center, a, P_b, ldb, B_center, P_center, b, n_points) iorder(1:3) = a(1:3) + b(1:3) - do xyz=1,3 + do xyz = 1, 3 if (b(xyz) == 0) then - do ipoint=1,n_points + do ipoint = 1, n_points P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo else - do i=0,iorder(xyz) - do ipoint=1,n_points + do i = 0, iorder(xyz) + do ipoint = 1, n_points P_new(ipoint,i,xyz) = 0.d0 enddo enddo - call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) + call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points) endif enddo -end +end subroutine give_explicit_poly_and_gaussian_v -!- +! --- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -273,15 +279,16 @@ subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder end +! --- +subroutine gaussian_product(a, xa, b, xb, k, p, xp) -subroutine gaussian_product(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision, intent(in) :: a,b ! Exponents double precision, intent(in) :: xa(3),xb(3) ! Centers double precision, intent(out) :: p ! New exponent @@ -312,33 +319,39 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(1) = (a*xa(1)+b*xb(1))*p_inv xp(2) = (a*xa(2)+b*xb(2))*p_inv xp(3) = (a*xa(3)+b*xb(3))*p_inv + end subroutine !--- -subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) - implicit none + +subroutine gaussian_product_v(a, xa, LD_xa, b, xb, k, p, xp, n_points) + BEGIN_DOC + ! ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! ! Using multiple A centers + ! END_DOC - integer, intent(in) :: n_points - double precision, intent(in) :: a,b ! Exponents - double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers - double precision, intent(out) :: p ! New exponent - double precision, intent(out) :: xp(n_points,3) ! New center - double precision, intent(out) :: k(n_points) ! Constant + implicit none - double precision :: p_inv + integer, intent(in) :: LD_xa, n_points + double precision, intent(in) :: a, b ! Exponents + double precision, intent(in) :: xa(LD_xa,3), xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(n_points,3) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + integer :: ipoint + double precision :: p_inv + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - integer :: ipoint ASSERT (a>0.) ASSERT (b>0.) - double precision :: xab(3), ab, ap, bp, bpxb(3) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - p = a+b p_inv = 1.d0/(a+b) ab = a*b*p_inv @@ -348,7 +361,7 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) bpxb(2) = bp*xb(2) bpxb(3) = bp*xb(3) - do ipoint=1,n_points + do ipoint = 1, n_points xab(1) = xa(ipoint,1)-xb(1) xab(2) = xa(ipoint,2)-xb(2) xab(3) = xa(ipoint,3)-xb(3) @@ -365,18 +378,19 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) endif enddo -end subroutine +end subroutine gaussian_product_v +! --- +subroutine gaussian_product_x(a, xa, b, xb, k, p, xp) -subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision , intent(in) :: a,b ! Exponents double precision , intent(in) :: xa,xb ! Centers double precision , intent(out) :: p ! New exponent @@ -625,16 +639,20 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo + end -!- -subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) - implicit none +! --- + +subroutine recentered_poly2_v(P_new, lda, x_A, LD_xA, x_P, a, P_new2, ldb, x_B, x_Q, b, n_points) + BEGIN_DOC ! Recenter two polynomials END_DOC - integer, intent(in) :: a(3),b(3), n_points, lda, ldb - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + + implicit none + integer, intent(in) :: a(3), b(3), n_points, lda, ldb, LD_xA + double precision, intent(in) :: x_A(LD_xA,3), x_P(n_points,3), x_B(3), x_Q(n_points,3) double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3) double precision :: binom_func integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz @@ -646,7 +664,6 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 if ((a(xyz)<0).or.(b(xyz)<0) ) cycle do ipoint=1,n_points @@ -698,27 +715,34 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) enddo enddo enddo -end +end subroutine recentered_poly2_v + +! --- + +subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q, n_points) -subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) - implicit none BEGIN_DOC ! Recenter two polynomials. Special case for b=(0,0,0) END_DOC - integer, intent(in) :: a(3), n_points, lda - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) - double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,3) - double precision :: binom_func - integer :: i,j,k,l, xyz, ipoint, maxab(3) - double precision, allocatable :: pows_a(:,:), pows_b(:,:) - double precision :: fa + + implicit none + integer, intent(in) :: a(3), n_points, lda, LD_xA + double precision, intent(in) :: x_A(LD_xA,3) + double precision, intent(in) :: x_B(3) + double precision, intent(in) :: x_P(n_points,3), x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3), P_new2(n_points,3) + integer :: i, j, k, l, xyz, ipoint, maxab(3) + double precision :: fa + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + + double precision :: binom_func maxab(1:3) = max(a(1:3),(/0,0,0/)) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 + do xyz = 1, 3 if (a(xyz)<0) cycle do ipoint=1,n_points pows_a(ipoint,0) = 1.d0 @@ -736,25 +760,25 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) P_new2(ipoint,xyz) = pows_b(ipoint,0) enddo - do i = 1,min(a(xyz),20) - fa = binom_transp(a(xyz)-i,a(xyz)) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 1, min(a(xyz), 20) + fa = binom_transp(a(xyz)-i, a(xyz)) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo - do i = 21,a(xyz) - fa = binom_func(a(xyz),a(xyz)-i) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 21, a(xyz) + fa = binom_func(a(xyz), a(xyz)-i) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo enddo !xyz deallocate(pows_a, pows_b) -end -!-- +end subroutine recentered_poly2_v0 + !-- subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index a62c657e..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -150,72 +153,74 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over overlap_x = factor * dx * overlap_x end - ! --- -subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& - power_B,overlap,dim, n_points) - implicit none +subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) + BEGIN_DOC !.. math:: ! - ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' - integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions - double precision, intent(in) :: alpha,beta - integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions - double precision, intent(out) :: overlap(n_points) - double precision :: F_integral_tab(0:max_dim) - double precision :: p, overlap_x, overlap_y, overlap_z - double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) - integer :: iorder_p(3), ipoint, ldp - integer :: nmax - double precision :: F_integral + + implicit none + + integer, intent(in) :: n_points + integer, intent(in) :: power_A(3), power_B(3) ! power of the x1 functions + double precision, intent(in) :: A_center(n_points,3), B_center(3) ! center of the x1 functions + double precision, intent(in) :: alpha, beta + double precision, intent(out) :: overlap(n_points) + + integer :: i + integer :: iorder_p(3), ipoint, ldp + integer :: nmax + double precision :: F_integral_tab(0:max_dim) + double precision :: p, overlap_x, overlap_y, overlap_z + double precision :: F_integral + double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) ldp = maxval( power_A(1:3) + power_B(1:3) ) - allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & - fact_pp(n_points), pp(n_points)) - call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, n_points, B_center, n_points) nmax = maxval(iorder_p) - do i=0, nmax + do i = 0, nmax F_integral_tab(i) = F_integral(i,p) enddo - integer :: i + do ipoint = 1, n_points - call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) - - do ipoint=1,n_points - if(fact_p(ipoint).lt.1d-20)then + if(fact_p(ipoint) .lt. 1d-20) then overlap(ipoint) = 1.d-10 cycle endif overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) - do i = 1,iorder_p(1) + do i = 1, iorder_p(1) overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) enddo overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) - do i = 1,iorder_p(2) + do i = 1, iorder_p(2) overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) enddo overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) - do i = 1,iorder_p(3) + do i = 1, iorder_p(3) overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) enddo - overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_p(ipoint) enddo - deallocate(P_new, P_center, fact_p, pp, fact_pp) -end + deallocate(P_new, P_center, fact_p) + +end subroutine overlap_gaussian_xyz_v ! --- diff --git a/src/utils/qsort.c b/src/utils/qsort.c deleted file mode 100644 index c011b35a..00000000 --- a/src/utils/qsort.c +++ /dev/null @@ -1,373 +0,0 @@ -/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ -#include -#include - -struct int16_t_comp { - int16_t x; - int32_t i; -}; - -int compare_int16_t( const void * l, const void * r ) -{ - const int16_t * restrict _l= l; - const int16_t * restrict _r= r; - if( *_l > *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct double_comp* A = malloc(isize * sizeof(struct double_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct float_comp* A = malloc(isize * sizeof(struct float_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i> -""" -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("TYPE", typ).replace("_big", "") ) - print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f2 -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -* Generated C file - -#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes -#include -#include -<> -#+END_SRC - -* Generated Fortran file - -#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes -module qsort_module - use iso_c_binding - - interface - <> - end interface - -end module qsort_module - -<> - -#+END_SRC - diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 deleted file mode 100644 index a72a4f9e..00000000 --- a/src/utils/qsort_module.f90 +++ /dev/null @@ -1,347 +0,0 @@ -module qsort_module - use iso_c_binding - - interface - - subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_c - - subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_c - - - - subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_big_c - - subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_big_c - - - - subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_c - - subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_c - - - - subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_big_c - - subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_big_c - - - - subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_c - - subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_c - - - - subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_big_c - - subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_big_c - - - - subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_c - - subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_c - - - - subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_big_c - - subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_big_c - - - - subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_c - - subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_c - - - - subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_big_c - - subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_big_c - - - - end interface - -end module qsort_module - - -subroutine i2sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_c(A, iorder, isize) -end subroutine i2sort - -subroutine i2sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int16_t) :: A(isize) - call i2sort_noidx_c(A, isize) -end subroutine i2sort_noidx - - - -subroutine i2sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_big_c(A, iorder, isize) -end subroutine i2sort_big - -subroutine i2sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int16_t) :: A(isize) - call i2sort_noidx_big_c(A, isize) -end subroutine i2sort_noidx_big - - - -subroutine isort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_c(A, iorder, isize) -end subroutine isort - -subroutine isort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_c(A, isize) -end subroutine isort_noidx - - - -subroutine isort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_big_c(A, iorder, isize) -end subroutine isort_big - -subroutine isort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_big_c(A, isize) -end subroutine isort_noidx_big - - - -subroutine i8sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_c(A, iorder, isize) -end subroutine i8sort - -subroutine i8sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_c(A, isize) -end subroutine i8sort_noidx - - - -subroutine i8sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_big_c(A, iorder, isize) -end subroutine i8sort_big - -subroutine i8sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_big_c(A, isize) -end subroutine i8sort_noidx_big - - - -subroutine dsort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_c(A, iorder, isize) -end subroutine dsort - -subroutine dsort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_c(A, isize) -end subroutine dsort_noidx - - - -subroutine dsort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_big_c(A, iorder, isize) -end subroutine dsort_big - -subroutine dsort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_big_c(A, isize) -end subroutine dsort_noidx_big - - - -subroutine sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_c(A, iorder, isize) -end subroutine sort - -subroutine sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_float) :: A(isize) - call sort_noidx_c(A, isize) -end subroutine sort_noidx - - - -subroutine sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_big_c(A, iorder, isize) -end subroutine sort_big - -subroutine sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_float) :: A(isize) - call sort_noidx_big_c(A, isize) -end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index 089c3871..ff40263c 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,4 +1,222 @@ BEGIN_TEMPLATE + subroutine insertion_$Xsort (x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the insertion sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + $type :: xtmp + integer :: i, i0, j, jmax + + do i=2,isize + xtmp = x(i) + i0 = iorder(i) + j=i-1 + do while (j>0) + if ((x(j) <= xtmp)) exit + x(j+1) = x(j) + iorder(j+1) = iorder(j) + j=j-1 + enddo + x(j+1) = xtmp + iorder(j+1) = i0 + enddo + end subroutine insertion_$Xsort + + subroutine quick_$Xsort(x, iorder, isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the quicksort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer, external :: omp_get_num_threads + call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) + end + + recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) + implicit none + integer, intent(in) :: isize, first, last, level + integer,intent(inout) :: iorder(isize) + $type, intent(inout) :: x(isize) + $type :: c, tmp + integer :: itmp + integer :: i, j + + if(isize<2)return + + c = x( shiftr(first+last,1) ) + i = first + j = last + do + do while (x(i) < c) + i=i+1 + end do + do while (c < x(j)) + j=j-1 + end do + if (i >= j) exit + tmp = x(i) + x(i) = x(j) + x(j) = tmp + itmp = iorder(i) + iorder(i) = iorder(j) + iorder(j) = itmp + i=i+1 + j=j-1 + enddo + if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + else + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + endif + end + + subroutine heap_$Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the heap sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + + integer :: i, k, j, l, i0 + $type :: xtemp + + l = isize/2+1 + k = isize + do while (.True.) + if (l>1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j0_8) + if (x(j)<=xtmp) exit + x(j+1_8) = x(j) + iorder(j+1_8) = iorder(j) + j = j-1_8 + enddo + x(j+1_8) = xtmp + iorder(j+1_8) = i0 + enddo + + end subroutine insertion_$Xsort_big + subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -90,3 +565,223 @@ SUBST [ X, type ] END_TEMPLATE +BEGIN_TEMPLATE + +recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) + implicit none + + BEGIN_DOC + ! Sort integer array x(isize) using the radix sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + ! iradix should be -1 in input. + END_DOC + integer*$int_type, intent(in) :: isize + integer*$int_type, intent(inout) :: iorder(isize) + integer*$type, intent(inout) :: x(isize) + integer, intent(in) :: iradix + integer :: iradix_new + integer*$type, allocatable :: x2(:), x1(:) + integer*$type :: i4 ! data type + integer*$int_type, allocatable :: iorder1(:),iorder2(:) + integer*$int_type :: i0, i1, i2, i3, i ! index type + integer*$type :: mask + integer :: err + !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 + + if (isize < 2) then + return + endif + + if (iradix == -1) then ! Sort Positive and negative + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + do i=1_$int_type,isize + if (x(i) < 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = -x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i2 + iorder(i1+i) = iorder2(i) + x(i1+i) = x2(i) + enddo + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i1 > 1_$int_type) then + call $Xradix_sort$big(x1,iorder1,i1,-2) + do i=1_$int_type,i1 + x(i) = -x1(1_$int_type+i1-i) + iorder(i) = iorder1(1_$int_type+i1-i) + enddo + endif + + if (i2>1_$int_type) then + call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) + endif + + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + return + + else if (iradix == -2) then ! Positive + + ! Find most significant bit + + i0 = 0_$int_type + i4 = maxval(x) + + iradix_new = max($integer_size-1-leadz(i4),1) + mask = ibset(0_$type,iradix_new) + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder1(i) + x(i0+i) = x1(i) + enddo + i0 = i0+i1 + i3 = i0 + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + + + do i=1_$int_type,i2 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + i0 = i0+i2 + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i3>1_$int_type) then + call $Xradix_sort$big(x,iorder,i3,iradix_new-1) + endif + + if (isize-i3>1_$int_type) then + call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) + endif + + return + endif + + ASSERT (iradix >= 0) + + if (isize < 48) then + call insertion_$Xsort$big(x,iorder,isize) + return + endif + + + allocate(x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x1, iorder1' + stop + endif + + + mask = ibset(0_$type,iradix) + i0=1_$int_type + i1=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder(i0) = iorder(i) + x(i0) = x(i) + i0 = i0+1_$int_type + else + iorder2(i1) = iorder(i) + x2(i1) = x(i) + i1 = i1+1_$int_type + endif + enddo + i0=i0-1_$int_type + i1=i1-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x2, iorder2' + stop + endif + + + if (iradix == 0) then + return + endif + + + if (i1>1_$int_type) then + call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) + endif + if (i0>1) then + call $Xradix_sort$big(x,iorder,i0,iradix-1) + endif + + end + +SUBST [ X, type, integer_size, is_big, big, int_type ] + i ; 4 ; 32 ; .False. ; ; 4 ;; + i8 ; 8 ; 64 ; .False. ; ; 4 ;; + i2 ; 2 ; 16 ; .False. ; ; 4 ;; + i ; 4 ; 32 ; .True. ; _big ; 8 ;; + i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; +END_TEMPLATE + + +