10
0
mirror of https://github.com/LCPQ/quantum_package synced 2025-01-09 20:48:47 +01:00

Acelerated pseudo integrals

This commit is contained in:
Anthony Scemama 2015-11-17 12:13:33 +01:00
parent 4679023df9
commit 8af721f452
2 changed files with 149 additions and 141 deletions

View File

@ -35,76 +35,74 @@ END_PROVIDER
allocate(n_k_dump(1:pseudo_klocmax), v_k_dump(1:pseudo_klocmax), dz_k_dump(1:pseudo_klocmax)) allocate(n_k_dump(1:pseudo_klocmax), v_k_dump(1:pseudo_klocmax), dz_k_dump(1:pseudo_klocmax))
print*, 'Providing the nuclear electron pseudo integrals (local)'
! _
! / _. | _ |
! \_ (_| | (_ |_| |
!
print*, 'Providing the nuclear electron pseudo integrals '
call wall_time(wall_1) call wall_time(wall_1)
call cpu_time(cpu_1) call cpu_time(cpu_1)
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B, & !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
!$OMP num_A,num_B,Z,c,n_pt_in, & !$OMP num_A,num_B,Z,c,n_pt_in, &
!$OMP v_k_dump,n_k_dump, dz_k_dump, & !$OMP v_k_dump,n_k_dump, dz_k_dump, &
!$OMP wall_0,wall_2,thread_num) & !$OMP wall_0,wall_2,thread_num) &
!$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp, & !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,&
!$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, &
!$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k,pseudo_n_k, pseudo_dz_k, & !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k,pseudo_n_k, pseudo_dz_k,&
!$OMP wall_1) !$OMP wall_1)
!$OMP DO SCHEDULE (guided) !$OMP DO SCHEDULE (static,1)
do j = 1, ao_num do j = 1, ao_num
num_A = ao_nucl(j) num_A = ao_nucl(j)
power_A(1:3)= ao_power(j,1:3) power_A(1:3)= ao_power(j,1:3)
A_center(1:3) = nucl_coord(num_A,1:3) A_center(1:3) = nucl_coord(num_A,1:3)
do i = 1, ao_num do i = 1, ao_num
num_B = ao_nucl(i) num_B = ao_nucl(i)
power_B(1:3)= ao_power(i,1:3) power_B(1:3)= ao_power(i,1:3)
B_center(1:3) = nucl_coord(num_B,1:3) B_center(1:3) = nucl_coord(num_B,1:3)
do l=1,ao_prim_num(j) do l=1,ao_prim_num(j)
alpha = ao_expo_ordered_transp(l,j) alpha = ao_expo_ordered_transp(l,j)
do m=1,ao_prim_num(i) do m=1,ao_prim_num(i)
beta = ao_expo_ordered_transp(m,i) beta = ao_expo_ordered_transp(m,i)
double precision :: c double precision :: c
c = 0.d0 c = 0.d0
do k = 1, nucl_num if (dabs(ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i))&
double precision :: Z < 1.d-10) then
Z = nucl_charge(k) cycle
endif
do k = 1, nucl_num
double precision :: Z
Z = nucl_charge(k)
C_center(1:3) = nucl_coord(k,1:3) C_center(1:3) = nucl_coord(k,1:3)
v_k_dump = pseudo_v_k(k,1:pseudo_klocmax) v_k_dump = pseudo_v_k(k,1:pseudo_klocmax)
n_k_dump = pseudo_n_k(k,1:pseudo_klocmax) n_k_dump = pseudo_n_k(k,1:pseudo_klocmax)
dz_k_dump = pseudo_dz_k(k,1:pseudo_klocmax) dz_k_dump = pseudo_dz_k(k,1:pseudo_klocmax)
c = c + Vloc(pseudo_klocmax, v_k_dump,n_k_dump, dz_k_dump, & c = c + Vloc(pseudo_klocmax, v_k_dump,n_k_dump, dz_k_dump,&
A_center,power_A,alpha,B_center,power_B,beta,C_center) A_center,power_A,alpha,B_center,power_B,beta,C_center)
enddo
ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) +&
ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c
enddo
enddo enddo
ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) + & enddo
ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c
enddo
enddo
enddo
call wall_time(wall_2) call wall_time(wall_2)
if (thread_num == 0) then if (thread_num == 0) then
if (wall_2 - wall_0 > 1.d0) then if (wall_2 - wall_0 > 1.d0) then
wall_0 = wall_2 wall_0 = wall_2
print*, 100.*float(j)/float(ao_num), '% in ', & print*, 100.*float(j)/float(ao_num), '% in ', &
wall_2-wall_1, 's' wall_2-wall_1, 's'
endif endif
endif endif
enddo enddo
@ -141,80 +139,80 @@ END_PROVIDER
allocate(n_kl_dump(pseudo_kmax,0:pseudo_lmax), v_kl_dump(pseudo_kmax,0:pseudo_lmax), dz_kl_dump(pseudo_kmax,0:pseudo_lmax)) allocate(n_kl_dump(pseudo_kmax,0:pseudo_lmax), v_kl_dump(pseudo_kmax,0:pseudo_lmax), dz_kl_dump(pseudo_kmax,0:pseudo_lmax))
! _ print*, 'Providing the nuclear electron pseudo integrals (non-local)'
! / _. | _ |
! \_ (_| | (_ |_| |
!
print*, 'Providing the nuclear electron pseudo integrals '
call wall_time(wall_1) call wall_time(wall_1)
call cpu_time(cpu_1) call cpu_time(cpu_1)
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B, & !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
!$OMP num_A,num_B,Z,c,n_pt_in, & !$OMP num_A,num_B,Z,c,n_pt_in, &
!$OMP n_kl_dump, v_kl_dump, dz_kl_dump, & !$OMP n_kl_dump, v_kl_dump, dz_kl_dump, &
!$OMP wall_0,wall_2,thread_num) & !$OMP wall_0,wall_2,thread_num) &
!$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp, & !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,&
!$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge, & !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,&
!$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl, pseudo_v_kl, pseudo_dz_kl, & !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl, pseudo_v_kl, pseudo_dz_kl,&
!$OMP wall_1) !$OMP wall_1)
!$OMP DO SCHEDULE (guided) !$OMP DO SCHEDULE (static,1)
do j = 1, ao_num do j = 1, ao_num
num_A = ao_nucl(j) num_A = ao_nucl(j)
power_A(1:3)= ao_power(j,1:3) power_A(1:3)= ao_power(j,1:3)
A_center(1:3) = nucl_coord(num_A,1:3) A_center(1:3) = nucl_coord(num_A,1:3)
do i = 1, ao_num do i = 1, ao_num
num_B = ao_nucl(i) num_B = ao_nucl(i)
power_B(1:3)= ao_power(i,1:3) power_B(1:3)= ao_power(i,1:3)
B_center(1:3) = nucl_coord(num_B,1:3) B_center(1:3) = nucl_coord(num_B,1:3)
do l=1,ao_prim_num(j) do l=1,ao_prim_num(j)
alpha = ao_expo_ordered_transp(l,j) alpha = ao_expo_ordered_transp(l,j)
do m=1,ao_prim_num(i) do m=1,ao_prim_num(i)
beta = ao_expo_ordered_transp(m,i) beta = ao_expo_ordered_transp(m,i)
double precision :: c double precision :: c
c = 0.d0 c = 0.d0
do k = 1, nucl_num if (dabs(ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i))&
double precision :: Z < 1.d-10) then
Z = nucl_charge(k) cycle
endif
C_center(1:3) = nucl_coord(k,1:3) do k = 1, nucl_num
double precision :: Z
Z = nucl_charge(k)
n_kl_dump = pseudo_n_kl(k,1:pseudo_kmax,0:pseudo_lmax) C_center(1:3) = nucl_coord(k,1:3)
v_kl_dump = pseudo_v_kl(k,1:pseudo_kmax,0:pseudo_lmax)
dz_kl_dump = pseudo_dz_kl(k,1:pseudo_kmax,0:pseudo_lmax)
c = c + Vpseudo(pseudo_lmax,pseudo_kmax,v_kl_dump,n_kl_dump,dz_kl_dump,A_center,power_A,alpha,B_center,power_B,beta,C_center) n_kl_dump = pseudo_n_kl(k,1:pseudo_kmax,0:pseudo_lmax)
v_kl_dump = pseudo_v_kl(k,1:pseudo_kmax,0:pseudo_lmax)
dz_kl_dump = pseudo_dz_kl(k,1:pseudo_kmax,0:pseudo_lmax)
c = c + Vpseudo(pseudo_lmax,pseudo_kmax,v_kl_dump,n_kl_dump,dz_kl_dump,A_center,power_A,alpha,B_center,power_B,beta,C_center)
enddo
ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) +&
ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c
enddo
enddo enddo
ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) + & enddo
ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c
enddo
enddo
enddo
call wall_time(wall_2) call wall_time(wall_2)
if (thread_num == 0) then if (thread_num == 0) then
if (wall_2 - wall_0 > 1.d0) then if (wall_2 - wall_0 > 1.d0) then
wall_0 = wall_2 wall_0 = wall_2
print*, 100.*float(j)/float(ao_num), '% in ', & print*, 100.*float(j)/float(ao_num), '% in ', &
wall_2-wall_1, 's' wall_2-wall_1, 's'
endif endif
endif endif
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
deallocate(n_kl_dump,v_kl_dump, dz_kl_dump) deallocate(n_kl_dump,v_kl_dump, dz_kl_dump)

View File

@ -197,8 +197,8 @@ integer, intent(in) :: n_a(3),n_b(3)
double precision, intent(in) :: v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax) double precision, intent(in) :: v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax)
! !
! | _ _ _. | _ ! | _ _ _. |
! |_ (_) (_ (_| | (/_ ! |_ (_) (_ (_| |
! !
double precision :: fourpi,f,prod,prodp,binom_func,accu,bigR,bigI,ylm double precision :: fourpi,f,prod,prodp,binom_func,accu,bigR,bigI,ylm
@ -223,11 +223,6 @@ double precision, allocatable :: array_I_B(:,:,:,:,:)
double precision :: f1, f2, f3 double precision :: f1, f2, f3
! _
! / _. | _ |
! \_ (_| | (_ |_| |
!
if (kmax.eq.1.and.lmax.eq.0.and.v_kl(1,0).eq.0.d0) then if (kmax.eq.1.and.lmax.eq.0.and.v_kl(1,0).eq.0.d0) then
Vpseudo=0.d0 Vpseudo=0.d0
return return
@ -236,7 +231,7 @@ end if
fourpi=4.d0*dacos(-1.d0) fourpi=4.d0*dacos(-1.d0)
ac=dsqrt((a(1)-c(1))**2+(a(2)-c(2))**2+(a(3)-c(3))**2) ac=dsqrt((a(1)-c(1))**2+(a(2)-c(2))**2+(a(3)-c(3))**2)
bc=dsqrt((b(1)-c(1))**2+(b(2)-c(2))**2+(b(3)-c(3))**2) bc=dsqrt((b(1)-c(1))**2+(b(2)-c(2))**2+(b(3)-c(3))**2)
arg=g_a*ac**2+g_b*bc**2 arg= g_a*ac*ac + g_b*bc*bc
if(arg.gt.-dlog(1.d-20))then if(arg.gt.-dlog(1.d-20))then
Vpseudo=0.d0 Vpseudo=0.d0
@ -290,6 +285,21 @@ if(ac.eq.0.d0.and.bc.eq.0.d0)then
enddo enddo
enddo enddo
enddo enddo
! do k=1,kmax
! do l=0,lmax
! ktot=ntot+n_kl(k,l)
! do m=-l,l
! prod =bigI(0,0,l,m,n_a(1),n_a(2),n_a(3))*v_kl(k,l)
! prodp=bigI(0,0,l,m,n_b(1),n_b(2),n_b(3))*prod
! if (dabs (prodp) < 1.d-15) then
! cycle
! endif
!
! accu=accu+prodp*int_prod_bessel(ktot+2,g_a+g_b+dz_kl(k,l),0,0,areal,breal,arg)
!
! enddo
! enddo
! enddo
!=!=!=!=! !=!=!=!=!
! E n d ! ! E n d !
@ -625,8 +635,8 @@ double precision, intent(in) :: v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax)
double precision, intent(in) :: rmax double precision, intent(in) :: rmax
! !
! | _ _ _. | _ ! | _ _ _. |
! |_ (_) (_ (_| | (/_ ! |_ (_) (_ (_| |
! !
integer :: l,m,k,kk integer :: l,m,k,kk
@ -1950,6 +1960,7 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg)
double precision :: s_q_0, s_q_k, s_0_0, a_over_b_square double precision :: s_q_0, s_q_k, s_0_0, a_over_b_square
double precision :: int_prod_bessel_loc double precision :: int_prod_bessel_loc
double precision :: inverses(0:300) double precision :: inverses(0:300)
double precision :: two_qkmp1, qk
logical done logical done
@ -2008,19 +2019,18 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg)
stop 'pseudopot.f90 : q > 300' stop 'pseudopot.f90 : q > 300'
endif endif
two_qkmp1 = dble(2*(q+m)+1)
qk = dble(q)
do k=0,q-1 do k=0,q-1
s_q_k = ( dble(2*(q-k+m)+1)*dble(q-k)*inverses(k) ) * s_q_k s_q_k = ( two_qkmp1*qk*inverses(k) ) * s_q_k
sum=sum+s_q_k sum=sum+s_q_k
two_qkmp1 = two_qkmp1-2.d0
qk = qk-1.d0
enddo enddo
inverses(q) = a_over_b_square/(dble(2*(q+n)+3) * dble(q+1)) inverses(q) = a_over_b_square/(dble(2*(q+n)+3) * dble(q+1))
! do k=0,q ! do k=0,q
! sum=sum+s_q_k ! sum=sum+s_q_k
! s_q_k = a_over_b_square * ( dble(2*(q-k+m)+1)*dble(q-k)/(dble(2*(k+n)+3) * dble(k+1)) ) * s_q_k ! s_q_k = a_over_b_square * ( dble(2*(q-k+m)+1)*dble(q-k)/(dble(2*(k+n)+3) * dble(k+1)) ) * s_q_k
! enddo
! Iteration of k
! do k=0,q
! sum=sum+s_q_k
! s_q_k = a_over_b_square * ( dble(2*(q-k+m)+1)*dble(q-k)/(dble(2*(k+n)+3) * dble(k+1)) ) * s_q_k
! enddo ! enddo
int=int+sum int=int+sum