From 8af721f452d0f0421524650f10c111042f83ea5d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Nov 2015 12:13:33 +0100 Subject: [PATCH] Acelerated pseudo integrals --- .../pot_ao_pseudo_ints.irp.f | 248 +++++++++--------- src/Integrals_Monoelec/pseudopot.f90 | 42 +-- 2 files changed, 149 insertions(+), 141 deletions(-) diff --git a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f index 95023177..a59ec00c 100644 --- a/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f +++ b/src/Integrals_Monoelec/pot_ao_pseudo_ints.irp.f @@ -33,78 +33,76 @@ END_PROVIDER integer, allocatable :: n_k_dump(:) double precision, allocatable :: v_k_dump(:), dz_k_dump(:) - allocate(n_k_dump(1:pseudo_klocmax), v_k_dump(1:pseudo_klocmax), dz_k_dump(1:pseudo_klocmax)) - - - ! _ - ! / _. | _ | - ! \_ (_| | (_ |_| | - ! - - print*, 'Providing the nuclear electron pseudo integrals ' - + allocate(n_k_dump(1:pseudo_klocmax), v_k_dump(1:pseudo_klocmax), dz_k_dump(1:pseudo_klocmax)) + + print*, 'Providing the nuclear electron pseudo integrals (local)' + call wall_time(wall_1) call cpu_time(cpu_1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B, & - !$OMP num_A,num_B,Z,c,n_pt_in, & - !$OMP v_k_dump,n_k_dump, dz_k_dump, & - !$OMP wall_0,wall_2,thread_num) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp, & - !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & - !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k,pseudo_n_k, pseudo_dz_k, & - !$OMP wall_1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,n_pt_in, & + !$OMP v_k_dump,n_k_dump, dz_k_dump, & + !$OMP wall_0,wall_2,thread_num) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP ao_pseudo_integral_local,nucl_num,nucl_charge, & + !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_v_k,pseudo_n_k, pseudo_dz_k,& + !$OMP wall_1) + + !$OMP DO SCHEDULE (static,1) - !$OMP DO SCHEDULE (guided) - do j = 1, ao_num - - num_A = ao_nucl(j) - power_A(1:3)= ao_power(j,1:3) - A_center(1:3) = nucl_coord(num_A,1:3) - - do i = 1, ao_num - - num_B = ao_nucl(i) - power_B(1:3)= ao_power(i,1:3) - B_center(1:3) = nucl_coord(num_B,1:3) - - do l=1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(l,j) - - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) - double precision :: c - c = 0.d0 - - do k = 1, nucl_num - double precision :: Z - Z = nucl_charge(k) - - C_center(1:3) = nucl_coord(k,1:3) - - v_k_dump = pseudo_v_k(k,1:pseudo_klocmax) - n_k_dump = pseudo_n_k(k,1:pseudo_klocmax) - dz_k_dump = pseudo_dz_k(k,1:pseudo_klocmax) - - c = c + Vloc(pseudo_klocmax, v_k_dump,n_k_dump, dz_k_dump, & - A_center,power_A,alpha,B_center,power_B,beta,C_center) - + + num_A = ao_nucl(j) + power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) + + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + double precision :: c + c = 0.d0 + + if (dabs(ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i))& + < 1.d-10) then + cycle + endif + do k = 1, nucl_num + double precision :: Z + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + v_k_dump = pseudo_v_k(k,1:pseudo_klocmax) + n_k_dump = pseudo_n_k(k,1:pseudo_klocmax) + dz_k_dump = pseudo_dz_k(k,1:pseudo_klocmax) + + c = c + Vloc(pseudo_klocmax, v_k_dump,n_k_dump, dz_k_dump,& + A_center,power_A,alpha,B_center,power_B,beta,C_center) + + enddo + ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) +& + ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c + enddo enddo - ao_pseudo_integral_local(i,j) = ao_pseudo_integral_local(i,j) + & - ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c - enddo - enddo - enddo - + enddo + call wall_time(wall_2) if (thread_num == 0) then if (wall_2 - wall_0 > 1.d0) then wall_0 = wall_2 - print*, 100.*float(j)/float(ao_num), '% in ', & - wall_2-wall_1, 's' + print*, 100.*float(j)/float(ao_num), '% in ', & + wall_2-wall_1, 's' endif endif enddo @@ -141,82 +139,82 @@ END_PROVIDER allocate(n_kl_dump(pseudo_kmax,0:pseudo_lmax), v_kl_dump(pseudo_kmax,0:pseudo_lmax), dz_kl_dump(pseudo_kmax,0:pseudo_lmax)) - ! _ - ! / _. | _ | - ! \_ (_| | (_ |_| | - ! - - print*, 'Providing the nuclear electron pseudo integrals ' - + print*, 'Providing the nuclear electron pseudo integrals (non-local)' + call wall_time(wall_1) call cpu_time(cpu_1) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B, & - !$OMP num_A,num_B,Z,c,n_pt_in, & - !$OMP n_kl_dump, v_kl_dump, dz_kl_dump, & - !$OMP wall_0,wall_2,thread_num) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp, & - !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge, & - !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl, pseudo_v_kl, pseudo_dz_kl, & - !$OMP wall_1) - !$OMP DO SCHEDULE (guided) - + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,n_pt_in, & + !$OMP n_kl_dump, v_kl_dump, dz_kl_dump, & + !$OMP wall_0,wall_2,thread_num) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP ao_pseudo_integral_non_local,nucl_num,nucl_charge,& + !$OMP pseudo_klocmax,pseudo_lmax,pseudo_kmax,pseudo_n_kl, pseudo_v_kl, pseudo_dz_kl,& + !$OMP wall_1) + + !$OMP DO SCHEDULE (static,1) + do j = 1, ao_num + + num_A = ao_nucl(j) + power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) + + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + double precision :: c + c = 0.d0 + + if (dabs(ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i))& + < 1.d-10) then + cycle + endif - num_A = ao_nucl(j) - power_A(1:3)= ao_power(j,1:3) - A_center(1:3) = nucl_coord(num_A,1:3) - - do i = 1, ao_num - - num_B = ao_nucl(i) - power_B(1:3)= ao_power(i,1:3) - B_center(1:3) = nucl_coord(num_B,1:3) - - do l=1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(l,j) - - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) - double precision :: c - c = 0.d0 - - do k = 1, nucl_num - double precision :: Z - Z = nucl_charge(k) - - C_center(1:3) = nucl_coord(k,1:3) - - n_kl_dump = pseudo_n_kl(k,1:pseudo_kmax,0:pseudo_lmax) - v_kl_dump = pseudo_v_kl(k,1:pseudo_kmax,0:pseudo_lmax) - dz_kl_dump = pseudo_dz_kl(k,1:pseudo_kmax,0:pseudo_lmax) - - c = c + Vpseudo(pseudo_lmax,pseudo_kmax,v_kl_dump,n_kl_dump,dz_kl_dump,A_center,power_A,alpha,B_center,power_B,beta,C_center) - + do k = 1, nucl_num + double precision :: Z + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + n_kl_dump = pseudo_n_kl(k,1:pseudo_kmax,0:pseudo_lmax) + v_kl_dump = pseudo_v_kl(k,1:pseudo_kmax,0:pseudo_lmax) + dz_kl_dump = pseudo_dz_kl(k,1:pseudo_kmax,0:pseudo_lmax) + + c = c + Vpseudo(pseudo_lmax,pseudo_kmax,v_kl_dump,n_kl_dump,dz_kl_dump,A_center,power_A,alpha,B_center,power_B,beta,C_center) + + enddo + ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) +& + ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c + enddo enddo - ao_pseudo_integral_non_local(i,j) = ao_pseudo_integral_non_local(i,j) + & - ao_coef_normalized_ordered_transp(l,j)*ao_coef_normalized_ordered_transp(m,i)*c - enddo - enddo - enddo - + enddo + call wall_time(wall_2) if (thread_num == 0) then if (wall_2 - wall_0 > 1.d0) then wall_0 = wall_2 - print*, 100.*float(j)/float(ao_num), '% in ', & - wall_2-wall_1, 's' + print*, 100.*float(j)/float(ao_num), '% in ', & + wall_2-wall_1, 's' endif endif enddo - - !$OMP END DO - !$OMP END PARALLEL - - + + !$OMP END DO + !$OMP END PARALLEL + + deallocate(n_kl_dump,v_kl_dump, dz_kl_dump) diff --git a/src/Integrals_Monoelec/pseudopot.f90 b/src/Integrals_Monoelec/pseudopot.f90 index e05b883b..c262105f 100644 --- a/src/Integrals_Monoelec/pseudopot.f90 +++ b/src/Integrals_Monoelec/pseudopot.f90 @@ -197,8 +197,8 @@ integer, intent(in) :: n_a(3),n_b(3) double precision, intent(in) :: v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax) ! -! | _ _ _. | _ -! |_ (_) (_ (_| | (/_ +! | _ _ _. | +! |_ (_) (_ (_| | ! double precision :: fourpi,f,prod,prodp,binom_func,accu,bigR,bigI,ylm @@ -223,11 +223,6 @@ double precision, allocatable :: array_I_B(:,:,:,:,:) double precision :: f1, f2, f3 -! _ -! / _. | _ | -! \_ (_| | (_ |_| | -! - if (kmax.eq.1.and.lmax.eq.0.and.v_kl(1,0).eq.0.d0) then Vpseudo=0.d0 return @@ -236,7 +231,7 @@ end if fourpi=4.d0*dacos(-1.d0) ac=dsqrt((a(1)-c(1))**2+(a(2)-c(2))**2+(a(3)-c(3))**2) bc=dsqrt((b(1)-c(1))**2+(b(2)-c(2))**2+(b(3)-c(3))**2) -arg=g_a*ac**2+g_b*bc**2 +arg= g_a*ac*ac + g_b*bc*bc if(arg.gt.-dlog(1.d-20))then Vpseudo=0.d0 @@ -290,6 +285,21 @@ if(ac.eq.0.d0.and.bc.eq.0.d0)then enddo enddo enddo +! do k=1,kmax +! do l=0,lmax +! ktot=ntot+n_kl(k,l) +! do m=-l,l +! prod =bigI(0,0,l,m,n_a(1),n_a(2),n_a(3))*v_kl(k,l) +! prodp=bigI(0,0,l,m,n_b(1),n_b(2),n_b(3))*prod +! if (dabs (prodp) < 1.d-15) then +! cycle +! endif +! +! accu=accu+prodp*int_prod_bessel(ktot+2,g_a+g_b+dz_kl(k,l),0,0,areal,breal,arg) +! +! enddo +! enddo +! enddo !=!=!=!=! ! E n d ! @@ -625,8 +635,8 @@ double precision, intent(in) :: v_kl(kmax,0:lmax),dz_kl(kmax,0:lmax) double precision, intent(in) :: rmax ! -! | _ _ _. | _ -! |_ (_) (_ (_| | (/_ +! | _ _ _. | +! |_ (_) (_ (_| | ! integer :: l,m,k,kk @@ -1950,6 +1960,7 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg) double precision :: s_q_0, s_q_k, s_0_0, a_over_b_square double precision :: int_prod_bessel_loc double precision :: inverses(0:300) + double precision :: two_qkmp1, qk logical done @@ -2008,19 +2019,18 @@ double precision function int_prod_bessel(l,gam,n,m,a,b,arg) stop 'pseudopot.f90 : q > 300' endif + two_qkmp1 = dble(2*(q+m)+1) + qk = dble(q) do k=0,q-1 - s_q_k = ( dble(2*(q-k+m)+1)*dble(q-k)*inverses(k) ) * s_q_k + s_q_k = ( two_qkmp1*qk*inverses(k) ) * s_q_k sum=sum+s_q_k + two_qkmp1 = two_qkmp1-2.d0 + qk = qk-1.d0 enddo inverses(q) = a_over_b_square/(dble(2*(q+n)+3) * dble(q+1)) ! do k=0,q ! sum=sum+s_q_k ! s_q_k = a_over_b_square * ( dble(2*(q-k+m)+1)*dble(q-k)/(dble(2*(k+n)+3) * dble(k+1)) ) * s_q_k -! enddo - ! Iteration of k -! do k=0,q -! sum=sum+s_q_k -! s_q_k = a_over_b_square * ( dble(2*(q-k+m)+1)*dble(q-k)/(dble(2*(k+n)+3) * dble(k+1)) ) * s_q_k ! enddo int=int+sum