diff --git a/src/mo.irp.f b/src/mo.irp.f index 3825835..d1de6c2 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -76,7 +76,6 @@ BEGIN_PROVIDER [ real, mo_coef, (ao_num_8,mo_num_8) ] f = 1./mo_scale do j=1,mo_num !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (2000) do i=1,ao_num_8 mo_coef(i,j) *= f enddo @@ -218,7 +217,6 @@ END_PROVIDER cycle endif r_inv = nucl_elec_dist_inv(k,i) - !DIR$ LOOP COUNT (500) do j=1,mo_num mo_value_transp(j,i) = mo_value_transp(j,i) + nucl_fitcusp_param(1,j,k) +& r * (nucl_fitcusp_param(2,j,k) + & @@ -250,7 +248,6 @@ END_PROVIDER ! Scale off-diagonal elements t = prepare_walkers_t do i=1,mo_num - !DIR$ LOOP COUNT (100) do j=1,elec_alpha_num if (i /= j) then mo_value_transp(i,j) *= t @@ -260,7 +257,6 @@ END_PROVIDER mo_lapl_transp(i,j) *= t endif enddo - !DIR$ LOOP COUNT (100) do j=1,elec_beta_num if (i /= j) then mo_value_transp(i,j+elec_alpha_num) *= t @@ -284,7 +280,6 @@ END_PROVIDER enddo END_PROVIDER - BEGIN_PROVIDER [ real, mo_value, (elec_num_8,mo_num) ] implicit none BEGIN_DOC @@ -342,10 +337,27 @@ BEGIN_PROVIDER [ double precision, mo_grad_lapl, (4,elec_num,mo_num) ] integer :: i,j do j=1,mo_num do i=1,elec_num - mo_grad_lapl(1,i,j) = mo_grad_x(i,j) - mo_grad_lapl(2,i,j) = mo_grad_y(i,j) - mo_grad_lapl(3,i,j) = mo_grad_z(i,j) - mo_grad_lapl(4,i,j) = mo_lapl (i,j) + mo_grad_lapl(1,i,j) = mo_grad_transp_x(j,i) + mo_grad_lapl(2,i,j) = mo_grad_transp_y(j,i) + mo_grad_lapl(3,i,j) = mo_grad_transp_z(j,i) + mo_grad_lapl(4,i,j) = mo_lapl_transp (j,i) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mo_grad_lapl_transp, (4,mo_num,elec_num) ] + implicit none + BEGIN_DOC +! Gradients and laplacian + END_DOC + integer :: i,j + do i=1,elec_num + do j=1,mo_num + mo_grad_lapl_transp(1,j,i) = mo_grad_transp_x(j,i) + mo_grad_lapl_transp(2,j,i) = mo_grad_transp_y(j,i) + mo_grad_lapl_transp(3,j,i) = mo_grad_transp_z(j,i) + mo_grad_lapl_transp(4,j,i) = mo_lapl_transp (j,i) enddo enddo @@ -419,7 +431,6 @@ BEGIN_PROVIDER [ double precision , mo_value_at_nucl, (mo_num_8,nucl_num) ] PROVIDE ao_value_p - !DIR$ LOOP COUNT (2000) do i=1,ao_num if (ao_nucl(i) /= k) then ao_value_at_nucl_no_S(i) = ao_value_p(i) @@ -461,7 +472,6 @@ END_PROVIDER point(3) = nucl_coord(k,3)+ nucl_fitcusp_radius(k) TOUCH point - !DIR$ LOOP COUNT (2000) do j=1,ao_num ao_value_at_fitcusp_radius(j,k) = ao_value_p(j) ao_grad_at_fitcusp_radius(j,k) = ao_grad_p(j,3) @@ -615,7 +625,6 @@ BEGIN_PROVIDER [ real, nucl_fitcusp_param, (4,mo_num,nucl_num) ] cycle endif R = nucl_fitcusp_radius(k) - !DIR$ LOOP COUNT (500) do i=1,mo_num double precision :: lap_phi, grad_phi, phi, eta lap_phi = mo_lapl_at_fitcusp_radius(i,k) @@ -728,22 +737,29 @@ subroutine sparse_full_mv(A,LDA, & do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 + IRP_IF NO_PREFETCH + IRP_ELSE + call MM_PREFETCH (A(j+k,indices(kao+4)),3) + call MM_PREFETCH (A(j+k,indices(kao+5)),3) + call MM_PREFETCH (A(j+k,indices(kao+6)),3) + call MM_PREFETCH (A(j+k,indices(kao+7)),3) + IRP_ENDIF C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 + A(j+k,k_vec(2))*d21& + A(j+k,k_vec(3))*d31 + A(j+k,k_vec(4))*d41 + enddo + + !DIR$ VECTOR ALIGNED + do j=1,$IRP_ALIGN/4 C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22& + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42 - enddo - - !DIR$ VECTOR ALIGNED - do j=1,$IRP_ALIGN/4 C3(j+k) = C3(j+k) + A(j+k,k_vec(1))*d13 + A(j+k,k_vec(2))*d23& + A(j+k,k_vec(3))*d33 + A(j+k,k_vec(4))*d43 - C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& - + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 enddo !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 + C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& + + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 C5(j+k) = C5(j+k) + A(j+k,k_vec(1))*d15 + A(j+k,k_vec(2))*d25& + A(j+k,k_vec(3))*d35 + A(j+k,k_vec(4))*d45 enddo