diff --git a/src/det.irp.f b/src/det.irp.f index c488e12..e2d72c6 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -321,20 +321,24 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0) !DIR$ ASSUME (LDS >= $n) integer :: i,j + double precision :: zj, zj1, zj2, zj3 !DIR$ VECTOR ALIGNED + !DIR$ SIMD do i=1,$n u(i) = m(i) - S(i,l) enddo - z(l) = 0.d0 + zj = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n - z(l) = z(l) + S_inv(i,l)*u(i) + !DIR$ SIMD REDUCTION(+:zj) + do i=1,$n-1,4 + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) enddo d_inv = 1.d0/d - d = d+z(l) + d = d+zj lambda = d*d_inv if (dabs(lambda) < 1.d-3) then d = 0.d0 @@ -343,20 +347,26 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ VECTOR ALIGNED do j=1,$n,4 - z(j ) = 0.d0 - z(j+1) = 0.d0 - z(j+2) = 0.d0 - z(j+3) = 0.d0 + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 + zj3 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) do i=1,$n - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) - z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) + zj3 = zj3 + S_inv(i,j+3)*u(i) enddo + z(j ) = zj + z(j+1) = zj1 + z(j+2) = zj2 + z(j+3) = zj3 enddo !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(d_inv) do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -364,11 +374,12 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n,4 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n - S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) - S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) - S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j) - S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j) + S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) + S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) + S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) + S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3) enddo enddo