Optimized det (mod4)

This commit is contained in:
Anthony Scemama 2016-06-03 23:45:01 +02:00
parent 56cd62957a
commit 2efdfa26e9
1 changed file with 80 additions and 34 deletions

View File

@ -327,9 +327,9 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
u(i) = m(i) - S(i,l)
enddo
z(l) = S_inv($n,l)*u($n)
z(l) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n-1
do i=1,$n
z(l) = z(l) + S_inv(i,l)*u(i)
enddo
@ -343,12 +343,12 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
!DIR$ VECTOR ALIGNED
do j=1,$n,4
z(j ) = S_inv($n,j )*u($n)
z(j+1) = S_inv($n,j+1)*u($n)
z(j+2) = S_inv($n,j+2)*u($n)
z(j+3) = S_inv($n,j+3)*u($n)
z(j ) = 0.d0
z(j+1) = 0.d0
z(j+2) = 0.d0
z(j+3) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n-1
do i=1,$n
z(j ) = z(j ) + S_inv(i,j )*u(i)
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
@ -454,12 +454,10 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
return
endif
!DIR$ VECTOR ALIGNED
z = 0.d0
!DIR$ VECTOR ALIGNED
do j=1,$n-1,4
z(j ) = 0.d0
z(j+1) = 0.d0
z(j+2) = 0.d0
z(j+3) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n-1
z(j ) = z(j ) + S_inv(i,j )*u(i)
@ -473,11 +471,11 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
enddo
z($n) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n
z($n) = z($n) + S_inv(i,$n)*u(i)
enddo
!DIR$ VECTOR ALIGNED
do i=1,$n
w(i) = S_inv(i,l)*d_inv
@ -574,11 +572,14 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
u(i) = m(i) - S(i,l)
enddo
z(l) = S_inv($n,l)*u($n)
z(l) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n-1
z(l) = z(l) + S_inv(i,l)*u(i)
do i=1,$n-2,4
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
enddo
i=$n-1
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1)
d_inv = 1.d0/d
d = d+z(l)
@ -588,29 +589,37 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
return
endif
!DIR$ VECTOR ALIGNED
z = 0.d0
!DIR$ VECTOR ALIGNED
do j=1,$n-2,4
z(j ) = S_inv($n,j )*u($n)
z(j+1) = S_inv($n,j+1)*u($n)
z(j+2) = S_inv($n,j+2)*u($n)
z(j+3) = S_inv($n,j+3)*u($n)
!DIR$ VECTOR ALIGNED
do i=1,$n-1
do i=1,$n-2
z(j ) = z(j ) + S_inv(i,j )*u(i)
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
enddo
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
z(j ) = z(j ) + S_inv($n,j )*u($n)
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1)
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
enddo
j=$n-1
z(j ) = S_inv($n,j )*u($n)
z(j+1) = S_inv($n,j+1)*u($n)
!DIR$ VECTOR ALIGNED
do i=1,$n-1
do i=1,$n-2
z(j ) = z(j ) + S_inv(i,j )*u(i)
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
enddo
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
z(j ) = z(j ) + S_inv($n,j )*u($n)
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
!DIR$ VECTOR ALIGNED
do i=1,$n
@ -620,20 +629,32 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
do i=1,$n-2,4
!DIR$ VECTOR ALIGNED
do j=1,$n
do j=1,$n-2
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j)
S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j)
enddo
S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1)
S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n )
S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1)
S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n )
enddo
i=$n-1
!DIR$ VECTOR ALIGNED
do j=1,$n
do j=1,$n-2
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
enddo
S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
end
@ -704,11 +725,14 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
u(i) = m(i) - S(i,l)
enddo
z(l) = S_inv($n,l)*u($n)
z(l) = 0.d0
!DIR$ VECTOR ALIGNED
do i=1,$n-1
z(l) = z(l) + S_inv(i,l)*u(i)
do i=1,$n-3,4
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
enddo
i=$n-2
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2)
d_inv = 1.d0/d
@ -719,19 +743,29 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
return
endif
!DIR$ VECTOR ALIGNED
z = 0.d0
!DIR$ VECTOR ALIGNED
do j=1,$n-3,4
z(j ) = S_inv($n,j )*u($n)
z(j+1) = S_inv($n,j+1)*u($n)
z(j+2) = S_inv($n,j+2)*u($n)
z(j+3) = S_inv($n,j+3)*u($n)
!DIR$ VECTOR ALIGNED
do i=1,$n-1
do i=1,$n-3
z(j ) = z(j ) + S_inv(i,j )*u(i)
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
enddo
z(j ) = z(j ) + S_inv($n-2,j )*u($n-2)
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
z(j ) = z(j ) + S_inv($n,j )*u($n)
z(j+1) = z(j+1) + S_inv($n-2,j+1)*u($n-2)
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
z(j+2) = z(j+2) + S_inv($n-2,j+2)*u($n-2)
z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
z(j+3) = z(j+3) + S_inv($n-2,j+3)*u($n-2)
z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1)
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
enddo
j=$n-2
@ -753,12 +787,24 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
do i=1,$n-3,4
!DIR$ VECTOR ALIGNED
do j=1,$n
do j=1,$n-3
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3)
enddo
S_inv($n-2,i ) = S_inv($n-2,i )*lambda -z(i )*w($n-2)
S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
S_inv($n-2,i+1) = S_inv($n-2,i+1)*lambda -z(i+1)*w($n-2)
S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
S_inv($n-2,i+2) = S_inv($n-2,i+2)*lambda -z(i+2)*w($n-2)
S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1)
S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n )
S_inv($n-2,i+3) = S_inv($n-2,i+3)*lambda -z(i+3)*w($n-2)
S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1)
S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n )
enddo
i=$n-2