mirror of
https://gitlab.com/scemama/qmcchem.git
synced 2025-01-03 01:55:39 +01:00
DIR$ SIMD
This commit is contained in:
parent
2efdfa26e9
commit
b5f2e62421
196
src/det.irp.f
196
src/det.irp.f
@ -434,49 +434,60 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
!DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0)
|
!DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0)
|
||||||
!DIR$ ASSUME (LDS >= $n)
|
!DIR$ ASSUME (LDS >= $n)
|
||||||
integer :: i,j
|
integer :: i,j
|
||||||
double precision :: zl, ui
|
double precision :: zj, zj1, zj2, zj3
|
||||||
|
|
||||||
zl = 0.d0
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do i=1,$n-1
|
!DIR$ SIMD
|
||||||
ui = m(i) - S(i,l)
|
do i=1,$n
|
||||||
u(i) = ui
|
u(i) = m(i) - S(i,l)
|
||||||
zl = zl + S_inv(i,l)*ui
|
|
||||||
enddo
|
enddo
|
||||||
u($n) = m($n) - S($n,l)
|
|
||||||
zl = zl + S_inv($n,l)*u($n)
|
zj = 0.d0
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj)
|
||||||
|
do i=1,$n-1,4
|
||||||
|
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
|
||||||
|
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
|
||||||
|
enddo
|
||||||
|
zj = zj + S_inv($n,l)*u($n)
|
||||||
|
|
||||||
d_inv = 1.d0/d
|
d_inv = 1.d0/d
|
||||||
d = d+zl
|
d = d+zj
|
||||||
lambda = d*d_inv
|
lambda = d*d_inv
|
||||||
if (dabs(lambda) < 1.d-3) then
|
if (dabs(lambda) < 1.d-3) then
|
||||||
d = 0.d0
|
d = 0.d0
|
||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
|
||||||
z = 0.d0
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do j=1,$n-1,4
|
do j=1,$n-1,4
|
||||||
|
zj = 0.d0
|
||||||
|
zj1 = 0.d0
|
||||||
|
zj2 = 0.d0
|
||||||
|
zj3 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
|
||||||
do i=1,$n-1
|
do i=1,$n-1
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
|
zj2 = zj2 + S_inv(i,j+2)*u(i)
|
||||||
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
|
zj3 = zj3 + S_inv(i,j+3)*u(i)
|
||||||
enddo
|
enddo
|
||||||
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
z(j ) = zj + S_inv($n,j )*u($n)
|
||||||
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
z(j+1) = zj1 + S_inv($n,j+1)*u($n)
|
||||||
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
|
z(j+2) = zj2 + S_inv($n,j+2)*u($n)
|
||||||
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
|
z(j+3) = zj3 + S_inv($n,j+3)*u($n)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
|
zj = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj)
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
z($n) = z($n) + S_inv(i,$n)*u(i)
|
zj = zj + S_inv(i,$n)*u(i)
|
||||||
enddo
|
enddo
|
||||||
|
z($n) = zj
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d_inv)
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
w(i) = S_inv(i,l)*d_inv
|
w(i) = S_inv(i,l)*d_inv
|
||||||
S(i,l) = m(i)
|
S(i,l) = m(i)
|
||||||
@ -484,6 +495,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
do i=1,$n-1,4
|
do i=1,$n-1,4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,$n-1
|
do j=1,$n-1
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
||||||
@ -497,6 +509,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*z($n)
|
S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*z($n)
|
||||||
enddo
|
enddo
|
||||||
@ -567,61 +580,71 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
!DIR$ ASSUME (LDS >= $n)
|
!DIR$ ASSUME (LDS >= $n)
|
||||||
integer :: i,j
|
integer :: i,j
|
||||||
|
|
||||||
|
double precision :: zj, zj1, zj2, zj3
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
u(i) = m(i) - S(i,l)
|
u(i) = m(i) - S(i,l)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
z(l) = 0.d0
|
zj = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj)
|
||||||
do i=1,$n-2,4
|
do i=1,$n-2,4
|
||||||
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
|
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
|
||||||
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
|
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
|
||||||
enddo
|
enddo
|
||||||
i=$n-1
|
i=$n-1
|
||||||
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1)
|
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1)
|
||||||
|
|
||||||
d_inv = 1.d0/d
|
d_inv = 1.d0/d
|
||||||
d = d+z(l)
|
d = d+zj
|
||||||
lambda = d*d_inv
|
lambda = d*d_inv
|
||||||
if (dabs(lambda) < 1.d-3) then
|
if (dabs(lambda) < 1.d-3) then
|
||||||
d = 0.d0
|
d = 0.d0
|
||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
|
||||||
z = 0.d0
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do j=1,$n-2,4
|
do j=1,$n-2,4
|
||||||
|
zj = 0.d0
|
||||||
|
zj1 = 0.d0
|
||||||
|
zj2 = 0.d0
|
||||||
|
zj3 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
|
||||||
do i=1,$n-2
|
do i=1,$n-2
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
|
zj2 = zj2 + S_inv(i,j+2)*u(i)
|
||||||
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
|
zj3 = zj3 + S_inv(i,j+3)*u(i)
|
||||||
enddo
|
enddo
|
||||||
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
|
z(j ) = zj + S_inv($n-1,j )*u($n-1)
|
||||||
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
||||||
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
|
z(j+1) = zj1 + S_inv($n-1,j+1)*u($n-1)
|
||||||
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
||||||
z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
|
z(j+2) = zj2 + S_inv($n-1,j+2)*u($n-1)
|
||||||
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
|
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
|
||||||
z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1)
|
z(j+3) = zj3 + S_inv($n-1,j+3)*u($n-1)
|
||||||
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
|
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
j=$n-1
|
j=$n-1
|
||||||
|
zj = 0.d0
|
||||||
|
zj1 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj,zj1)
|
||||||
do i=1,$n-2
|
do i=1,$n-2
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
enddo
|
enddo
|
||||||
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
|
z(j ) = zj + S_inv($n-1,j )*u($n-1)
|
||||||
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
||||||
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
|
z(j+1) = zj1 + S_inv($n-1,j+1)*u($n-1)
|
||||||
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d_inv)
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
w(i) = S_inv(i,l)*d_inv
|
w(i) = S_inv(i,l)*d_inv
|
||||||
S(i,l) = m(i)
|
S(i,l) = m(i)
|
||||||
@ -629,6 +652,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
do i=1,$n-2,4
|
do i=1,$n-2,4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,$n-2
|
do j=1,$n-2
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
||||||
@ -647,6 +671,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
i=$n-1
|
i=$n-1
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,$n-2
|
do j=1,$n-2
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
||||||
@ -720,66 +745,74 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
!DIR$ ASSUME (LDS >= $n)
|
!DIR$ ASSUME (LDS >= $n)
|
||||||
integer :: i,j
|
integer :: i,j
|
||||||
|
|
||||||
|
double precision :: zj, zj1, zj2, zj3
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
u(i) = m(i) - S(i,l)
|
u(i) = m(i) - S(i,l)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
z(l) = 0.d0
|
zj = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj)
|
||||||
do i=1,$n-3,4
|
do i=1,$n-3,4
|
||||||
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
|
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
|
||||||
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
|
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
|
||||||
enddo
|
enddo
|
||||||
i=$n-2
|
i=$n-2
|
||||||
z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2)
|
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2)
|
||||||
|
|
||||||
|
|
||||||
d_inv = 1.d0/d
|
d_inv = 1.d0/d
|
||||||
d = d+z(l)
|
d = d+zj
|
||||||
lambda = d*d_inv
|
lambda = d*d_inv
|
||||||
if (dabs(lambda) < 1.d-3) then
|
if (dabs(lambda) < 1.d-3) then
|
||||||
d = 0.d0
|
d = 0.d0
|
||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
|
||||||
z = 0.d0
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do j=1,$n-3,4
|
do j=1,$n-3,4
|
||||||
|
zj = 0.d0
|
||||||
|
zj1 = 0.d0
|
||||||
|
zj2 = 0.d0
|
||||||
|
zj3 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
|
||||||
do i=1,$n-3
|
do i=1,$n-3
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
|
zj2 = zj2 + S_inv(i,j+2)*u(i)
|
||||||
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
|
zj3 = zj3 + S_inv(i,j+3)*u(i)
|
||||||
enddo
|
enddo
|
||||||
z(j ) = z(j ) + S_inv($n-2,j )*u($n-2)
|
z(j ) = zj + S_inv($n-2,j )*u($n-2)
|
||||||
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
|
z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
|
||||||
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
z(j ) = z(j ) + S_inv($n,j )*u($n)
|
||||||
z(j+1) = z(j+1) + S_inv($n-2,j+1)*u($n-2)
|
z(j+1) = zj1 + S_inv($n-2,j+1)*u($n-2)
|
||||||
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
|
z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
|
||||||
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
|
||||||
z(j+2) = z(j+2) + S_inv($n-2,j+2)*u($n-2)
|
z(j+2) = zj2 + S_inv($n-2,j+2)*u($n-2)
|
||||||
z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
|
z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
|
||||||
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
|
z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
|
||||||
z(j+3) = z(j+3) + S_inv($n-2,j+3)*u($n-2)
|
z(j+3) = zj3 + S_inv($n-2,j+3)*u($n-2)
|
||||||
z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1)
|
z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1)
|
||||||
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
|
z(j+3) = z(j+3) + S_inv($n,j+3)*u($n)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
j=$n-2
|
j=$n-2
|
||||||
z(j ) = S_inv($n,j )*u($n)
|
zj = 0.d0
|
||||||
z(j+1) = S_inv($n,j+1)*u($n)
|
zj1 = 0.d0
|
||||||
z(j+2) = S_inv($n,j+2)*u($n)
|
zj2 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do i=1,$n-1
|
!DIR$ SIMD REDUCTION(+:zj,zj1,zj2)
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
do i=1,$n
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
|
zj2 = zj2 + S_inv(i,j+2)*u(i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d_inv)
|
||||||
do i=1,$n
|
do i=1,$n
|
||||||
w(i) = S_inv(i,l)*d_inv
|
w(i) = S_inv(i,l)*d_inv
|
||||||
S(i,l) = m(i)
|
S(i,l) = m(i)
|
||||||
@ -787,6 +820,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
do i=1,$n-3,4
|
do i=1,$n-3,4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,$n-3
|
do j=1,$n-3
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
||||||
@ -809,6 +843,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
i=$n-2
|
i=$n-2
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,$n
|
do j=1,$n
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
|
||||||
@ -881,20 +916,23 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
|
|||||||
!DIR$ ASSUME (n>150)
|
!DIR$ ASSUME (n>150)
|
||||||
|
|
||||||
integer :: i,j,n4
|
integer :: i,j,n4
|
||||||
|
double precision :: zl
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD
|
||||||
do i=1,n
|
do i=1,n
|
||||||
u(i) = m(i) - S(i,l)
|
u(i) = m(i) - S(i,l)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
z(l) = 0.d0
|
zl = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zl)
|
||||||
do i=1,n
|
do i=1,n
|
||||||
z(l) = z(l) + S_inv(i,l)*u(i)
|
zl = zl + S_inv(i,l)*u(i)
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
d_inv = 1.d0/d
|
d_inv = 1.d0/d
|
||||||
d = d+z(l)
|
d = d+zl
|
||||||
lambda = d*d_inv
|
lambda = d*d_inv
|
||||||
|
|
||||||
if ( dabs(lambda) < 1.d-3 ) then
|
if ( dabs(lambda) < 1.d-3 ) then
|
||||||
@ -902,31 +940,41 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
|
|||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
double precision :: zj, zj1, zj2, zj3
|
||||||
|
|
||||||
n4 = iand(n,not(3))
|
n4 = iand(n,not(3))
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do j=1,n4,4
|
do j=1,n4,4
|
||||||
z(j ) = 0.d0
|
zj = 0.d0
|
||||||
z(j+1) = 0.d0
|
zj1 = 0.d0
|
||||||
z(j+2) = 0.d0
|
zj2 = 0.d0
|
||||||
z(j+3) = 0.d0
|
zj3 = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
|
||||||
do i=1,n
|
do i=1,n
|
||||||
z(j ) = z(j ) + S_inv(i,j )*u(i)
|
zj = zj + S_inv(i,j )*u(i)
|
||||||
z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
|
zj1 = zj1 + S_inv(i,j+1)*u(i)
|
||||||
z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
|
zj2 = zj2 + S_inv(i,j+2)*u(i)
|
||||||
z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
|
zj3 = zj3 + S_inv(i,j+3)*u(i)
|
||||||
enddo
|
enddo
|
||||||
|
z(j ) = zj
|
||||||
|
z(j+1) = zj1
|
||||||
|
z(j+2) = zj2
|
||||||
|
z(j+3) = zj3
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
do j=n4+1,n
|
do j=n4+1,n
|
||||||
z(j) = 0.d0
|
zj = 0.d0
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD REDUCTION(+:zj)
|
||||||
do i=1,n
|
do i=1,n
|
||||||
z(j) = z(j) + S_inv(i,j)*u(i)
|
zj = zj + S_inv(i,j)*u(i)
|
||||||
enddo
|
enddo
|
||||||
|
z(j ) = zj
|
||||||
enddo
|
enddo
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d_inv)
|
||||||
do i=1,n
|
do i=1,n
|
||||||
w(i) = S_inv(i,l)*d_inv
|
w(i) = S_inv(i,l)*d_inv
|
||||||
S(i,l) = m(i)
|
S(i,l) = m(i)
|
||||||
@ -934,7 +982,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
do i=1,n4,4
|
do i=1,n4,4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
!DIR$ VECTOR ALWAYS
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,n
|
do j=1,n
|
||||||
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
|
||||||
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
|
||||||
@ -945,7 +993,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
|
|||||||
|
|
||||||
do i=n4+1,n
|
do i=n4+1,n
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
!DIR$ VECTOR ALWAYS
|
!DIR$ SIMD FIRSTPRIVATE(lambda,z)
|
||||||
do j=1,n
|
do j=1,n
|
||||||
S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j)
|
S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j)
|
||||||
enddo
|
enddo
|
||||||
|
@ -709,7 +709,7 @@ subroutine sparse_full_mv(A,LDA, &
|
|||||||
call MM_PREFETCH (A(j,indices(4)),3)
|
call MM_PREFETCH (A(j,indices(4)),3)
|
||||||
IRP_ENDIF
|
IRP_ENDIF
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ SIMD
|
||||||
do j=1,LDC
|
do j=1,LDC
|
||||||
C1(j) = 0.
|
C1(j) = 0.
|
||||||
C2(j) = 0.
|
C2(j) = 0.
|
||||||
@ -755,6 +755,7 @@ subroutine sparse_full_mv(A,LDA, &
|
|||||||
|
|
||||||
do k=0,LDA-1,$IRP_ALIGN/4
|
do k=0,LDA-1,$IRP_ALIGN/4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d11,d21,d31,d41)
|
||||||
do j=1,$IRP_ALIGN/4
|
do j=1,$IRP_ALIGN/4
|
||||||
IRP_IF NO_PREFETCH
|
IRP_IF NO_PREFETCH
|
||||||
IRP_ELSE
|
IRP_ELSE
|
||||||
@ -768,6 +769,7 @@ subroutine sparse_full_mv(A,LDA, &
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d12,d22,d32,d42,d13,d23,d33,d43)
|
||||||
do j=1,$IRP_ALIGN/4
|
do j=1,$IRP_ALIGN/4
|
||||||
C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22&
|
C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22&
|
||||||
+ A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42
|
+ A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42
|
||||||
@ -776,6 +778,7 @@ subroutine sparse_full_mv(A,LDA, &
|
|||||||
enddo
|
enddo
|
||||||
|
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d14,d24,d34,d44,d15,d25,d35,d45)
|
||||||
do j=1,$IRP_ALIGN/4
|
do j=1,$IRP_ALIGN/4
|
||||||
C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24&
|
C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24&
|
||||||
+ A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44
|
+ A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44
|
||||||
@ -796,6 +799,7 @@ subroutine sparse_full_mv(A,LDA, &
|
|||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
do k=0,LDA-1,$IRP_ALIGN/4
|
do k=0,LDA-1,$IRP_ALIGN/4
|
||||||
!DIR$ VECTOR ALIGNED
|
!DIR$ VECTOR ALIGNED
|
||||||
|
!DIR$ SIMD FIRSTPRIVATE(d11,d12,d13,d14,d15)
|
||||||
do j=1,$IRP_ALIGN/4
|
do j=1,$IRP_ALIGN/4
|
||||||
C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11
|
C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11
|
||||||
C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12
|
C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12
|
||||||
|
Loading…
Reference in New Issue
Block a user