From 837c5bbbf7163adf51cdbf90208c664f7ffdaa66 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 11:52:24 +0200 Subject: [PATCH 01/15] Removed LOOP_COUNT directives --- src/electrons.irp.f | 6 +----- src/mo.irp.f | 19 ++----------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/electrons.irp.f b/src/electrons.irp.f index f120b39..65ec86e 100644 --- a/src/electrons.irp.f +++ b/src/electrons.irp.f @@ -171,19 +171,15 @@ END_PROVIDER do ie2 = 1,elec_num real :: x, y, z + real :: x2, y2, z2 x = elec_coord(ie2,1) y = elec_coord(ie2,2) z = elec_coord(ie2,3) !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (200) do ie1 = 1,elec_num elec_dist_vec_x(ie1,ie2) = elec_coord(ie1,1) - x elec_dist_vec_y(ie1,ie2) = elec_coord(ie1,2) - y elec_dist_vec_z(ie1,ie2) = elec_coord(ie1,3) - z - enddo - !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (200) - do ie1 = 1,elec_num elec_dist(ie1,ie2) = sqrt( & elec_dist_vec_x(ie1,ie2)*elec_dist_vec_x(ie1,ie2) + & elec_dist_vec_y(ie1,ie2)*elec_dist_vec_y(ie1,ie2) + & diff --git a/src/mo.irp.f b/src/mo.irp.f index ee2c2fc..0ad7f5b 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -682,8 +682,6 @@ subroutine sparse_full_mv(A,LDA, & ! LDC and LDA have to be factors of simd_sp !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (256) - !$OMP SIMD do j=1,LDC C1(j) = 0. C2(j) = 0. @@ -691,11 +689,11 @@ subroutine sparse_full_mv(A,LDA, & C4(j) = 0. C5(j) = 0. enddo - !$OMP END SIMD + kmax2 = ishft(indices(0),-2) kmax2 = ishft(kmax2,2) kmax3 = indices(0) - !DIR$ LOOP COUNT (200) + do kao=1,kmax2,4 k_vec(1) = indices(kao ) k_vec(2) = indices(kao+1) @@ -712,17 +710,14 @@ subroutine sparse_full_mv(A,LDA, & d32 = B2(kao+2) d42 = B2(kao+3) - !DIR$ LOOP COUNT (256) do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$IRP_ALIGN/4 C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 + A(j+k,k_vec(2))*d21& + A(j+k,k_vec(3))*d31 + A(j+k,k_vec(4))*d41 C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22& + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42 enddo - !$OMP END SIMD enddo d13 = B3(kao ) @@ -735,17 +730,14 @@ subroutine sparse_full_mv(A,LDA, & d34 = B4(kao+2) d44 = B4(kao+3) - !DIR$ LOOP COUNT (256) do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$IRP_ALIGN/4 C3(j+k) = C3(j+k) + A(j+k,k_vec(1))*d13 + A(j+k,k_vec(2))*d23& + A(j+k,k_vec(3))*d33 + A(j+k,k_vec(4))*d43 C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 enddo - !$OMP END SIMD enddo d15 = B5(kao ) @@ -753,20 +745,16 @@ subroutine sparse_full_mv(A,LDA, & d35 = B5(kao+2) d45 = B5(kao+3) - !DIR$ LOOP COUNT (256) do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$IRP_ALIGN/4 C5(j+k) = C5(j+k) + A(j+k,k_vec(1))*d15 + A(j+k,k_vec(2))*d25& + A(j+k,k_vec(3))*d35 + A(j+k,k_vec(4))*d45 enddo - !$OMP END SIMD enddo enddo - !DIR$ LOOP COUNT (200) do kao = kmax2+1, kmax3 k_vec(1) = indices(kao) d11 = B1(kao) @@ -775,10 +763,8 @@ subroutine sparse_full_mv(A,LDA, & d14 = B4(kao) d15 = B5(kao) !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (256) do k=0,LDA-1,simd_sp !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$IRP_ALIGN/4 C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 @@ -786,7 +772,6 @@ subroutine sparse_full_mv(A,LDA, & C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 C5(j+k) = C5(j+k) + A(j+k,k_vec(1))*d15 enddo - !$OMP END SIMD enddo enddo From 0e243a8afc1f91951e4c79ad978d1e668281f1f2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 11:53:34 +0200 Subject: [PATCH 02/15] Merged outer loop in 
sparse_full_mv --- src/mo.irp.f | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/mo.irp.f b/src/mo.irp.f index 0ad7f5b..3825835 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -710,16 +710,6 @@ subroutine sparse_full_mv(A,LDA, & d32 = B2(kao+2) d42 = B2(kao+3) - do k=0,LDA-1,$IRP_ALIGN/4 - !DIR$ VECTOR ALIGNED - do j=1,$IRP_ALIGN/4 - C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 + A(j+k,k_vec(2))*d21& - + A(j+k,k_vec(3))*d31 + A(j+k,k_vec(4))*d41 - C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22& - + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42 - enddo - enddo - d13 = B3(kao ) d23 = B3(kao+1) d33 = B3(kao+2) @@ -730,7 +720,20 @@ subroutine sparse_full_mv(A,LDA, & d34 = B4(kao+2) d44 = B4(kao+3) + d15 = B5(kao ) + d25 = B5(kao+1) + d35 = B5(kao+2) + d45 = B5(kao+3) + do k=0,LDA-1,$IRP_ALIGN/4 + !DIR$ VECTOR ALIGNED + do j=1,$IRP_ALIGN/4 + C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 + A(j+k,k_vec(2))*d21& + + A(j+k,k_vec(3))*d31 + A(j+k,k_vec(4))*d41 + C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22& + + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42 + enddo + !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 C3(j+k) = C3(j+k) + A(j+k,k_vec(1))*d13 + A(j+k,k_vec(2))*d23& @@ -738,14 +741,7 @@ subroutine sparse_full_mv(A,LDA, & C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 enddo - enddo - d15 = B5(kao ) - d25 = B5(kao+1) - d35 = B5(kao+2) - d45 = B5(kao+3) - - do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 C5(j+k) = C5(j+k) + A(j+k,k_vec(1))*d15 + A(j+k,k_vec(2))*d25& From 3ccee1b3f010b57dcaa08992d3420bda4ad9c27b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 13:51:01 +0200 Subject: [PATCH 03/15] Prefetch in sparse-full-mv --- src/mo.irp.f | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/mo.irp.f b/src/mo.irp.f index 3825835..d1de6c2 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -76,7 +76,6 @@ BEGIN_PROVIDER [ real, mo_coef, (ao_num_8,mo_num_8) ] f = 1./mo_scale do j=1,mo_num !DIR$ VECTOR ALIGNED - !DIR$ LOOP COUNT (2000) do i=1,ao_num_8 mo_coef(i,j) *= f enddo @@ -218,7 +217,6 @@ END_PROVIDER cycle endif r_inv = nucl_elec_dist_inv(k,i) - !DIR$ LOOP COUNT (500) do j=1,mo_num mo_value_transp(j,i) = mo_value_transp(j,i) + nucl_fitcusp_param(1,j,k) +& r * (nucl_fitcusp_param(2,j,k) + & @@ -250,7 +248,6 @@ END_PROVIDER ! 
Scale off-diagonal elements t = prepare_walkers_t do i=1,mo_num - !DIR$ LOOP COUNT (100) do j=1,elec_alpha_num if (i /= j) then mo_value_transp(i,j) *= t @@ -260,7 +257,6 @@ END_PROVIDER mo_lapl_transp(i,j) *= t endif enddo - !DIR$ LOOP COUNT (100) do j=1,elec_beta_num if (i /= j) then mo_value_transp(i,j+elec_alpha_num) *= t @@ -284,7 +280,6 @@ END_PROVIDER enddo END_PROVIDER - BEGIN_PROVIDER [ real, mo_value, (elec_num_8,mo_num) ] implicit none BEGIN_DOC @@ -342,10 +337,27 @@ BEGIN_PROVIDER [ double precision, mo_grad_lapl, (4,elec_num,mo_num) ] integer :: i,j do j=1,mo_num do i=1,elec_num - mo_grad_lapl(1,i,j) = mo_grad_x(i,j) - mo_grad_lapl(2,i,j) = mo_grad_y(i,j) - mo_grad_lapl(3,i,j) = mo_grad_z(i,j) - mo_grad_lapl(4,i,j) = mo_lapl (i,j) + mo_grad_lapl(1,i,j) = mo_grad_transp_x(j,i) + mo_grad_lapl(2,i,j) = mo_grad_transp_y(j,i) + mo_grad_lapl(3,i,j) = mo_grad_transp_z(j,i) + mo_grad_lapl(4,i,j) = mo_lapl_transp (j,i) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, mo_grad_lapl_transp, (4,mo_num,elec_num) ] + implicit none + BEGIN_DOC +! Gradients and laplacian + END_DOC + integer :: i,j + do i=1,elec_num + do j=1,mo_num + mo_grad_lapl_transp(1,j,i) = mo_grad_transp_x(j,i) + mo_grad_lapl_transp(2,j,i) = mo_grad_transp_y(j,i) + mo_grad_lapl_transp(3,j,i) = mo_grad_transp_z(j,i) + mo_grad_lapl_transp(4,j,i) = mo_lapl_transp (j,i) enddo enddo @@ -419,7 +431,6 @@ BEGIN_PROVIDER [ double precision , mo_value_at_nucl, (mo_num_8,nucl_num) ] PROVIDE ao_value_p - !DIR$ LOOP COUNT (2000) do i=1,ao_num if (ao_nucl(i) /= k) then ao_value_at_nucl_no_S(i) = ao_value_p(i) @@ -461,7 +472,6 @@ END_PROVIDER point(3) = nucl_coord(k,3)+ nucl_fitcusp_radius(k) TOUCH point - !DIR$ LOOP COUNT (2000) do j=1,ao_num ao_value_at_fitcusp_radius(j,k) = ao_value_p(j) ao_grad_at_fitcusp_radius(j,k) = ao_grad_p(j,3) @@ -615,7 +625,6 @@ BEGIN_PROVIDER [ real, nucl_fitcusp_param, (4,mo_num,nucl_num) ] cycle endif R = nucl_fitcusp_radius(k) - !DIR$ LOOP COUNT (500) do i=1,mo_num double precision :: lap_phi, grad_phi, phi, eta lap_phi = mo_lapl_at_fitcusp_radius(i,k) @@ -728,22 +737,29 @@ subroutine sparse_full_mv(A,LDA, & do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 + IRP_IF NO_PREFETCH + IRP_ELSE + call MM_PREFETCH (A(j+k,indices(kao+4)),3) + call MM_PREFETCH (A(j+k,indices(kao+5)),3) + call MM_PREFETCH (A(j+k,indices(kao+6)),3) + call MM_PREFETCH (A(j+k,indices(kao+7)),3) + IRP_ENDIF C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 + A(j+k,k_vec(2))*d21& + A(j+k,k_vec(3))*d31 + A(j+k,k_vec(4))*d41 + enddo + + !DIR$ VECTOR ALIGNED + do j=1,$IRP_ALIGN/4 C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22& + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42 - enddo - - !DIR$ VECTOR ALIGNED - do j=1,$IRP_ALIGN/4 C3(j+k) = C3(j+k) + A(j+k,k_vec(1))*d13 + A(j+k,k_vec(2))*d23& + A(j+k,k_vec(3))*d33 + A(j+k,k_vec(4))*d43 - C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& - + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 enddo !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 + C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24& + + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44 C5(j+k) = C5(j+k) + A(j+k,k_vec(1))*d15 + A(j+k,k_vec(2))*d25& + A(j+k,k_vec(3))*d35 + A(j+k,k_vec(4))*d45 enddo From 033025c0eaa62ecb2ede97824ad31e38dca0f240 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 13:54:23 +0200 Subject: [PATCH 04/15] Prefetch in sparse-full-mv --- src/mo.irp.f | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mo.irp.f b/src/mo.irp.f index 
d1de6c2..f84378b 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -690,6 +690,14 @@ subroutine sparse_full_mv(A,LDA, & ! LDC and LDA have to be factors of simd_sp + IRP_IF NO_PREFETCH + IRP_ELSE + call MM_PREFETCH (A(j,indices(1)),3) + call MM_PREFETCH (A(j,indices(2)),3) + call MM_PREFETCH (A(j,indices(3)),3) + call MM_PREFETCH (A(j,indices(4)),3) + IRP_ENDIF + !DIR$ VECTOR ALIGNED do j=1,LDC C1(j) = 0. From 052b2db389661299c74912d8f58f1eb7847a7e62 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 14:14:27 +0200 Subject: [PATCH 05/15] Removed padding in sparse_full_mv --- src/AO/ao_axis.irp.f | 16 ++++++------ src/electrons.irp.f | 4 +-- src/mo.irp.f | 58 +++++++++++++++++++++++--------------------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/AO/ao_axis.irp.f b/src/AO/ao_axis.irp.f index 5216ded..9cfde0f 100644 --- a/src/AO/ao_axis.irp.f +++ b/src/AO/ao_axis.irp.f @@ -48,11 +48,11 @@ subroutine pow_l(r,a,x1,x2,x3) end function - BEGIN_PROVIDER [ real, ao_axis_block, ((-2*simd_sp+1):ao_block_num_8) ] -&BEGIN_PROVIDER [ real, ao_axis_grad_block_x, ((-2*simd_sp+1):ao_block_num_8) ] -&BEGIN_PROVIDER [ real, ao_axis_grad_block_y, ((-2*simd_sp+1):ao_block_num_8) ] -&BEGIN_PROVIDER [ real, ao_axis_grad_block_z, ((-2*simd_sp+1):ao_block_num_8) ] -&BEGIN_PROVIDER [ real, ao_axis_lapl_block, ((-2*simd_sp+1):ao_block_num_8) ] + BEGIN_PROVIDER [ real, ao_axis_block, (ao_block_num_8) ] +&BEGIN_PROVIDER [ real, ao_axis_grad_block_x, (ao_block_num_8) ] +&BEGIN_PROVIDER [ real, ao_axis_grad_block_y, (ao_block_num_8) ] +&BEGIN_PROVIDER [ real, ao_axis_grad_block_z, (ao_block_num_8) ] +&BEGIN_PROVIDER [ real, ao_axis_lapl_block, (ao_block_num_8) ] implicit none include '../types.F' @@ -111,13 +111,13 @@ end function ao_axis_block(idx) = p023 * p10 p023 = real_of_int(pow1) * p023 + ao_axis_grad_block_x(idx) = p023 * p11 + ao_axis_grad_block_y(idx) = p013 * p21 + ao_axis_grad_block_z(idx) = p012 * p31 ao_axis_lapl_block(idx) = real_of_int(pow1-1) * p023 * p12 & + real_of_int(pow2-1) * p013 * p22 & + real_of_int(pow3-1) * p012 * p32 - ao_axis_grad_block_x(idx) = p023 * p11 - ao_axis_grad_block_y(idx) = p013 * p21 - ao_axis_grad_block_z(idx) = p012 * p31 enddo diff --git a/src/electrons.irp.f b/src/electrons.irp.f index 65ec86e..1170603 100644 --- a/src/electrons.irp.f +++ b/src/electrons.irp.f @@ -148,8 +148,8 @@ END_PROVIDER BEGIN_PROVIDER [ real, elec_dist, (elec_num_8,elec_num) ] &BEGIN_PROVIDER [ real, elec_dist_vec_x, (elec_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, elec_dist_vec_y, ((-simd_sp+1):elec_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, elec_dist_vec_z, ((-2*simd_sp+1):elec_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, elec_dist_vec_y, (elec_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, elec_dist_vec_z, (elec_num_8,elec_num) ] implicit none BEGIN_DOC ! 
Electron-electron distances diff --git a/src/mo.irp.f b/src/mo.irp.f index f84378b..91d0b63 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -137,11 +137,11 @@ BEGIN_PROVIDER [ real, mo_coef_transp_present, (num_present_mos_8,ao_num_8) ] END_PROVIDER - BEGIN_PROVIDER [ real, mo_value_transp, ((-simd_sp+1):mo_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, mo_grad_transp_x, ((-2*simd_sp+1):mo_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, mo_grad_transp_y, ((-3*simd_sp+1):mo_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, mo_grad_transp_z, ((-4*simd_sp+1):mo_num_8,elec_num) ] -&BEGIN_PROVIDER [ real, mo_lapl_transp, ((-5*simd_sp+1):mo_num_8,elec_num) ] + BEGIN_PROVIDER [ real, mo_value_transp, (mo_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, mo_grad_transp_x, (mo_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, mo_grad_transp_y, (mo_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, mo_grad_transp_z, (mo_num_8,elec_num) ] +&BEGIN_PROVIDER [ real, mo_lapl_transp, (mo_num_8,elec_num) ] implicit none BEGIN_DOC @@ -295,7 +295,7 @@ BEGIN_PROVIDER [ real, mo_value, (elec_num_8,mo_num) ] !DIR$ VECTOR ALIGNED mo_value = 0. endif - call transpose(mo_value_transp(1,1),mo_num_8+simd_sp,mo_value,elec_num_8,mo_num,elec_num) + call transpose(mo_value_transp(1,1),mo_num_8,mo_value,elec_num_8,mo_num,elec_num) END_PROVIDER @@ -323,12 +323,32 @@ END_PROVIDER PROVIDE primitives_reduced endif ! Transpose x last for cache efficiency - call transpose_to_dp(mo_grad_transp_y(1,1),mo_num_8+3*simd_sp,mo_grad_y(1,1),elec_num_8,mo_num,elec_num) - call transpose_to_dp(mo_grad_transp_z(1,1),mo_num_8+4*simd_sp,mo_grad_z(1,1),elec_num_8,mo_num,elec_num) - call transpose_to_dp(mo_grad_transp_x(1,1),mo_num_8+2*simd_sp,mo_grad_x(1,1),elec_num_8,mo_num,elec_num) + call transpose_to_dp(mo_grad_transp_y(1,1),mo_num_8,mo_grad_y(1,1),elec_num_8,mo_num,elec_num) + call transpose_to_dp(mo_grad_transp_z(1,1),mo_num_8,mo_grad_z(1,1),elec_num_8,mo_num,elec_num) + call transpose_to_dp(mo_grad_transp_x(1,1),mo_num_8,mo_grad_x(1,1),elec_num_8,mo_num,elec_num) + END_PROVIDER +BEGIN_PROVIDER [ double precision, mo_lapl, (elec_num_8,mo_num) ] + implicit none + BEGIN_DOC +! Laplacians of the molecular orbitals + END_DOC + + integer :: i,j + integer, save :: ifirst = 0 + if (ifirst == 0) then + ifirst = 1 + PROVIDE primitives_reduced + !DIR$ VECTOR ALIGNED + mo_lapl = 0.d0 + endif + call transpose_to_dp(mo_lapl_transp(1,1),mo_num_8,mo_lapl,elec_num_8,mo_num,elec_num) + +END_PROVIDER + + BEGIN_PROVIDER [ double precision, mo_grad_lapl, (4,elec_num,mo_num) ] implicit none BEGIN_DOC @@ -363,24 +383,6 @@ BEGIN_PROVIDER [ double precision, mo_grad_lapl_transp, (4,mo_num,elec_num) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_lapl, (elec_num_8,mo_num) ] - implicit none - BEGIN_DOC -! 
Laplacians of the molecular orbitals - END_DOC - - integer :: i,j - integer, save :: ifirst = 0 - if (ifirst == 0) then - ifirst = 1 - PROVIDE primitives_reduced - !DIR$ VECTOR ALIGNED - mo_lapl = 0.d0 - endif - call transpose_to_dp(mo_lapl_transp(1,1),mo_num_8+5*simd_sp,mo_lapl,elec_num_8,mo_num,elec_num) - -END_PROVIDER - BEGIN_PROVIDER [ real, prepare_walkers_t ] implicit none @@ -783,7 +785,7 @@ subroutine sparse_full_mv(A,LDA, & d14 = B4(kao) d15 = B5(kao) !DIR$ VECTOR ALIGNED - do k=0,LDA-1,simd_sp + do k=0,LDA-1,$IRP_ALIGN/4 !DIR$ VECTOR ALIGNED do j=1,$IRP_ALIGN/4 C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11 From 6208018b4aef4e6a5223cec7be6095434248afa6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 14:19:35 +0200 Subject: [PATCH 06/15] Split mo_grad_lapl in alpha/beta --- src/det.irp.f | 102 ++++++++++++-------------------------------------- src/mo.irp.f | 23 ++++++++---- 2 files changed, 39 insertions(+), 86 deletions(-) diff --git a/src/det.irp.f b/src/det.irp.f index ef2db3d..9ab89a8 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -221,11 +221,9 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j - !$OMP SIMD do i=1,$n u(i) = m(i) - S(i,l) enddo - !$OMP END SIMD z(l) = S_inv($n,l)*u($n) !DIR$ VECTOR ALIGNED @@ -255,23 +253,19 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo enddo - !$OMP SIMD do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo - !$OMP END SIMD do i=1,$n,4 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j) S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j) enddo - !$OMP END SIMD enddo end @@ -354,14 +348,12 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo do i=1,$n-1,4 - !$OMP SIMD do j=1,$n-1 S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3) enddo - !$OMP END SIMD enddo do i=1,$n-1,4 @@ -416,11 +408,9 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j - !$OMP SIMD do i=1,$n u(i) = m(i) - S(i,l) enddo - !$OMP END SIMD z(l) = S_inv($n,l)*u($n) !DIR$ VECTOR ALIGNED @@ -459,33 +449,27 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) enddo - !$OMP SIMD do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo - !$OMP END SIMD do i=1,$n-2,4 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j) S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j) enddo - !$OMP END SIMD enddo i=$n-1 !DIR$ VECTOR ALIGNED - !$OMP SIMD do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) enddo - !$OMP END SIMD end @@ -575,24 +559,20 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo do i=1,$n-3,4 - !$OMP SIMD do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3) enddo - !$OMP END SIMD enddo i=$n-2 - !$OMP SIMD do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) enddo - !$OMP END SIMD end @@ -634,11 +614,9 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) !DIR$ 
ASSUME (MOD(LDS,$IRP_ALIGN/8) == 0) integer :: i,j,n4 - !$OMP SIMD do i=1,n u(i) = m(i) - S(i,l) enddo - !$OMP END SIMD z(l) = 0.d0 !DIR$ VECTOR ALIGNED @@ -686,20 +664,16 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) enddo enddo - !$OMP SIMD do i=1,n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo - !$OMP END SIMD do i=1,n !DIR$ VECTOR ALIGNED - !$OMP SIMD aligned(S_inv,z) do j=1,n S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j) enddo - !$OMP END SIMD enddo end @@ -1034,11 +1008,9 @@ END_PROVIDER det_alpha_value(det_i) = det_alpha_value_curr do i=1,elec_alpha_num - !$OMP SIMD do k=1,4 det_alpha_grad_lapl(k,i,det_i) = det_alpha_grad_lapl_curr(k,i) enddo - !$OMP END SIMD if (do_pseudo) then det_alpha_pseudo(i,det_i) = det_alpha_pseudo_curr(i) endif @@ -1088,11 +1060,9 @@ END_PROVIDER det_beta_value(det_j) = det_beta_value_curr !DIR$ LOOP COUNT (200) do i=elec_alpha_num+1,elec_num - !$OMP SIMD do k=1,4 det_beta_grad_lapl(k,i,det_j) = det_beta_grad_lapl_curr(k,i) enddo - !$OMP END SIMD if (do_pseudo) then det_beta_pseudo(i,det_j) = det_beta_pseudo_curr(i) endif @@ -1223,11 +1193,9 @@ END_PROVIDER ! ----- psidet_value = 0.d0 - !$OMP SIMD reduction(+:psidet_value) do j=1,det_beta_num psidet_value = psidet_value + det_beta_value(j) * DaC(j) enddo - !$OMP END SIMD if (psidet_value == 0.d0) then @@ -1246,22 +1214,18 @@ END_PROVIDER do i=1,det_alpha_num do j=1,elec_alpha_num - !$OMP SIMD do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_alpha_grad_lapl(k,j,i)*CDb(i) enddo - !$OMP END SIMD pseudo_non_local(j) = pseudo_non_local(j) + det_alpha_pseudo(j,i)*CDb(i) enddo enddo do i=1,det_beta_num do j=elec_alpha_num+1,elec_num - !$OMP SIMD do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_beta_grad_lapl(k,j,i)*DaC(i) enddo - !$OMP END SIMD pseudo_non_local(j) = pseudo_non_local(j) + det_beta_pseudo(j,i)*DaC(i) enddo enddo @@ -1278,21 +1242,17 @@ END_PROVIDER do i=1,det_alpha_num do j=1,elec_alpha_num - !$OMP SIMD do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_alpha_grad_lapl(k,j,i)*CDb(i) enddo - !$OMP END SIMD enddo enddo do i=1,det_beta_num do j=elec_alpha_num+1,elec_num - !$OMP SIMD do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_beta_grad_lapl(k,j,i)*DaC(i) enddo - !$OMP END SIMD enddo enddo @@ -1392,14 +1352,12 @@ BEGIN_PROVIDER [ double precision, det_alpha_grad_lapl_curr, (4,elec_alpha_num) imo = mo_list_alpha_curr(j ) imo2 = mo_list_alpha_curr(j+1) do i=1,elec_alpha_num,2 - !$OMP SIMD do k=1,4 - det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & - + mo_grad_lapl(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) - det_alpha_grad_lapl_curr(k,i+1) = det_alpha_grad_lapl_curr(k,i+1) + mo_grad_lapl(k,i+1,imo )*slater_matrix_alpha_inv_det(i+1,j ) & - + mo_grad_lapl(k,i+1,imo2)*slater_matrix_alpha_inv_det(i+1,j+1) + det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & + + mo_grad_lapl_alpha(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) + det_alpha_grad_lapl_curr(k,i+1) = det_alpha_grad_lapl_curr(k,i+1) + mo_grad_lapl_alpha(k,i+1,imo )*slater_matrix_alpha_inv_det(i+1,j ) & + + mo_grad_lapl_alpha(k,i+1,imo2)*slater_matrix_alpha_inv_det(i+1,j+1) enddo - !$OMP END SIMD enddo enddo @@ -1409,32 +1367,26 @@ BEGIN_PROVIDER [ double precision, det_alpha_grad_lapl_curr, (4,elec_alpha_num) imo = mo_list_alpha_curr(j ) imo2 = mo_list_alpha_curr(j+1) do i=1,elec_alpha_num-1,2 - !$OMP SIMD do k=1,4 - 
det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & - + mo_grad_lapl(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) - det_alpha_grad_lapl_curr(k,i+1) = det_alpha_grad_lapl_curr(k,i+1) + mo_grad_lapl(k,i+1,imo )*slater_matrix_alpha_inv_det(i+1,j ) & - + mo_grad_lapl(k,i+1,imo2)*slater_matrix_alpha_inv_det(i+1,j+1) + det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & + + mo_grad_lapl_alpha(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) + det_alpha_grad_lapl_curr(k,i+1) = det_alpha_grad_lapl_curr(k,i+1) + mo_grad_lapl_alpha(k,i+1,imo )*slater_matrix_alpha_inv_det(i+1,j ) & + + mo_grad_lapl_alpha(k,i+1,imo2)*slater_matrix_alpha_inv_det(i+1,j+1) enddo - !$OMP END SIMD enddo i=elec_alpha_num - !$OMP SIMD do k=1,4 - det_alpha_grad_lapl_curr(k,i) = det_alpha_grad_lapl_curr(k,i) + mo_grad_lapl(k,i,imo )*slater_matrix_alpha_inv_det(i,j ) & - + mo_grad_lapl(k,i,imo2)*slater_matrix_alpha_inv_det(i,j+1) + det_alpha_grad_lapl_curr(k,i) = det_alpha_grad_lapl_curr(k,i) + mo_grad_lapl_alpha(k,i,imo )*slater_matrix_alpha_inv_det(i,j ) & + + mo_grad_lapl_alpha(k,i,imo2)*slater_matrix_alpha_inv_det(i,j+1) enddo - !$OMP END SIMD enddo j=elec_alpha_num imo = mo_list_alpha_curr(j) do i=1,elec_alpha_num - !$OMP SIMD do k=1,4 - det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl(k,i ,imo)*slater_matrix_alpha_inv_det(i ,j) + det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo)*slater_matrix_alpha_inv_det(i ,j) enddo - !$OMP END SIMD enddo endif @@ -1469,7 +1421,7 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 ! do i=elec_alpha_num+1,elec_num ! do k=1,4 ! det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& -! mo_grad_lapl(k,i,imo)*slater_matrix_beta_inv_det(i-elec_alpha_num,j) +! mo_grad_lapl_alpha(k,i,imo)*slater_matrix_beta_inv_det(i-elec_alpha_num,j) ! enddo ! enddo ! 
enddo @@ -1484,16 +1436,14 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num,2 l = i-elec_alpha_num - !$OMP SIMD do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& - mo_grad_lapl(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & - mo_grad_lapl(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) + mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & + mo_grad_lapl_beta(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) det_beta_grad_lapl_curr(k,i+1) = det_beta_grad_lapl_curr(k,i+1) +& - mo_grad_lapl(k,i+1,imo )*slater_matrix_beta_inv_det(l+1,j ) + & - mo_grad_lapl(k,i+1,imo2)*slater_matrix_beta_inv_det(l+1,j+1) + mo_grad_lapl_beta(k,i+1,imo )*slater_matrix_beta_inv_det(l+1,j ) + & + mo_grad_lapl_beta(k,i+1,imo2)*slater_matrix_beta_inv_det(l+1,j+1) enddo - !$OMP END SIMD enddo enddo @@ -1505,26 +1455,22 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num-1,2 l = i-elec_alpha_num - !$OMP SIMD do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& - mo_grad_lapl(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & - mo_grad_lapl(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) + mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & + mo_grad_lapl_beta(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) det_beta_grad_lapl_curr(k,i+1) = det_beta_grad_lapl_curr(k,i+1) +& - mo_grad_lapl(k,i+1,imo )*slater_matrix_beta_inv_det(l+1,j ) + & - mo_grad_lapl(k,i+1,imo2)*slater_matrix_beta_inv_det(l+1,j+1) + mo_grad_lapl_beta(k,i+1,imo )*slater_matrix_beta_inv_det(l+1,j ) + & + mo_grad_lapl_beta(k,i+1,imo2)*slater_matrix_beta_inv_det(l+1,j+1) enddo - !$OMP END SIMD enddo i = elec_num l = elec_num-elec_alpha_num - !$OMP SIMD do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& - mo_grad_lapl(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & - mo_grad_lapl(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) + mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & + mo_grad_lapl_beta(k,i,imo2)*slater_matrix_beta_inv_det(l,j+1) enddo - !$OMP END SIMD enddo j = elec_beta_num @@ -1532,12 +1478,10 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num l = i-elec_alpha_num - !$OMP SIMD do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& - mo_grad_lapl(k,i,imo)*slater_matrix_beta_inv_det(l,j) + mo_grad_lapl_beta(k,i,imo)*slater_matrix_beta_inv_det(l,j) enddo - !$OMP END SIMD enddo endif diff --git a/src/mo.irp.f b/src/mo.irp.f index 91d0b63..12d407a 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ -349,19 +349,28 @@ BEGIN_PROVIDER [ double precision, mo_lapl, (elec_num_8,mo_num) ] END_PROVIDER -BEGIN_PROVIDER [ double precision, mo_grad_lapl, (4,elec_num,mo_num) ] + BEGIN_PROVIDER [ double precision, mo_grad_lapl_alpha, (4,elec_alpha_num,mo_num) ] +&BEGIN_PROVIDER [ double precision, mo_grad_lapl_beta , (4,elec_alpha_num+1:elec_num,mo_num) ] implicit none BEGIN_DOC ! 
Gradients and laplacian END_DOC integer :: i,j do j=1,mo_num - do i=1,elec_num - mo_grad_lapl(1,i,j) = mo_grad_transp_x(j,i) - mo_grad_lapl(2,i,j) = mo_grad_transp_y(j,i) - mo_grad_lapl(3,i,j) = mo_grad_transp_z(j,i) - mo_grad_lapl(4,i,j) = mo_lapl_transp (j,i) - enddo + do i=1,elec_alpha_num + mo_grad_lapl_alpha(1,i,j) = mo_grad_transp_x(j,i) + mo_grad_lapl_alpha(2,i,j) = mo_grad_transp_y(j,i) + mo_grad_lapl_alpha(3,i,j) = mo_grad_transp_z(j,i) + mo_grad_lapl_alpha(4,i,j) = mo_lapl_transp (j,i) + enddo + enddo + do j=1,mo_num + do i=elec_alpha_num+1,elec_num + mo_grad_lapl_beta(1,i,j) = mo_grad_transp_x(j,i) + mo_grad_lapl_beta(2,i,j) = mo_grad_transp_y(j,i) + mo_grad_lapl_beta(3,i,j) = mo_grad_transp_z(j,i) + mo_grad_lapl_beta(4,i,j) = mo_lapl_transp (j,i) + enddo enddo END_PROVIDER From 5c872ed2aa5ba18262e59a7dcf374fca5b41d943 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 14:50:08 +0200 Subject: [PATCH 07/15] Specific det_updates up to 150x150 --- src/det.irp.f | 275 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 259 insertions(+), 16 deletions(-) diff --git a/src/det.irp.f b/src/det.irp.f index 9ab89a8..6c36c52 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -87,6 +87,106 @@ subroutine det_update(n,LDS,m,l,S,S_inv,d) 48;; 49;; 50;; + 51;; + 52;; + 53;; + 54;; + 55;; + 56;; + 57;; + 58;; + 59;; + 60;; + 61;; + 62;; + 63;; + 64;; + 65;; + 66;; + 67;; + 68;; + 69;; + 70;; + 71;; + 72;; + 73;; + 74;; + 75;; + 76;; + 77;; + 78;; + 79;; + 80;; + 81;; + 82;; + 83;; + 84;; + 85;; + 86;; + 87;; + 88;; + 89;; + 90;; + 91;; + 92;; + 93;; + 94;; + 95;; + 96;; + 97;; + 98;; + 99;; + 100;; + 101;; + 102;; + 103;; + 104;; + 105;; + 106;; + 107;; + 108;; + 109;; + 110;; + 111;; + 112;; + 113;; + 114;; + 115;; + 116;; + 117;; + 118;; + 119;; + 120;; + 121;; + 122;; + 123;; + 124;; + 125;; + 126;; + 127;; + 128;; + 129;; + 130;; + 131;; + 132;; + 133;; + 134;; + 135;; + 136;; + 137;; + 138;; + 139;; + 140;; + 141;; + 142;; + 143;; + 144;; + 145;; + 146;; + 147;; + 148;; + 149;; + 150;; END_TEMPLATE end select end @@ -186,6 +286,7 @@ subroutine det_update4(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED do i=1,4 w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -221,6 +322,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j + !DIR$ VECTOR ALIGNED do i=1,$n u(i) = m(i) - S(i,l) enddo @@ -239,6 +341,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED do j=1,$n,4 z(j ) = S_inv($n,j )*u($n) z(j+1) = S_inv($n,j+1)*u($n) @@ -253,6 +356,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo enddo + !DIR$ VECTOR ALIGNED do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -282,6 +386,31 @@ SUBST [ n ] 40 ;; 44 ;; 48 ;; +52 ;; +56 ;; +60 ;; +64 ;; +68 ;; +72 ;; +76 ;; +80 ;; +84 ;; +88 ;; +92 ;; +96 ;; +100 ;; +104 ;; +108 ;; +112 ;; +116 ;; +120 ;; +124 ;; +128 ;; +132 ;; +136 ;; +140 ;; +144 ;; +148 ;; END_TEMPLATE @@ -306,6 +435,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j + !DIR$ VECTOR ALIGNED do i=1,$n u(i) = m(i) - S(i,l) enddo @@ -330,6 +460,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+1) = S_inv($n,j+1)*u($n) z(j+2) = S_inv($n,j+2)*u($n) z(j+3) = S_inv($n,j+3)*u($n) + !DIR$ VECTOR ALIGNED do i=1,$n-1 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) @@ -339,15 +470,18 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo z($n) = S_inv($n,$n)*u($n) + !DIR$ VECTOR ALIGNED do i=1,$n-1 z($n) = z($n) + S_inv(i,$n)*u(i) 
enddo + !DIR$ VECTOR ALIGNED do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo do i=1,$n-1,4 + !DIR$ VECTOR ALIGNED do j=1,$n-1 S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -356,6 +490,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo enddo + !DIR$ VECTOR ALIGNED do i=1,$n-1,4 S_inv($n,i ) = S_inv($n,i )*lambda - w($n)*z(i ) S_inv($n,i+1) = S_inv($n,i+1)*lambda - w($n)*z(i+1) @@ -363,6 +498,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) S_inv($n,i+3) = S_inv($n,i+3)*lambda - w($n)*z(i+3) enddo + !DIR$ VECTOR ALIGNED do i=1,$n S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*z($n) enddo @@ -383,6 +519,31 @@ SUBST [ n ] 41 ;; 45 ;; 49 ;; +53 ;; +57 ;; +61 ;; +65 ;; +69 ;; +73 ;; +77 ;; +81 ;; +85 ;; +89 ;; +93 ;; +97 ;; +101 ;; +105 ;; +109 ;; +113 ;; +117 ;; +121 ;; +125 ;; +129 ;; +133 ;; +137 ;; +141 ;; +145 ;; +149 ;; END_TEMPLATE @@ -408,6 +569,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j + !DIR$ VECTOR ALIGNED do i=1,$n u(i) = m(i) - S(i,l) enddo @@ -426,6 +588,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED do j=1,$n-2,4 z(j ) = S_inv($n,j )*u($n) z(j+1) = S_inv($n,j+1)*u($n) @@ -449,6 +612,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) enddo + !DIR$ VECTOR ALIGNED do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -486,6 +650,31 @@ SUBST [ n ] 42 ;; 46 ;; 50 ;; +54 ;; +58 ;; +62 ;; +66 ;; +70 ;; +74 ;; +78 ;; +82 ;; +86 ;; +90 ;; +94 ;; +98 ;; +102 ;; +106 ;; +110 ;; +114 ;; +118 ;; +122 ;; +126 ;; +130 ;; +134 ;; +138 ;; +142 ;; +146 ;; +150 ;; END_TEMPLATE @@ -510,6 +699,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j + !DIR$ VECTOR ALIGNED do i=1,$n u(i) = m(i) - S(i,l) enddo @@ -535,6 +725,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+1) = S_inv($n,j+1)*u($n) z(j+2) = S_inv($n,j+2)*u($n) z(j+3) = S_inv($n,j+3)*u($n) + !DIR$ VECTOR ALIGNED do i=1,$n-1 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) @@ -547,18 +738,21 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j ) = S_inv($n,j )*u($n) z(j+1) = S_inv($n,j+1)*u($n) z(j+2) = S_inv($n,j+2)*u($n) + !DIR$ VECTOR ALIGNED do i=1,$n-1 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) enddo + !DIR$ VECTOR ALIGNED do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo do i=1,$n-3,4 + !DIR$ VECTOR ALIGNED do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -568,6 +762,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo i=$n-2 + !DIR$ VECTOR ALIGNED do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -588,6 +783,31 @@ SUBST [ n ] 39 ;; 43 ;; 47 ;; +51 ;; +55 ;; +59 ;; +63 ;; +67 ;; +71 ;; +75 ;; +79 ;; +83 ;; +87 ;; +91 ;; +95 ;; +99 ;; +103 ;; +107 ;; +111 ;; +115 ;; +119 ;; +123 ;; +127 ;; +131 ;; +135 ;; +139 ;; +143 ;; +147 ;; END_TEMPLATE @@ -612,8 +832,11 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= n) !DIR$ ASSUME (LDS <= 3840) !DIR$ ASSUME (MOD(LDS,$IRP_ALIGN/8) == 0) + !DIR$ ASSUME (n>150) + integer :: i,j,n4 + !DIR$ VECTOR ALIGNED do i=1,n u(i) = m(i) - S(i,l) enddo @@ -633,26 +856,19 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) return endif - n4 = iand(n,not(4)) - do j=1,n4,8 + n4 = iand(n,not(3)) + !DIR$ VECTOR ALIGNED + do j=1,n4,4 z(j ) = 0.d0 z(j+1) = 0.d0 z(j+2) = 0.d0 
z(j+3) = 0.d0 - z(j+4) = 0.d0 - z(j+5) = 0.d0 - z(j+6) = 0.d0 - z(j+7) = 0.d0 !DIR$ VECTOR ALIGNED do i=1,n z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) - z(j+4) = z(j+4) + S_inv(i,j+4)*u(i) - z(j+5) = z(j+5) + S_inv(i,j+5)*u(i) - z(j+6) = z(j+6) + S_inv(i,j+6)*u(i) - z(j+7) = z(j+7) + S_inv(i,j+7)*u(i) enddo enddo @@ -664,16 +880,29 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) enddo enddo + !DIR$ VECTOR ALIGNED do i=1,n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) enddo - do i=1,n - !DIR$ VECTOR ALIGNED - do j=1,n - S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j) - enddo + do i=1,n4,4 + !DIR$ VECTOR ALIGNED + !DIR$ VECTOR ALWAYS + do j=1,n + S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) + S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) + S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j) + S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j) + enddo + enddo + + do i=n4+1,n + !DIR$ VECTOR ALIGNED + !DIR$ VECTOR ALWAYS + do j=1,n + S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j) + enddo enddo end @@ -1008,6 +1237,7 @@ END_PROVIDER det_alpha_value(det_i) = det_alpha_value_curr do i=1,elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_alpha_grad_lapl(k,i,det_i) = det_alpha_grad_lapl_curr(k,i) enddo @@ -1214,6 +1444,7 @@ END_PROVIDER do i=1,det_alpha_num do j=1,elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_alpha_grad_lapl(k,j,i)*CDb(i) enddo @@ -1223,6 +1454,7 @@ END_PROVIDER do i=1,det_beta_num do j=elec_alpha_num+1,elec_num + !DIR$ VECTOR ALIGNED do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_beta_grad_lapl(k,j,i)*DaC(i) enddo @@ -1230,18 +1462,21 @@ END_PROVIDER enddo enddo + !DIR$ VECTOR ALIGNED do j=1,elec_num pseudo_non_local(j) = pseudo_non_local(j) * psidet_inv enddo else + !DIR$ VECTOR ALIGNED do j=1,elec_num psidet_grad_lapl(1:4,j) = 0.d0 enddo do i=1,det_alpha_num do j=1,elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_alpha_grad_lapl(k,j,i)*CDb(i) enddo @@ -1250,6 +1485,7 @@ END_PROVIDER do i=1,det_beta_num do j=elec_alpha_num+1,elec_num + !DIR$ VECTOR ALIGNED do k=1,4 psidet_grad_lapl(k,j) = psidet_grad_lapl(k,j) + det_beta_grad_lapl(k,j,i)*DaC(i) enddo @@ -1352,6 +1588,7 @@ BEGIN_PROVIDER [ double precision, det_alpha_grad_lapl_curr, (4,elec_alpha_num) imo = mo_list_alpha_curr(j ) imo2 = mo_list_alpha_curr(j+1) do i=1,elec_alpha_num,2 + !DIR$ VECTOR ALIGNED do k=1,4 det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & + mo_grad_lapl_alpha(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) @@ -1367,6 +1604,7 @@ BEGIN_PROVIDER [ double precision, det_alpha_grad_lapl_curr, (4,elec_alpha_num) imo = mo_list_alpha_curr(j ) imo2 = mo_list_alpha_curr(j+1) do i=1,elec_alpha_num-1,2 + !DIR$ VECTOR ALIGNED do k=1,4 det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo )*slater_matrix_alpha_inv_det(i ,j ) & + mo_grad_lapl_alpha(k,i ,imo2)*slater_matrix_alpha_inv_det(i ,j+1) @@ -1375,6 +1613,7 @@ BEGIN_PROVIDER [ double precision, det_alpha_grad_lapl_curr, (4,elec_alpha_num) enddo enddo i=elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_alpha_grad_lapl_curr(k,i) = det_alpha_grad_lapl_curr(k,i) + mo_grad_lapl_alpha(k,i,imo )*slater_matrix_alpha_inv_det(i,j ) & + mo_grad_lapl_alpha(k,i,imo2)*slater_matrix_alpha_inv_det(i,j+1) @@ -1384,6 +1623,7 @@ BEGIN_PROVIDER [ double precision, 
det_alpha_grad_lapl_curr, (4,elec_alpha_num) j=elec_alpha_num imo = mo_list_alpha_curr(j) do i=1,elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_alpha_grad_lapl_curr(k,i ) = det_alpha_grad_lapl_curr(k,i ) + mo_grad_lapl_alpha(k,i ,imo)*slater_matrix_alpha_inv_det(i ,j) enddo @@ -1436,6 +1676,7 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num,2 l = i-elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & @@ -1455,6 +1696,7 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num-1,2 l = i-elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & @@ -1466,6 +1708,7 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 enddo i = elec_num l = elec_num-elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& mo_grad_lapl_beta(k,i,imo )*slater_matrix_beta_inv_det(l,j ) + & @@ -1475,9 +1718,9 @@ BEGIN_PROVIDER [ double precision, det_beta_grad_lapl_curr, (4,elec_alpha_num+1 j = elec_beta_num imo = mo_list_beta_curr(j) - !DIR$ LOOP COUNT (16) do i=elec_alpha_num+1,elec_num l = i-elec_alpha_num + !DIR$ VECTOR ALIGNED do k=1,4 det_beta_grad_lapl_curr(k,i) = det_beta_grad_lapl_curr(k,i) +& mo_grad_lapl_beta(k,i,imo)*slater_matrix_beta_inv_det(l,j) From 56cd62957ad0bcd16c28b708d4d2d816506eefcb Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 15:05:17 +0200 Subject: [PATCH 08/15] Optimized det (mod1) --- src/det.irp.f | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/det.irp.f b/src/det.irp.f index 6c36c52..c6d5166 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -434,20 +434,20 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0) !DIR$ ASSUME (LDS >= $n) integer :: i,j + double precision :: zl, ui - !DIR$ VECTOR ALIGNED - do i=1,$n - u(i) = m(i) - S(i,l) - enddo - - z(l) = S_inv($n,l)*u($n) + zl = 0.d0 !DIR$ VECTOR ALIGNED do i=1,$n-1 - z(l) = z(l) + S_inv(i,l)*u(i) + ui = m(i) - S(i,l) + u(i) = ui + zl = zl + S_inv(i,l)*ui enddo - + u($n) = m($n) - S($n,l) + zl = zl + S_inv($n,l)*u($n) + d_inv = 1.d0/d - d = d+z(l) + d = d+zl lambda = d*d_inv if (dabs(lambda) < 1.d-3) then d = 0.d0 @@ -456,10 +456,10 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ VECTOR ALIGNED do j=1,$n-1,4 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) - z(j+2) = S_inv($n,j+2)*u($n) - z(j+3) = S_inv($n,j+3)*u($n) + z(j ) = 0.d0 + z(j+1) = 0.d0 + z(j+2) = 0.d0 + z(j+3) = 0.d0 !DIR$ VECTOR ALIGNED do i=1,$n-1 z(j ) = z(j ) + S_inv(i,j )*u(i) @@ -467,11 +467,15 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) enddo + z(j ) = z(j ) + S_inv($n,j )*u($n) + z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) + z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) + z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo - z($n) = S_inv($n,$n)*u($n) + z($n) = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n z($n) = z($n) + S_inv(i,$n)*u(i) enddo !DIR$ VECTOR ALIGNED @@ -488,14 +492,10 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) 
S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3) enddo - enddo - - !DIR$ VECTOR ALIGNED - do i=1,$n-1,4 - S_inv($n,i ) = S_inv($n,i )*lambda - w($n)*z(i ) - S_inv($n,i+1) = S_inv($n,i+1)*lambda - w($n)*z(i+1) - S_inv($n,i+2) = S_inv($n,i+2)*lambda - w($n)*z(i+2) - S_inv($n,i+3) = S_inv($n,i+3)*lambda - w($n)*z(i+3) + S_inv($n,i ) = S_inv($n,i )*lambda - w($n)*z(i ) + S_inv($n,i+1) = S_inv($n,i+1)*lambda - w($n)*z(i+1) + S_inv($n,i+2) = S_inv($n,i+2)*lambda - w($n)*z(i+2) + S_inv($n,i+3) = S_inv($n,i+3)*lambda - w($n)*z(i+3) enddo !DIR$ VECTOR ALIGNED From 2efdfa26e9085660649b84ee1867daaaba6266d7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2016 23:45:01 +0200 Subject: [PATCH 09/15] Optimized det (mod4) --- src/det.irp.f | 114 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 34 deletions(-) diff --git a/src/det.irp.f b/src/det.irp.f index c6d5166..9e2e11d 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -327,9 +327,9 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) u(i) = m(i) - S(i,l) enddo - z(l) = S_inv($n,l)*u($n) + z(l) = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n z(l) = z(l) + S_inv(i,l)*u(i) enddo @@ -343,12 +343,12 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ VECTOR ALIGNED do j=1,$n,4 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) - z(j+2) = S_inv($n,j+2)*u($n) - z(j+3) = S_inv($n,j+3)*u($n) + z(j ) = 0.d0 + z(j+1) = 0.d0 + z(j+2) = 0.d0 + z(j+3) = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) @@ -454,12 +454,10 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED + z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-1,4 - z(j ) = 0.d0 - z(j+1) = 0.d0 - z(j+2) = 0.d0 - z(j+3) = 0.d0 !DIR$ VECTOR ALIGNED do i=1,$n-1 z(j ) = z(j ) + S_inv(i,j )*u(i) @@ -473,11 +471,11 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo - z($n) = 0.d0 !DIR$ VECTOR ALIGNED do i=1,$n z($n) = z($n) + S_inv(i,$n)*u(i) enddo + !DIR$ VECTOR ALIGNED do i=1,$n w(i) = S_inv(i,l)*d_inv @@ -574,11 +572,14 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) u(i) = m(i) - S(i,l) enddo - z(l) = S_inv($n,l)*u($n) + z(l) = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 - z(l) = z(l) + S_inv(i,l)*u(i) + do i=1,$n-2,4 + z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) enddo + i=$n-1 + z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) d_inv = 1.d0/d d = d+z(l) @@ -588,29 +589,37 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED + z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-2,4 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) - z(j+2) = S_inv($n,j+2)*u($n) - z(j+3) = S_inv($n,j+3)*u($n) !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n-2 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) enddo + z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) + z(j ) = z(j ) + S_inv($n,j )*u($n) + z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) + z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) + z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1) + z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) + z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1) + z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo j=$n-1 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n-2 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + 
S_inv(i,j+1)*u(i) enddo + z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) + z(j ) = z(j ) + S_inv($n,j )*u($n) + z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) + z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) !DIR$ VECTOR ALIGNED do i=1,$n @@ -620,20 +629,32 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n-2,4 !DIR$ VECTOR ALIGNED - do j=1,$n + do j=1,$n-2 S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j) S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j) enddo + S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1) + S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n ) + S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1) + S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n ) + S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1) + S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n ) + S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1) + S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n ) enddo i=$n-1 !DIR$ VECTOR ALIGNED - do j=1,$n + do j=1,$n-2 S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) enddo + S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1) + S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1) + S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n ) + S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n ) end @@ -704,11 +725,14 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) u(i) = m(i) - S(i,l) enddo - z(l) = S_inv($n,l)*u($n) + z(l) = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 - z(l) = z(l) + S_inv(i,l)*u(i) + do i=1,$n-3,4 + z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) enddo + i=$n-2 + z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2) d_inv = 1.d0/d @@ -719,19 +743,29 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) return endif + !DIR$ VECTOR ALIGNED + z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-3,4 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) - z(j+2) = S_inv($n,j+2)*u($n) - z(j+3) = S_inv($n,j+3)*u($n) !DIR$ VECTOR ALIGNED - do i=1,$n-1 + do i=1,$n-3 z(j ) = z(j ) + S_inv(i,j )*u(i) z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) enddo + z(j ) = z(j ) + S_inv($n-2,j )*u($n-2) + z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) + z(j ) = z(j ) + S_inv($n,j )*u($n) + z(j+1) = z(j+1) + S_inv($n-2,j+1)*u($n-2) + z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) + z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) + z(j+2) = z(j+2) + S_inv($n-2,j+2)*u($n-2) + z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1) + z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) + z(j+3) = z(j+3) + S_inv($n-2,j+3)*u($n-2) + z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1) + z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo j=$n-2 @@ -753,12 +787,24 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n-3,4 !DIR$ VECTOR ALIGNED - do j=1,$n + do j=1,$n-3 S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2) S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3) enddo + S_inv($n-2,i ) = S_inv($n-2,i )*lambda -z(i )*w($n-2) + S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1) + S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n ) + S_inv($n-2,i+1) = S_inv($n-2,i+1)*lambda -z(i+1)*w($n-2) + S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1) + S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n ) + S_inv($n-2,i+2) = S_inv($n-2,i+2)*lambda -z(i+2)*w($n-2) + S_inv($n-1,i+2) = 
S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1) + S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n ) + S_inv($n-2,i+3) = S_inv($n-2,i+3)*lambda -z(i+3)*w($n-2) + S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1) + S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n ) enddo i=$n-2 From b5f2e62421ca34b3d70bbf40e536299c8804a5d3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 4 Jun 2016 00:30:12 +0200 Subject: [PATCH 10/15] DIR$ SIMD --- src/det.irp.f | 202 +++++++++++++++++++++++++++++++------------------- src/mo.irp.f | 6 +- 2 files changed, 130 insertions(+), 78 deletions(-) diff --git a/src/det.irp.f b/src/det.irp.f index 9e2e11d..c488e12 100644 --- a/src/det.irp.f +++ b/src/det.irp.f @@ -434,49 +434,60 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0) !DIR$ ASSUME (LDS >= $n) integer :: i,j - double precision :: zl, ui + double precision :: zj, zj1, zj2, zj3 - zl = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 - ui = m(i) - S(i,l) - u(i) = ui - zl = zl + S_inv(i,l)*ui + !DIR$ SIMD + do i=1,$n + u(i) = m(i) - S(i,l) enddo - u($n) = m($n) - S($n,l) - zl = zl + S_inv($n,l)*u($n) + + zj = 0.d0 + !DIR$ SIMD REDUCTION(+:zj) + do i=1,$n-1,4 + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) + enddo + zj = zj + S_inv($n,l)*u($n) d_inv = 1.d0/d - d = d+zl + d = d+zj lambda = d*d_inv if (dabs(lambda) < 1.d-3) then d = 0.d0 return endif - !DIR$ VECTOR ALIGNED - z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-1,4 + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 + zj3 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) do i=1,$n-1 - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) - z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) + zj3 = zj3 + S_inv(i,j+3)*u(i) enddo - z(j ) = z(j ) + S_inv($n,j )*u($n) - z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) - z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) - z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) + z(j ) = zj + S_inv($n,j )*u($n) + z(j+1) = zj1 + S_inv($n,j+1)*u($n) + z(j+2) = zj2 + S_inv($n,j+2)*u($n) + z(j+3) = zj3 + S_inv($n,j+3)*u($n) enddo + zj = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj) do i=1,$n - z($n) = z($n) + S_inv(i,$n)*u(i) + zj = zj + S_inv(i,$n)*u(i) enddo + z($n) = zj !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(d_inv) do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -484,6 +495,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n-1,4 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n-1 S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -497,6 +509,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) enddo !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do i=1,$n S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*z($n) enddo @@ -567,61 +580,71 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (LDS >= $n) integer :: i,j + double precision :: zj, zj1, zj2, zj3 !DIR$ VECTOR ALIGNED + !DIR$ SIMD do i=1,$n u(i) = m(i) - S(i,l) enddo - z(l) = 0.d0 + zj = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj) do i=1,$n-2,4 - z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & - + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) enddo i=$n-1 - z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) d_inv = 1.d0/d - 
d = d+z(l) + d = d+zj lambda = d*d_inv if (dabs(lambda) < 1.d-3) then d = 0.d0 return endif - !DIR$ VECTOR ALIGNED - z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-2,4 + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 + zj3 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) do i=1,$n-2 - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) - z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) + zj3 = zj3 + S_inv(i,j+3)*u(i) enddo - z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) + z(j ) = zj + S_inv($n-1,j )*u($n-1) z(j ) = z(j ) + S_inv($n,j )*u($n) - z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) + z(j+1) = zj1 + S_inv($n-1,j+1)*u($n-1) z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) - z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1) + z(j+2) = zj2 + S_inv($n-1,j+2)*u($n-1) z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) - z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1) + z(j+3) = zj3 + S_inv($n-1,j+3)*u($n-1) z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo j=$n-1 + zj = 0.d0 + zj1 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1) do i=1,$n-2 - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) enddo - z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) + z(j ) = zj + S_inv($n-1,j )*u($n-1) z(j ) = z(j ) + S_inv($n,j )*u($n) - z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) + z(j+1) = zj1 + S_inv($n-1,j+1)*u($n-1) z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(d_inv) do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -629,6 +652,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n-2,4 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n-2 S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) @@ -647,6 +671,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) i=$n-1 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n-2 S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) @@ -719,67 +744,75 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0) !DIR$ ASSUME (LDS >= $n) integer :: i,j - + + double precision :: zj, zj1, zj2, zj3 !DIR$ VECTOR ALIGNED + !DIR$ SIMD do i=1,$n u(i) = m(i) - S(i,l) enddo - z(l) = 0.d0 + zj = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj) do i=1,$n-3,4 - z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & - + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) & + + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) enddo i=$n-2 - z(l) = z(l) + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2) + zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) + S_inv(i+2,l)*u(i+2) d_inv = 1.d0/d - d = d+z(l) + d = d+zj lambda = d*d_inv if (dabs(lambda) < 1.d-3) then d = 0.d0 return endif - !DIR$ VECTOR ALIGNED - z = 0.d0 !DIR$ VECTOR ALIGNED do j=1,$n-3,4 + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 + zj3 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) do i=1,$n-3 - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) - z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) + zj3 = zj3 + S_inv(i,j+3)*u(i) enddo - z(j ) = z(j ) + S_inv($n-2,j )*u($n-2) + z(j ) = zj + S_inv($n-2,j )*u($n-2) z(j ) = z(j ) + S_inv($n-1,j )*u($n-1) z(j ) = z(j ) + S_inv($n,j 
)*u($n) - z(j+1) = z(j+1) + S_inv($n-2,j+1)*u($n-2) + z(j+1) = zj1 + S_inv($n-2,j+1)*u($n-2) z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1) z(j+1) = z(j+1) + S_inv($n,j+1)*u($n) - z(j+2) = z(j+2) + S_inv($n-2,j+2)*u($n-2) + z(j+2) = zj2 + S_inv($n-2,j+2)*u($n-2) z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1) z(j+2) = z(j+2) + S_inv($n,j+2)*u($n) - z(j+3) = z(j+3) + S_inv($n-2,j+3)*u($n-2) + z(j+3) = zj3 + S_inv($n-2,j+3)*u($n-2) z(j+3) = z(j+3) + S_inv($n-1,j+3)*u($n-1) z(j+3) = z(j+3) + S_inv($n,j+3)*u($n) enddo j=$n-2 - z(j ) = S_inv($n,j )*u($n) - z(j+1) = S_inv($n,j+1)*u($n) - z(j+2) = S_inv($n,j+2)*u($n) + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 !DIR$ VECTOR ALIGNED - do i=1,$n-1 - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2) + do i=1,$n + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) enddo !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(d_inv) do i=1,$n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -787,6 +820,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) do i=1,$n-3,4 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n-3 S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -809,6 +843,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d) i=$n-2 !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,$n S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i ) S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1) @@ -881,20 +916,23 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) !DIR$ ASSUME (n>150) integer :: i,j,n4 + double precision :: zl !DIR$ VECTOR ALIGNED + !DIR$ SIMD do i=1,n u(i) = m(i) - S(i,l) enddo - z(l) = 0.d0 + zl = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zl) do i=1,n - z(l) = z(l) + S_inv(i,l)*u(i) + zl = zl + S_inv(i,l)*u(i) enddo d_inv = 1.d0/d - d = d+z(l) + d = d+zl lambda = d*d_inv if ( dabs(lambda) < 1.d-3 ) then @@ -902,31 +940,41 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) return endif + double precision :: zj, zj1, zj2, zj3 + n4 = iand(n,not(3)) !DIR$ VECTOR ALIGNED do j=1,n4,4 - z(j ) = 0.d0 - z(j+1) = 0.d0 - z(j+2) = 0.d0 - z(j+3) = 0.d0 + zj = 0.d0 + zj1 = 0.d0 + zj2 = 0.d0 + zj3 = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) do i=1,n - z(j ) = z(j ) + S_inv(i,j )*u(i) - z(j+1) = z(j+1) + S_inv(i,j+1)*u(i) - z(j+2) = z(j+2) + S_inv(i,j+2)*u(i) - z(j+3) = z(j+3) + S_inv(i,j+3)*u(i) + zj = zj + S_inv(i,j )*u(i) + zj1 = zj1 + S_inv(i,j+1)*u(i) + zj2 = zj2 + S_inv(i,j+2)*u(i) + zj3 = zj3 + S_inv(i,j+3)*u(i) enddo + z(j ) = zj + z(j+1) = zj1 + z(j+2) = zj2 + z(j+3) = zj3 enddo do j=n4+1,n - z(j) = 0.d0 + zj = 0.d0 !DIR$ VECTOR ALIGNED + !DIR$ SIMD REDUCTION(+:zj) do i=1,n - z(j) = z(j) + S_inv(i,j)*u(i) + zj = zj + S_inv(i,j)*u(i) enddo + z(j ) = zj enddo !DIR$ VECTOR ALIGNED + !DIR$ SIMD FIRSTPRIVATE(d_inv) do i=1,n w(i) = S_inv(i,l)*d_inv S(i,l) = m(i) @@ -934,7 +982,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) do i=1,n4,4 !DIR$ VECTOR ALIGNED - !DIR$ VECTOR ALWAYS + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,n S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j) S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j) @@ -945,7 +993,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d) do i=n4+1,n !DIR$ VECTOR ALIGNED - !DIR$ VECTOR ALWAYS + !DIR$ SIMD FIRSTPRIVATE(lambda,z) do j=1,n S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j) enddo diff --git a/src/mo.irp.f b/src/mo.irp.f index 12d407a..eb300af 100644 --- a/src/mo.irp.f +++ b/src/mo.irp.f @@ 
diff --git a/src/mo.irp.f b/src/mo.irp.f
index 12d407a..eb300af 100644
--- a/src/mo.irp.f
+++ b/src/mo.irp.f
@@ -709,7 +709,7 @@ subroutine sparse_full_mv(A,LDA, &
       call MM_PREFETCH (A(j,indices(4)),3)
     IRP_ENDIF
-  !DIR$ VECTOR ALIGNED
+  !DIR$ SIMD
   do j=1,LDC
     C1(j) = 0.
     C2(j) = 0.
@@ -755,6 +755,7 @@ subroutine sparse_full_mv(A,LDA, &
     do k=0,LDA-1,$IRP_ALIGN/4
       !DIR$ VECTOR ALIGNED
+      !DIR$ SIMD FIRSTPRIVATE(d11,d21,d31,d41)
       do j=1,$IRP_ALIGN/4
         IRP_IF NO_PREFETCH
         IRP_ELSE
@@ -768,6 +769,7 @@ subroutine sparse_full_mv(A,LDA, &
       enddo
       !DIR$ VECTOR ALIGNED
+      !DIR$ SIMD FIRSTPRIVATE(d12,d22,d32,d42,d13,d23,d33,d43)
       do j=1,$IRP_ALIGN/4
         C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12 + A(j+k,k_vec(2))*d22&
             + A(j+k,k_vec(3))*d32 + A(j+k,k_vec(4))*d42
@@ -776,6 +778,7 @@ subroutine sparse_full_mv(A,LDA, &
       enddo
       !DIR$ VECTOR ALIGNED
+      !DIR$ SIMD FIRSTPRIVATE(d14,d24,d34,d44,d15,d25,d35,d45)
       do j=1,$IRP_ALIGN/4
         C4(j+k) = C4(j+k) + A(j+k,k_vec(1))*d14 + A(j+k,k_vec(2))*d24&
             + A(j+k,k_vec(3))*d34 + A(j+k,k_vec(4))*d44
@@ -796,6 +799,7 @@ subroutine sparse_full_mv(A,LDA, &
     !DIR$ VECTOR ALIGNED
     do k=0,LDA-1,$IRP_ALIGN/4
       !DIR$ VECTOR ALIGNED
+      !DIR$ SIMD FIRSTPRIVATE(d11,d12,d13,d14,d15)
       do j=1,$IRP_ALIGN/4
         C1(j+k) = C1(j+k) + A(j+k,k_vec(1))*d11
         C2(j+k) = C2(j+k) + A(j+k,k_vec(1))*d12

From f88fc822d93e1b3b888ce4ceb4e97b0f5130810f Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Mon, 6 Jun 2016 18:21:32 +0200
Subject: [PATCH 11/15] Optimized mod(0)

---
 src/det.irp.f | 43 +++++++++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/det.irp.f b/src/det.irp.f
index c488e12..e2d72c6 100644
--- a/src/det.irp.f
+++ b/src/det.irp.f
@@ -321,20 +321,24 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   !DIR$ ASSUME (mod(LDS,$IRP_ALIGN/8) == 0)
   !DIR$ ASSUME (LDS >= $n)
   integer :: i,j
+  double precision :: zj, zj1, zj2, zj3
   !DIR$ VECTOR ALIGNED
+  !DIR$ SIMD
   do i=1,$n
     u(i) = m(i) - S(i,l)
   enddo
-  z(l) = 0.d0
+  zj = 0.d0
   !DIR$ VECTOR ALIGNED
-  do i=1,$n
-    z(l) = z(l) + S_inv(i,l)*u(i)
+  !DIR$ SIMD REDUCTION(+:zj)
+  do i=1,$n-1,4
+    zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
+        + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
   enddo
   d_inv = 1.d0/d
-  d = d+z(l)
+  d = d+zj
   lambda = d*d_inv
   if (dabs(lambda) < 1.d-3) then
    d = 0.d0
@@ -343,20 +347,26 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   !DIR$ VECTOR ALIGNED
   do j=1,$n,4
-    z(j ) = 0.d0
-    z(j+1) = 0.d0
-    z(j+2) = 0.d0
-    z(j+3) = 0.d0
+    zj = 0.d0
+    zj1 = 0.d0
+    zj2 = 0.d0
+    zj3 = 0.d0
     !DIR$ VECTOR ALIGNED
+    !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
     do i=1,$n
-      z(j ) = z(j ) + S_inv(i,j )*u(i)
-      z(j+1) = z(j+1) + S_inv(i,j+1)*u(i)
-      z(j+2) = z(j+2) + S_inv(i,j+2)*u(i)
-      z(j+3) = z(j+3) + S_inv(i,j+3)*u(i)
+      zj = zj + S_inv(i,j )*u(i)
+      zj1 = zj1 + S_inv(i,j+1)*u(i)
+      zj2 = zj2 + S_inv(i,j+2)*u(i)
+      zj3 = zj3 + S_inv(i,j+3)*u(i)
     enddo
+    z(j ) = zj
+    z(j+1) = zj1
+    z(j+2) = zj2
+    z(j+3) = zj3
   enddo
   !DIR$ VECTOR ALIGNED
+  !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,$n
     w(i) = S_inv(i,l)*d_inv
     S(i,l) = m(i)
@@ -364,11 +374,12 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   do i=1,$n,4
     !DIR$ VECTOR ALIGNED
+    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
     do j=1,$n
-      S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j)
+      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3)
     enddo
   enddo
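For context on what these det_update$n routines compute: each one performs a rank-1 (single-column) replacement of the Slater matrix and updates its inverse and determinant with the Sherman-Morrison formula. Assuming, as the update statements imply, that S_inv stores the transposed inverse scaled by the determinant, i.e. S_inv = det(S) * (S^T)^{-1}, the hunks implement (in LaTeX notation, with u = m - S(:,l) the change of column l and gamma = (S^{-1}u)_l):

\det S' = (1+\gamma)\,\det S, \qquad \lambda = \frac{\det S'}{\det S},
\qquad z = \mathtt{S\_inv}^{T} u, \qquad w = \mathtt{S\_inv}(:,l)/\det S,
\qquad \mathtt{S\_inv}' = \lambda\,\mathtt{S\_inv} - z\,w^{T}.

The test dabs(lambda) < 1.d-3 rejects updates whose determinant ratio is too small for the rank-1 formula to be numerically safe; the scaled-inverse convention is an inference from the code, not stated in the patches.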
From 72baed17c2f5867dcd151dcf4783cc80c3c53b09 Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Mon, 6 Jun 2016 18:58:46 +0200
Subject: [PATCH 12/15] SIMD

---
 src/det.irp.f | 174 +++++++++++++++++++++++++++++++------------------
 1 file changed, 109 insertions(+), 65 deletions(-)

diff --git a/src/det.irp.f b/src/det.irp.f
index e2d72c6..0049d7b 100644
--- a/src/det.irp.f
+++ b/src/det.irp.f
@@ -373,13 +373,17 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   do i=1,$n,4
+    zj = z(i )
+    zj1 = z(i+1)
+    zj2 = z(i+2)
+    zj3 = z(i+3)
     !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
     do j=1,$n
-      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3)
+      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*zj2
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*zj3
     enddo
   enddo
@@ -505,24 +509,29 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   do i=1,$n-1,4
+    zj = z(i )
+    zj1 = z(i+1)
+    zj2 = z(i+2)
+    zj3 = z(i+3)
     !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
     do j=1,$n-1
-      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3)
+      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*zj2
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*zj3
     enddo
-    S_inv($n,i ) = S_inv($n,i )*lambda - w($n)*z(i )
-    S_inv($n,i+1) = S_inv($n,i+1)*lambda - w($n)*z(i+1)
-    S_inv($n,i+2) = S_inv($n,i+2)*lambda - w($n)*z(i+2)
-    S_inv($n,i+3) = S_inv($n,i+3)*lambda - w($n)*z(i+3)
+    S_inv($n,i ) = S_inv($n,i )*lambda - w($n)*zj
+    S_inv($n,i+1) = S_inv($n,i+1)*lambda - w($n)*zj1
+    S_inv($n,i+2) = S_inv($n,i+2)*lambda - w($n)*zj2
+    S_inv($n,i+3) = S_inv($n,i+3)*lambda - w($n)*zj3
   enddo
+  zj = z($n)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+  !DIR$ SIMD FIRSTPRIVATE(lambda,zj)
   do i=1,$n
-    S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*z($n)
+    S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*zj
   enddo
@@ -662,35 +671,41 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   do i=1,$n-2,4
+    zj = z(i)
+    zj1 = z(i+1)
+    zj2 = z(i+2)
+    zj3 = z(i+3)
     !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,z,zj1,zj2,zj3)
     do j=1,$n-2
-      S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j)
+      S_inv(j,i ) = S_inv(j,i )*lambda -zj *w(j)
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda -zj1*w(j)
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda -zj2*w(j)
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda -zj3*w(j)
     enddo
-    S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
-    S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
-    S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
-    S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
-    S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1)
-    S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n )
-    S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1)
-    S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n )
+    S_inv($n-1,i ) = S_inv($n-1,i )*lambda -zj *w($n-1)
+    S_inv($n ,i ) = S_inv($n ,i )*lambda -zj *w($n )
+    S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -zj1*w($n-1)
+    S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -zj1*w($n )
+    S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -zj2*w($n-1)
+    S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -zj2*w($n )
+    S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -zj3*w($n-1)
+    S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -zj3*w($n )
   enddo
   i=$n-1
+  zj = z(i)
+  zj1= z(i+1)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+  !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1)
   do j=1,$n-2
-    S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
-    S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
+    S_inv(j,i ) = S_inv(j,i )*lambda -zj*w(j)
+    S_inv(j,i+1) = S_inv(j,i+1)*lambda -zj1*w(j)
   enddo
-  S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
-  S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
-  S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
-  S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
+  S_inv($n-1,i ) = S_inv($n-1,i )*lambda -zj*w($n-1)
+  S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -zj1*w($n-1)
+  S_inv($n ,i ) = S_inv($n ,i )*lambda -zj*w($n )
+  S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -zj1*w($n )
 end
@@ -816,11 +831,20 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = 0.d0
    !DIR$ VECTOR ALIGNED
    !DIR$ SIMD REDUCTION(+:zj,zj1,zj2)
-   do i=1,$n
+   do i=1,$n-3
      zj = zj + S_inv(i,j )*u(i)
      zj1 = zj1 + S_inv(i,j+1)*u(i)
      zj2 = zj2 + S_inv(i,j+2)*u(i)
    enddo
+   z(j ) = zj + S_inv($n-2,j )*u($n-2)
+   z(j ) = z(j ) + S_inv($n-1,j )*u($n-1)
+   z(j ) = z(j ) + S_inv($n,j )*u($n)
+   z(j+1) = zj1 + S_inv($n-2,j+1)*u($n-2)
+   z(j+1) = z(j+1) + S_inv($n-1,j+1)*u($n-1)
+   z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
+   z(j+2) = zj2 + S_inv($n-2,j+2)*u($n-2)
+   z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
+   z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
    !DIR$ VECTOR ALIGNED
    !DIR$ SIMD FIRSTPRIVATE(d_inv)
@@ -830,37 +854,45 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   do i=1,$n-3,4
+    zj = z(i)
+    zj1 = z(i+1)
+    zj2 = z(i+2)
+    zj3 = z(i+3)
     !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
     do j=1,$n-3
-      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*z(i+3)
+      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*zj2
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*zj3
     enddo
-    S_inv($n-2,i ) = S_inv($n-2,i )*lambda -z(i )*w($n-2)
-    S_inv($n-1,i ) = S_inv($n-1,i )*lambda -z(i )*w($n-1)
-    S_inv($n ,i ) = S_inv($n ,i )*lambda -z(i )*w($n )
-    S_inv($n-2,i+1) = S_inv($n-2,i+1)*lambda -z(i+1)*w($n-2)
-    S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -z(i+1)*w($n-1)
-    S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -z(i+1)*w($n )
-    S_inv($n-2,i+2) = S_inv($n-2,i+2)*lambda -z(i+2)*w($n-2)
-    S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -z(i+2)*w($n-1)
-    S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -z(i+2)*w($n )
-    S_inv($n-2,i+3) = S_inv($n-2,i+3)*lambda -z(i+3)*w($n-2)
-    S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -z(i+3)*w($n-1)
-    S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -z(i+3)*w($n )
+    S_inv($n-2,i ) = S_inv($n-2,i )*lambda -zj *w($n-2)
+    S_inv($n-1,i ) = S_inv($n-1,i )*lambda -zj *w($n-1)
+    S_inv($n ,i ) = S_inv($n ,i )*lambda -zj *w($n )
+    S_inv($n-2,i+1) = S_inv($n-2,i+1)*lambda -zj1*w($n-2)
+    S_inv($n-1,i+1) = S_inv($n-1,i+1)*lambda -zj1*w($n-1)
+    S_inv($n ,i+1) = S_inv($n ,i+1)*lambda -zj1*w($n )
+    S_inv($n-2,i+2) = S_inv($n-2,i+2)*lambda -zj2*w($n-2)
+    S_inv($n-1,i+2) = S_inv($n-1,i+2)*lambda -zj2*w($n-1)
+    S_inv($n ,i+2) = S_inv($n ,i+2)*lambda -zj2*w($n )
+    S_inv($n-2,i+3) = S_inv($n-2,i+3)*lambda -zj3*w($n-2)
+    S_inv($n-1,i+3) = S_inv($n-1,i+3)*lambda -zj3*w($n-1)
+    S_inv($n ,i+3) = S_inv($n ,i+3)*lambda -zj3*w($n )
   enddo
   i=$n-2
+  zj = z(i)
+  zj1 = z(i+1)
+  zj2 = z(i+2)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+  !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2)
   do j=1,$n
-    S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*z(i )
-    S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*z(i+1)
-    S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*z(i+2)
+    S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
+    S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
+    S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*zj2
   enddo
+
 end
 SUBST [ n ]
@@ -991,22 +1023,34 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
     S(i,l) = m(i)
   enddo
+  !DIR$ VECTOR ALIGNED
+  !DIR$ SIMD FIRSTPRIVATE(d_inv)
+  do i=1,n
+    w(i) = S_inv(i,l)*d_inv
+    S(i,l) = m(i)
+  enddo
+
   do i=1,n4,4
+    zj = z(i)
+    zj1 = z(i+1)
+    zj2 = z(i+2)
+    zj3 = z(i+3)
     !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
     do j=1,n
-      S_inv(j,i ) = S_inv(j,i )*lambda -z(i )*w(j)
-      S_inv(j,i+1) = S_inv(j,i+1)*lambda -z(i+1)*w(j)
-      S_inv(j,i+2) = S_inv(j,i+2)*lambda -z(i+2)*w(j)
-      S_inv(j,i+3) = S_inv(j,i+3)*lambda -z(i+3)*w(j)
+      S_inv(j,i ) = S_inv(j,i )*lambda -zj *w(j)
+      S_inv(j,i+1) = S_inv(j,i+1)*lambda -zj1*w(j)
+      S_inv(j,i+2) = S_inv(j,i+2)*lambda -zj2*w(j)
+      S_inv(j,i+3) = S_inv(j,i+3)*lambda -zj3*w(j)
     enddo
   enddo
   do i=n4+1,n
+    zj = z(i)
    !DIR$ VECTOR ALIGNED
-    !DIR$ SIMD FIRSTPRIVATE(lambda,z)
+    !DIR$ SIMD FIRSTPRIVATE(lambda,zj)
    do j=1,n
-      S_inv(j,i) = S_inv(j,i)*lambda -z(i)*w(j)
+      S_inv(j,i) = S_inv(j,i)*lambda -zj*w(j)
    enddo
   enddo
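A note on the FIRSTPRIVATE change this patch makes throughout: naming the whole array z in a FIRSTPRIVATE clause asks each SIMD context for a private copy of the entire array, whereas hoisting the loop-invariant z(i) into a scalar lets it be broadcast once. A standalone sketch of the hoisting, not part of the patch (names mirror the code, size and lambda are hypothetical):

program firstprivate_sketch
  implicit none
  integer, parameter :: n = 16
  double precision   :: S_inv(n,n), w(n), z(n)
  double precision   :: lambda, zj
  integer            :: i, j
  call random_number(S_inv)
  call random_number(w)
  call random_number(z)
  lambda = 1.1d0
  do i=1,n
    zj = z(i)                  ! invariant over the inner j loop
    !DIR$ SIMD FIRSTPRIVATE(lambda,zj)
    do j=1,n                   ! rank-1 update of one column
      S_inv(j,i) = S_inv(j,i)*lambda - zj*w(j)
    enddo
  enddo
  print *, S_inv(1,1)
end program firstprivate_sketch

Note that one hunk above still reads FIRSTPRIVATE(lambda,z,zj1,zj2,zj3); the next patch replaces the stray z with zj.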
From 076803da1a80ab6ce1da182754616640b0b24c8d Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Fri, 10 Jun 2016 00:20:01 +0200
Subject: [PATCH 13/15] noprefetch

---
 src/det.irp.f | 66 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/src/det.irp.f b/src/det.irp.f
index 0049d7b..c920099 100644
--- a/src/det.irp.f
+++ b/src/det.irp.f
@@ -324,14 +324,16 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   double precision :: zj, zj1, zj2, zj3
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
     u(i) = m(i) - S(i,l)
   enddo
   zj = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD REDUCTION(+:zj) NOVECREMAINDER
   do i=1,$n-1,4
     zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
         + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
@@ -352,7 +354,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = 0.d0
    zj3 = 0.d0
    !DIR$ VECTOR ALIGNED
-   !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
+   !DIR$ NOPREFETCH
+   !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) NOVECREMAINDER
    do i=1,$n
      zj = zj + S_inv(i,j )*u(i)
      zj1 = zj1 + S_inv(i,j+1)*u(i)
@@ -366,7 +369,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(d_inv)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
     w(i) = S_inv(i,l)*d_inv
     S(i,l) = m(i)
@@ -378,7 +382,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = z(i+2)
    zj3 = z(i+3)
    !DIR$ VECTOR ALIGNED
-   !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
+   !DIR$ NOPREFETCH
+   !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3) NOVECREMAINDER
    do j=1,$n
      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
@@ -452,12 +457,14 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   double precision :: zj, zj1, zj2, zj3
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
     u(i) = m(i) - S(i,l)
   enddo
   zj = 0.d0
+  !DIR$ NOPREFETCH
   !DIR$ SIMD REDUCTION(+:zj)
   do i=1,$n-1,4
     zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
@@ -480,7 +487,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = 0.d0
    zj3 = 0.d0
    !DIR$ VECTOR ALIGNED
-   !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
+   !DIR$ NOPREFETCH
+   !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) NOVECREMAINDER
    do i=1,$n-1
      zj = zj + S_inv(i,j )*u(i)
      zj1 = zj1 + S_inv(i,j+1)*u(i)
@@ -495,14 +503,16 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj)
-  do i=1,$n
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD REDUCTION(+:zj) NOVECREMAINDER
+  do i=1,$n-1
     zj = zj + S_inv(i,$n)*u(i)
   enddo
-  z($n) = zj
+  z($n) = zj + S_inv($n,$n)*u($n)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(d_inv)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
     w(i) = S_inv(i,l)*d_inv
     S(i,l) = m(i)
@@ -514,7 +524,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = z(i+2)
    zj3 = z(i+3)
    !DIR$ VECTOR ALIGNED
-   !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
+   !DIR$ NOPREFETCH
+   !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3) NOVECREMAINDER
    do j=1,$n-1
      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
      S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1
@@ -529,7 +540,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj = z($n)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(lambda,zj)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD FIRSTPRIVATE(lambda,zj) NOVECREMAINDER
   do i=1,$n
     S_inv(i,$n) = S_inv(i,$n)*lambda -w(i)*zj
   enddo
@@ -602,14 +614,16 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   double precision :: zj, zj1, zj2, zj3
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
     u(i) = m(i) - S(i,l)
   enddo
   zj = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD REDUCTION(+:zj) NOVECREMAINDER
   do i=1,$n-2,4
     zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
         + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
@@ -653,7 +667,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj = 0.d0
   zj1 = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj,zj1)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD REDUCTION(+:zj,zj1) NOVECREMAINDER
   do i=1,$n-2
     zj = zj + S_inv(i,j )*u(i)
     zj1 = zj1 + S_inv(i,j+1)*u(i)
@@ -664,7 +679,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(d_inv)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
     w(i) = S_inv(i,l)*d_inv
     S(i,l) = m(i)
@@ -780,7 +796,8 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj)
+  !DIR$ NOPREFETCH
+  !DIR$ SIMD REDUCTION(+:zj) NOVECREMAINDER
   do i=1,$n-3,4
     zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
         + S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3)
@@ -830,6 +847,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj1 = 0.d0
    zj2 = 0.d0
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD REDUCTION(+:zj,zj1,zj2)
    do i=1,$n-3
      zj = zj + S_inv(i,j )*u(i)
@@ -847,6 +865,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,$n
     w(i) = S_inv(i,l)*d_inv
@@ -859,6 +878,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj2 = z(i+2)
    zj3 = z(i+3)
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
    do j=1,$n-3
      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
@@ -885,6 +905,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    zj1 = z(i+1)
    zj2 = z(i+2)
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2)
    do j=1,$n
      S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj
@@ -962,6 +983,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   double precision :: zl
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   !DIR$ SIMD
   do i=1,n
     u(i) = m(i) - S(i,l)
@@ -969,6 +991,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   zl = 0.d0
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   !DIR$ SIMD REDUCTION(+:zl)
   do i=1,n
     zl = zl + S_inv(i,l)*u(i)
@@ -987,12 +1010,14 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   n4 = iand(n,not(3))
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   do j=1,n4,4
    zj = 0.d0
    zj1 = 0.d0
    zj2 = 0.d0
    zj3 = 0.d0
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
    do i=1,n
      zj = zj + S_inv(i,j )*u(i)
@@ -1009,6 +1034,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   do j=n4+1,n
    zj = 0.d0
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD REDUCTION(+:zj)
    do i=1,n
      zj = zj + S_inv(i,j)*u(i)
@@ -1017,6 +1043,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   enddo
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,n
     w(i) = S_inv(i,l)*d_inv
@@ -1024,6 +1051,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   enddo
   !DIR$ VECTOR ALIGNED
+  !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,n
     w(i) = S_inv(i,l)*d_inv
@@ -1036,6 +1064,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
    zj2 = z(i+2)
    zj3 = z(i+3)
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3)
    do j=1,n
      S_inv(j,i ) = S_inv(j,i )*lambda -zj *w(j)
@@ -1048,6 +1077,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   do i=n4+1,n
    zj = z(i)
    !DIR$ VECTOR ALIGNED
+   !DIR$ NOPREFETCH
    !DIR$ SIMD FIRSTPRIVATE(lambda,zj)
    do j=1,n
      S_inv(j,i) = S_inv(j,i)*lambda -zj*w(j)
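Alongside the prefetch and remainder-loop suppression, det_update_general relies on the n4 = iand(n,not(3)) idiom: clearing the two low bits rounds the trip count down to a multiple of 4, so the main loop runs in unrolled-by-4 blocks and a short scalar loop mops up the rest. A standalone sketch of the idiom, not part of the patch (the value of n is arbitrary):

program blocking_sketch
  implicit none
  integer :: n, n4, i, nblk, nrem
  n  = 150
  n4 = iand(n, not(3))       ! largest multiple of 4 <= n (here 148)
  nblk = 0
  nrem = 0
  do i=1,n4,4                ! unrolled-by-4 main loop over i..i+3
    nblk = nblk + 4
  enddo
  do i=n4+1,n                ! remainder loop, at most 3 iterations
    nrem = nrem + 1
  enddo
  print *, n, n4, nblk, nrem ! prints 150 148 148 2
end program blocking_sketch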
From 3b101c0b8ada34c799ba33f6e518306e3bb5e441 Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Wed, 22 Jun 2016 23:18:21 +0200
Subject: [PATCH 14/15] Removed useless things from EZFIO

---
 ezfio_config/qmc.config   |  3 ---
 src/det.irp.f             | 20 +++-----------------
 src/ezfio_interface.irp.f |  2 --
 3 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/ezfio_config/qmc.config b/ezfio_config/qmc.config
index 29aaac9..73812d1 100644
--- a/ezfio_config/qmc.config
+++ b/ezfio_config/qmc.config
@@ -11,9 +11,6 @@ mo_basis
   mo_tot_num      integer
   mo_coef         real (ao_basis_ao_num,mo_basis_mo_tot_num)
   mo_classif      character (mo_basis_mo_tot_num)
-  mo_closed_num   integer =n_count_ch(mo_basis_mo_classif,size(mo_basis_mo_classif),'c')
-  mo_active_num   integer =n_count_ch(mo_basis_mo_classif,size(mo_basis_mo_classif),'a')
-  mo_virtual_num  integer =n_count_ch(mo_basis_mo_classif,size(mo_basis_mo_classif),'v')
   mo_energy       real (mo_basis_mo_tot_num)
   mo_occ          real (mo_basis_mo_tot_num)
   mo_symmetry     character*(8) (mo_basis_mo_tot_num)

diff --git a/src/det.irp.f b/src/det.irp.f
index c920099..0982d07 100644
--- a/src/det.irp.f
+++ b/src/det.irp.f
@@ -241,7 +241,6 @@ subroutine det_update3(n,LDS,m,l,S,S_inv,d)
   double precision,intent(inout) :: d      ! Det(S)
   integer :: i
-  !DIR$ VECTOR ALIGNED
   do i=1,3
     S(i,l) = m(i)
   enddo
@@ -323,7 +322,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   integer :: i,j
   double precision :: zj, zj1, zj2, zj3
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
@@ -368,7 +366,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
    z(j+3) = zj3
   enddo
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
@@ -456,9 +453,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   integer :: i,j
   double precision :: zj, zj1, zj2, zj3
-  !DIR$ VECTOR ALIGNED
-  !DIR$ NOPREFETCH
-  !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
     u(i) = m(i) - S(i,l)
   enddo
@@ -510,7 +504,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   enddo
   z($n) = zj + S_inv($n,$n)*u($n)
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
@@ -613,7 +606,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   integer :: i,j
   double precision :: zj, zj1, zj2, zj3
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD NOVECREMAINDER
   do i=1,$n
@@ -646,7 +638,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj2 = 0.d0
   zj3 = 0.d0
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3)
+  !DIR$ SIMD REDUCTION(+:zj,zj1,zj2,zj3) NOVECREMAINDER
   do i=1,$n-2
     zj = zj + S_inv(i,j )*u(i)
     zj1 = zj1 + S_inv(i,j+1)*u(i)
@@ -678,7 +670,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   z(j+1) = zj1 + S_inv($n-1,j+1)*u($n-1)
   z(j+1) = z(j+1) + S_inv($n,j+1)*u($n)
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv) NOVECREMAINDER
   do i=1,$n
@@ -692,7 +683,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   zj2 = z(i+2)
   zj3 = z(i+3)
   !DIR$ VECTOR ALIGNED
-  !DIR$ SIMD FIRSTPRIVATE(lambda,z,zj1,zj2,zj3)
+  !DIR$ SIMD FIRSTPRIVATE(lambda,zj,zj1,zj2,zj3) NOVECREMAINDER
   do j=1,$n-2
     S_inv(j,i ) = S_inv(j,i )*lambda -zj *w(j)
     S_inv(j,i+1) = S_inv(j,i+1)*lambda -zj1*w(j)
@@ -788,7 +779,7 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   integer :: i,j
   double precision :: zj, zj1, zj2, zj3
-  !DIR$ VECTOR ALIGNED
+
   !DIR$ SIMD
   do i=1,$n
     u(i) = m(i) - S(i,l)
@@ -864,7 +855,6 @@ subroutine det_update$n(n,LDS,m,l,S,S_inv,d)
   z(j+2) = z(j+2) + S_inv($n-1,j+2)*u($n-1)
   z(j+2) = z(j+2) + S_inv($n,j+2)*u($n)
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,$n
@@ -982,9 +972,7 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
   integer :: i,j,n4
   double precision :: zl
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
-  !DIR$ SIMD
   do i=1,n
     u(i) = m(i) - S(i,l)
   enddo
@@ -1042,7 +1030,6 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
    z(j ) = zj
   enddo
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,n
@@ -1050,7 +1037,6 @@ subroutine det_update_general(n,LDS,m,l,S,S_inv,d)
    w(i) = S_inv(i,l)*d_inv
    S(i,l) = m(i)
   enddo
-  !DIR$ VECTOR ALIGNED
   !DIR$ NOPREFETCH
   !DIR$ SIMD FIRSTPRIVATE(d_inv)
   do i=1,n

diff --git a/src/ezfio_interface.irp.f b/src/ezfio_interface.irp.f
index 08c9cfe..bf537a8 100644
--- a/src/ezfio_interface.irp.f
+++ b/src/ezfio_interface.irp.f
@@ -52,8 +52,6 @@ data = [ \
 data_no_set = [\
 ("mo_basis_mo_tot_num"      , "integer"          , ""),
-("mo_basis_mo_active_num"   , "integer"          , ""),
-("mo_basis_mo_closed_num"   , "integer"          , ""),
 ("pseudo_ao_pseudo_grid"    , "double precision" , "(ao_num,pseudo_lmax+pseudo_lmax+1,pseudo_lmax-0+1,nucl_num,pseudo_grid_size)"),
 ("pseudo_mo_pseudo_grid"    , "double precision" , "(ao_num,pseudo_lmax+pseudo_lmax+1,pseudo_lmax-0+1,nucl_num,pseudo_grid_size)"),
 ("pseudo_pseudo_dz_k"       , "double precision" , "(nucl_num,pseudo_klocmax)"),
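The final patch below, besides renaming the population-weight providers, touches the compensated summation used for the block averages. The scheme it keeps is Kahan summation, with three temporaries per accumulated property exactly as in the generated code (index 1: running compensation, 2: new sum, 3: compensated addend). A standalone sketch, not part of the patch (the summand is hypothetical):

program kahan_sketch
  implicit none
  double precision :: s, kahan(3), term
  integer :: k
  s = 0.d0
  kahan(1) = 0.d0                        ! running compensation
  do k=1,1000000
    term = 1.d-10*dble(k)                ! any summand
    kahan(3) = term - kahan(1)           ! compensated addend
    kahan(2) = s + kahan(3)              ! new running sum
    kahan(1) = (kahan(2) - s) - kahan(3) ! rounding error recovered here
    s = kahan(2)
  enddo
  print *, s
end program kahan_sketch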
From 1a001e620b9421d538e9682d5df344e338528e26 Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Fri, 24 Jun 2016 09:11:37 +0200
Subject: [PATCH 15/15] Better time step error in SRMC

---
 src/SAMPLING/reconfigure.irp.f |  2 +-
 src/SAMPLING/srmc_step.irp.f   | 94 ++++++++++++++++++++++++----------
 2 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/src/SAMPLING/reconfigure.irp.f b/src/SAMPLING/reconfigure.irp.f
index c9434fa..6c92502 100644
--- a/src/SAMPLING/reconfigure.irp.f
+++ b/src/SAMPLING/reconfigure.irp.f
@@ -15,7 +15,7 @@ subroutine reconfigure(ipos,w)
     tmp = tmp + w(k)
   enddo
   dwalk_num = dble(walk_num)/tmp
-
+
   integer :: kp, km
   kp=0
   km=0

diff --git a/src/SAMPLING/srmc_step.irp.f b/src/SAMPLING/srmc_step.irp.f
index fc98ab5..5e06331 100644
--- a/src/SAMPLING/srmc_step.irp.f
+++ b/src/SAMPLING/srmc_step.irp.f
@@ -44,8 +44,8 @@ END_SHELL
  real, allocatable :: elec_coord_tmp(:,:,:)
  integer :: mod_align
- double precision :: E_loc_save(walk_num_dmc_max)
- double precision :: E_loc_save_tmp(walk_num_dmc_max)
+ double precision :: E_loc_save(4,walk_num_dmc_max)
+ double precision :: E_loc_save_tmp(4,walk_num_dmc_max)
  double precision :: psi_value_save(walk_num)
  double precision :: psi_value_save_tmp(walk_num)
  double precision :: srmc_weight(walk_num)
@@ -124,7 +124,7 @@ END_SHELL
      psi_grad_psi_inv_z(i) = psi_grad_psi_inv_save(i,3,i_walk)
    enddo
    psi_value = psi_value_save(i_walk)
-   E_loc = E_loc_save(i_walk)
+   E_loc = E_loc_save(1,i_walk)
  enddo
  SOFT_TOUCH elec_coord psi_grad_psi_inv_x psi_grad_psi_inv_y psi_grad_psi_inv_z psi_value E_loc
 else
@@ -135,7 +135,7 @@ END_SHELL
  enddo
  TOUCH elec_coord
  psi_value_save(i_walk) = psi_value
- E_loc_save(i_walk) = E_loc
+ E_loc_save(:,i_walk) = E_loc
 endif
 double precision :: p,q
@@ -144,19 +144,54 @@ END_SHELL
 call brownian_step(p,q,accepted,delta_x)
 if ( psi_value * psi_value_save(i_walk) >= 0.d0 ) then
-  delta = ((E_loc+E_loc_save(i_walk))*0.5d0 - E_ref) * p
-  if ( delta > thr ) then
-    srmc_weight(i_walk) = dexp(-dtime_step*thr)
-  else if ( delta < -thr ) then
-    srmc_weight(i_walk) = dexp(dtime_step*thr)
-  else
+!  delta = (E_loc+E_loc_save(1,i_walk))*0.5d0
+!  delta = (5.d0 * E_loc + 8.d0 * E_loc_save(1,i_walk) - E_loc_save(2,i_walk))/12.d0
+
+  delta = (9.d0*E_loc+19.d0*E_loc_save(1,i_walk)- &
+      5.d0*E_loc_save(2,i_walk)+E_loc_save(3,i_walk))/24.d0
+
+!  delta = -((-251.d0*E_loc)-646.d0*E_loc_save(1,i_walk)+264.d0*E_loc_save(2,i_walk)-&
+!      106.d0*E_loc_save(3,i_walk)+19.d0*E_loc_save(4,i_walk))/720.d0
+
+  delta = (delta - E_ref)*p
+
+  if (delta >= 0.d0) then
     srmc_weight(i_walk) = dexp(-dtime_step*delta)
+  else
+    srmc_weight(i_walk) = 2.d0-dexp(dtime_step*delta)
   endif
+
+
+!  if (accepted) then
+!    ! Compute correction to past weights
+!    double precision :: delta_old, delta_new
+!    delta_old = (9.d0*E_loc_save(1,i_walk)+19.d0*E_loc_save(2,i_walk)- &
+!        5.d0*E_loc_save(3,i_walk)+E_loc_save(4,i_walk))/24.d0 - E_ref
+!
+!
+!    if (delta_old >= 0.d0) then
+!      srmc_weight(i_walk) = srmc_weight(i_walk) * dexp(dtime_step*delta_old)
+!    else
+!      srmc_weight(i_walk) = srmc_weight(i_walk) * (2.d0-dexp(-dtime_step*delta_old))
+!    endif
+!
+!    delta_new = (-(E_loc_save_tmp(3,i_walk)-13.d0*E_loc_save_tmp(2,i_walk)&
+!        -13.d0*E_loc_save_tmp(1,i_walk)+E_loc))/24.d0 - E_ref
+!
+!    if (delta_new >= 0.d0) then
+!      srmc_weight(i_walk) = srmc_weight(i_walk) * dexp(-dtime_step*delta_new)
+!    else
+!      srmc_weight(i_walk) = srmc_weight(i_walk) * (2.d0-dexp(dtime_step*delta_new) )
+!    endif
+!
+!  endif
+
   elec_coord(elec_num+1,1) += p*time_step
   elec_coord(elec_num+1,2) = E_loc
   elec_coord(elec_num+1,3) = srmc_weight(i_walk) * pop_weight_mult
   do l=1,3
     do i=1,elec_num+1
+
       elec_coord_full(i,l,i_walk) = elec_coord(i,l)
     enddo
   enddo
@@ -167,25 +202,30 @@ END_SHELL
   enddo
   psi_value_save(i_walk) = psi_value
-  E_loc_save(i_walk) = E_loc
+  if (accepted) then
+    E_loc_save(4,i_walk) = E_loc_save(3,i_walk)
+    E_loc_save(3,i_walk) = E_loc_save(2,i_walk)
+    E_loc_save(2,i_walk) = E_loc_save(1,i_walk)
+    E_loc_save(1,i_walk) = E_loc
+  endif
 BEGIN_SHELL [ /usr/bin/python ]
 from properties import *
 t = """
  if (calc_$X) then
  ! Kahan's summation algorithm to compute these sums reducing the rounding error:
-!  $X_srmc_block_walk    += $X * pop_weight_mult * srmc_weight(i_walk)
-!  $X_2_srmc_block_walk  += $X_2 * pop_weight_mult * srmc_weight(i_walk)
+!  $X_srmc_block_walk    += $X * srmc_pop_weight_mult * srmc_weight(i_walk)
+!  $X_2_srmc_block_walk  += $X_2 * srmc_pop_weight_mult * srmc_weight(i_walk)
  ! see http://en.wikipedia.org/wiki/Kahan_summation_algorithm
-  $X_srmc_block_walk_kahan($D2 3) = $X * pop_weight_mult * srmc_weight(i_walk) - $X_srmc_block_walk_kahan($D2 1)
+  $X_srmc_block_walk_kahan($D2 3) = $X * srmc_pop_weight_mult * srmc_weight(i_walk) - $X_srmc_block_walk_kahan($D2 1)
   $X_srmc_block_walk_kahan($D2 2) = $X_srmc_block_walk $D1 + $X_srmc_block_walk_kahan($D2 3)
   $X_srmc_block_walk_kahan($D2 1) = ($X_srmc_block_walk_kahan($D2 2) - $X_srmc_block_walk $D1 ) &
       - $X_srmc_block_walk_kahan($D2 3)
   $X_srmc_block_walk $D1 = $X_srmc_block_walk_kahan($D2 2)
-  $X_2_srmc_block_walk_kahan($D2 3) = $X_2 * pop_weight_mult * srmc_weight(i_walk) - $X_2_srmc_block_walk_kahan($D2 1)
+  $X_2_srmc_block_walk_kahan($D2 3) = $X_2 * srmc_pop_weight_mult * srmc_weight(i_walk) - $X_2_srmc_block_walk_kahan($D2 1)
   $X_2_srmc_block_walk_kahan($D2 2) = $X_2_srmc_block_walk $D1 + $X_2_srmc_block_walk_kahan($D2 3)
   $X_2_srmc_block_walk_kahan($D2 1) = ($X_2_srmc_block_walk_kahan($D2 2) - $X_2_srmc_block_walk $D1 ) &
       - $X_2_srmc_block_walk_kahan($D2 3)
@@ -203,7 +243,7 @@ for p in properties:
 END_SHELL
-   block_weight += pop_weight_mult * srmc_weight(i_walk)
+   block_weight += srmc_pop_weight_mult * srmc_weight(i_walk)
  else
    srmc_weight(i_walk) = 0.d0
@@ -221,15 +261,15 @@ END_SHELL
 ! Eventually, recompute the weight of the population
 if (srmc_projection_step == 1) then
-  pop_weight_mult = 1.d0
+  srmc_pop_weight_mult = 1.d0
   do k=1,srmc_projection
-    pop_weight_mult *= pop_weight(k)
+    srmc_pop_weight_mult *= srmc_pop_weight(k)
   enddo
 endif
 ! Remove contribution of the old value of the weight at the new
 ! projection step
- pop_weight_mult *= 1.d0/pop_weight(srmc_projection_step)
+ srmc_pop_weight_mult *= 1.d0/srmc_pop_weight(srmc_projection_step)
 ! Compute the new weight of the population
 double precision :: sum_weight
 sum_weight = 0.d0
 do k=1,walk_num
   sum_weight += srmc_weight(k)
 enddo
- pop_weight(srmc_projection_step) = sum_weight/dble(walk_num)
+ srmc_pop_weight(srmc_projection_step) = sum_weight/dble(walk_num)
 ! Update the running population weight
- pop_weight_mult *= pop_weight(srmc_projection_step)
+ srmc_pop_weight_mult *= srmc_pop_weight(srmc_projection_step)
 ! Reconfiguration
 integer :: ipos(walk_num)
@@ -257,7 +297,7 @@ END_SHELL
      enddo
    enddo
    psi_value_save_tmp(k) = psi_value_save(k)
-   E_loc_save_tmp(k) = E_loc_save(k)
+   E_loc_save_tmp(:,k) = E_loc_save(:,k)
  enddo
  integer :: ipm
@@ -272,7 +312,7 @@ END_SHELL
      enddo
    enddo
    psi_value_save(k) = psi_value_save_tmp(ipm)
-   E_loc_save(k) = E_loc_save_tmp(ipm)
+   E_loc_save(:,k) = E_loc_save_tmp(:,ipm)
  enddo
  call system_clock(cpu1, count_rate, count_max)
@@ -287,7 +327,7 @@ END_SHELL
    cpu2 = cpu1
  endif
- SOFT_TOUCH elec_coord_full pop_weight_mult
+ SOFT_TOUCH elec_coord_full srmc_pop_weight_mult
  first_loop = .False.
@@ -314,7 +354,7 @@ END_SHELL
 END_PROVIDER
-BEGIN_PROVIDER [ double precision, pop_weight_mult ]
+BEGIN_PROVIDER [ double precision, srmc_pop_weight_mult ]
  implicit none
  BEGIN_DOC
! Population weight of SRMC
@@ -335,12 +375,12 @@ END_PROVIDER
  srmc_projection_step = 0
 END_PROVIDER
-BEGIN_PROVIDER [ double precision, pop_weight, (0:srmc_projection+1) ]
+BEGIN_PROVIDER [ double precision, srmc_pop_weight, (0:srmc_projection+1) ]
  implicit none
  BEGIN_DOC
! Population weight of SRMC
  END_DOC
- pop_weight = 1.d0
+ srmc_pop_weight = 1.d0
 END_PROVIDER
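A closing note on the branching weight introduced by this last patch. The active delta formula averages the local energy over the current value E_0 and the values E_1..E_3 saved at the three previous accepted steps, with coefficients (9, 19, -5, 1)/24 that match the 3-step Adams-Moulton corrector; the commented alternatives are the 2-, 3- and 5-point members of the same family. In LaTeX notation, with time step tau and drift-acceptance factor p, the weight is

\bar E = \frac{9E_0 + 19E_1 - 5E_2 + E_3}{24}, \qquad
\delta = (\bar E - E_{\mathrm{ref}})\,p, \qquad
w = \begin{cases} e^{-\tau\delta}, & \delta \ge 0,\\ 2 - e^{\tau\delta}, & \delta < 0. \end{cases}

Both branches agree to first order in tau*delta, since 2 - e^{x} = e^{-x} + O(x^2), but the second branch stays bounded by 2, which presumably keeps a single walker with a large negative local-energy fluctuation from dominating the population weight; this replaces the hard clipping at +/-thr used before the patch.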