From 43ebd409a879c19a7b65f84c77f66d0dfa604892 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 29 Jan 2024 11:59:39 +0100 Subject: [PATCH] Improved vectorization of mo_value --- org/qmckl_mo.org | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 6599d98..ce83ab3 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -1339,23 +1339,32 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context, int64_t n=0; - for (n=0 ; n < nidx-4 ; n+=4) { + for (n=0 ; n < nidx-8 ; n+=8) { const float* restrict ck1 = coefficient_t_sp + idx[n ]*mo_num; const float* restrict ck2 = coefficient_t_sp + idx[n+1]*mo_num; const float* restrict ck3 = coefficient_t_sp + idx[n+2]*mo_num; const float* restrict ck4 = coefficient_t_sp + idx[n+3]*mo_num; + const float* restrict ck5 = coefficient_t_sp + idx[n+4]*mo_num; + const float* restrict ck6 = coefficient_t_sp + idx[n+5]*mo_num; + const float* restrict ck7 = coefficient_t_sp + idx[n+6]*mo_num; + const float* restrict ck8 = coefficient_t_sp + idx[n+7]*mo_num; const float a11 = av1[n ]; const float a21 = av1[n+1]; const float a31 = av1[n+2]; const float a41 = av1[n+3]; + const float a51 = av1[n+4]; + const float a61 = av1[n+5]; + const float a71 = av1[n+6]; + const float a81 = av1[n+7]; IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif for (int64_t i=0 ; i