1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-07-03 09:56:10 +02:00

Improved vectorization of mo_value

This commit is contained in:
Anthony Scemama 2024-01-29 11:59:39 +01:00
parent 098b6deec3
commit 43ebd409a8

View File

@@ -1339,23 +1339,32 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) {
for (n=0 ; n < nidx-8 ; n+=8) {
const float* restrict ck1 = coefficient_t_sp + idx[n ]*mo_num;
const float* restrict ck2 = coefficient_t_sp + idx[n+1]*mo_num;
const float* restrict ck3 = coefficient_t_sp + idx[n+2]*mo_num;
const float* restrict ck4 = coefficient_t_sp + idx[n+3]*mo_num;
const float* restrict ck5 = coefficient_t_sp + idx[n+4]*mo_num;
const float* restrict ck6 = coefficient_t_sp + idx[n+5]*mo_num;
const float* restrict ck7 = coefficient_t_sp + idx[n+6]*mo_num;
const float* restrict ck8 = coefficient_t_sp + idx[n+7]*mo_num;
const float a11 = av1[n ];
const float a21 = av1[n+1];
const float a31 = av1[n+2];
const float a41 = av1[n+3];
const float a51 = av1[n+4];
const float a61 = av1[n+5];
const float a71 = av1[n+6];
const float a81 = av1[n+7];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
}
}
@@ -1453,23 +1462,32 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
int64_t n=0;
for (n=0 ; n < nidx-4 ; n+=4) {
for (n=0 ; n < nidx-8 ; n+=8) {
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
const double* restrict ck2 = coefficient_t + idx[n+1]*mo_num;
const double* restrict ck3 = coefficient_t + idx[n+2]*mo_num;
const double* restrict ck4 = coefficient_t + idx[n+3]*mo_num;
const double* restrict ck5 = coefficient_t + idx[n+4]*mo_num;
const double* restrict ck6 = coefficient_t + idx[n+5]*mo_num;
const double* restrict ck7 = coefficient_t + idx[n+6]*mo_num;
const double* restrict ck8 = coefficient_t + idx[n+7]*mo_num;
const double a11 = av1[n ];
const double a21 = av1[n+1];
const double a31 = av1[n+2];
const double a41 = av1[n+3];
const double a51 = av1[n+4];
const double a61 = av1[n+5];
const double a71 = av1[n+6];
const double a81 = av1[n+7];
IVDEP
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
}
}