mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-03 20:54:09 +01:00
Improved vectorization of mo_value
This commit is contained in:
parent
098b6deec3
commit
43ebd409a8
@@ -1339,23 +1339,32 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
|
||||
|
||||
int64_t n=0;
|
||||
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
for (n=0 ; n < nidx-8 ; n+=8) {
|
||||
const float* restrict ck1 = coefficient_t_sp + idx[n ]*mo_num;
|
||||
const float* restrict ck2 = coefficient_t_sp + idx[n+1]*mo_num;
|
||||
const float* restrict ck3 = coefficient_t_sp + idx[n+2]*mo_num;
|
||||
const float* restrict ck4 = coefficient_t_sp + idx[n+3]*mo_num;
|
||||
const float* restrict ck5 = coefficient_t_sp + idx[n+4]*mo_num;
|
||||
const float* restrict ck6 = coefficient_t_sp + idx[n+5]*mo_num;
|
||||
const float* restrict ck7 = coefficient_t_sp + idx[n+6]*mo_num;
|
||||
const float* restrict ck8 = coefficient_t_sp + idx[n+7]*mo_num;
|
||||
|
||||
const float a11 = av1[n ];
|
||||
const float a21 = av1[n+1];
|
||||
const float a31 = av1[n+2];
|
||||
const float a41 = av1[n+3];
|
||||
const float a51 = av1[n+4];
|
||||
const float a61 = av1[n+5];
|
||||
const float a71 = av1[n+6];
|
||||
const float a81 = av1[n+7];
|
||||
|
||||
IVDEP
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1453,23 +1462,32 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
||||
|
||||
int64_t n=0;
|
||||
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
for (n=0 ; n < nidx-8 ; n+=8) {
|
||||
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
|
||||
const double* restrict ck2 = coefficient_t + idx[n+1]*mo_num;
|
||||
const double* restrict ck3 = coefficient_t + idx[n+2]*mo_num;
|
||||
const double* restrict ck4 = coefficient_t + idx[n+3]*mo_num;
|
||||
const double* restrict ck5 = coefficient_t + idx[n+4]*mo_num;
|
||||
const double* restrict ck6 = coefficient_t + idx[n+5]*mo_num;
|
||||
const double* restrict ck7 = coefficient_t + idx[n+6]*mo_num;
|
||||
const double* restrict ck8 = coefficient_t + idx[n+7]*mo_num;
|
||||
|
||||
const double a11 = av1[n ];
|
||||
const double a21 = av1[n+1];
|
||||
const double a31 = av1[n+2];
|
||||
const double a41 = av1[n+3];
|
||||
const double a51 = av1[n+4];
|
||||
const double a61 = av1[n+5];
|
||||
const double a71 = av1[n+6];
|
||||
const double a81 = av1[n+7];
|
||||
|
||||
IVDEP
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user