mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-07-22 18:57:40 +02:00
Improved vectorization of mo_value
This commit is contained in:
parent
098b6deec3
commit
43ebd409a8
@ -1339,23 +1339,32 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
|
|||||||
|
|
||||||
int64_t n=0;
|
int64_t n=0;
|
||||||
|
|
||||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
for (n=0 ; n < nidx-8 ; n+=8) {
|
||||||
const float* restrict ck1 = coefficient_t_sp + idx[n ]*mo_num;
|
const float* restrict ck1 = coefficient_t_sp + idx[n ]*mo_num;
|
||||||
const float* restrict ck2 = coefficient_t_sp + idx[n+1]*mo_num;
|
const float* restrict ck2 = coefficient_t_sp + idx[n+1]*mo_num;
|
||||||
const float* restrict ck3 = coefficient_t_sp + idx[n+2]*mo_num;
|
const float* restrict ck3 = coefficient_t_sp + idx[n+2]*mo_num;
|
||||||
const float* restrict ck4 = coefficient_t_sp + idx[n+3]*mo_num;
|
const float* restrict ck4 = coefficient_t_sp + idx[n+3]*mo_num;
|
||||||
|
const float* restrict ck5 = coefficient_t_sp + idx[n+4]*mo_num;
|
||||||
|
const float* restrict ck6 = coefficient_t_sp + idx[n+5]*mo_num;
|
||||||
|
const float* restrict ck7 = coefficient_t_sp + idx[n+6]*mo_num;
|
||||||
|
const float* restrict ck8 = coefficient_t_sp + idx[n+7]*mo_num;
|
||||||
|
|
||||||
const float a11 = av1[n ];
|
const float a11 = av1[n ];
|
||||||
const float a21 = av1[n+1];
|
const float a21 = av1[n+1];
|
||||||
const float a31 = av1[n+2];
|
const float a31 = av1[n+2];
|
||||||
const float a41 = av1[n+3];
|
const float a41 = av1[n+3];
|
||||||
|
const float a51 = av1[n+4];
|
||||||
|
const float a61 = av1[n+5];
|
||||||
|
const float a71 = av1[n+6];
|
||||||
|
const float a81 = av1[n+7];
|
||||||
|
|
||||||
IVDEP
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
vgl_sp[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||||
|
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1453,23 +1462,32 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
|
|||||||
|
|
||||||
int64_t n=0;
|
int64_t n=0;
|
||||||
|
|
||||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
for (n=0 ; n < nidx-8 ; n+=8) {
|
||||||
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
|
const double* restrict ck1 = coefficient_t + idx[n ]*mo_num;
|
||||||
const double* restrict ck2 = coefficient_t + idx[n+1]*mo_num;
|
const double* restrict ck2 = coefficient_t + idx[n+1]*mo_num;
|
||||||
const double* restrict ck3 = coefficient_t + idx[n+2]*mo_num;
|
const double* restrict ck3 = coefficient_t + idx[n+2]*mo_num;
|
||||||
const double* restrict ck4 = coefficient_t + idx[n+3]*mo_num;
|
const double* restrict ck4 = coefficient_t + idx[n+3]*mo_num;
|
||||||
|
const double* restrict ck5 = coefficient_t + idx[n+4]*mo_num;
|
||||||
|
const double* restrict ck6 = coefficient_t + idx[n+5]*mo_num;
|
||||||
|
const double* restrict ck7 = coefficient_t + idx[n+6]*mo_num;
|
||||||
|
const double* restrict ck8 = coefficient_t + idx[n+7]*mo_num;
|
||||||
|
|
||||||
const double a11 = av1[n ];
|
const double a11 = av1[n ];
|
||||||
const double a21 = av1[n+1];
|
const double a21 = av1[n+1];
|
||||||
const double a31 = av1[n+2];
|
const double a31 = av1[n+2];
|
||||||
const double a41 = av1[n+3];
|
const double a41 = av1[n+3];
|
||||||
|
const double a51 = av1[n+4];
|
||||||
|
const double a61 = av1[n+5];
|
||||||
|
const double a71 = av1[n+6];
|
||||||
|
const double a81 = av1[n+7];
|
||||||
|
|
||||||
IVDEP
|
IVDEP
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||||
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
vgl1[i] += ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
|
||||||
|
ck5[i] * a51 + ck6[i] * a61 + ck7[i] * a71 + ck8[i] * a81;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user