1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 20:36:01 +01:00

Accelerated HPC AO->MO transformation

This commit is contained in:
Anthony Scemama 2022-03-28 17:58:03 +02:00
parent 1b0bfd40be
commit bab87884cd

View File

@ -803,25 +803,70 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
} }
} }
for (int64_t n=0 ; n < nidx ; ++n) { int64_t n;
for (n=0 ; n < nidx-4 ; n+=4) {
int64_t k = idx[n]; int64_t k = idx[n];
const double* restrict ck1 = coef_normalized_t + k*mo_num; const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num;
if (avgl1[n] != 0.) { const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
const double a1 = av1[n]; const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
const double a2 = av2[n]; const double* restrict ck4 = coef_normalized_t + idx[n+3]*mo_num;
const double a3 = av3[n];
const double a4 = av4[n]; const double a11 = av1[n ];
const double a5 = av5[n]; const double a21 = av1[n+1];
const double a31 = av1[n+2];
const double a41 = av1[n+3];
const double a12 = av2[n ];
const double a22 = av2[n+1];
const double a32 = av2[n+2];
const double a42 = av2[n+3];
const double a13 = av3[n ];
const double a23 = av3[n+1];
const double a33 = av3[n+2];
const double a43 = av3[n+3];
const double a14 = av4[n ];
const double a24 = av4[n+1];
const double a34 = av4[n+2];
const double a44 = av4[n+3];
const double a15 = av5[n ];
const double a25 = av5[n+1];
const double a35 = av5[n+2];
const double a45 = av5[n+3];
#ifdef HAVE_OPENMP
#pragma omp simd
#endif
for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
}
}
int64_t n0 = nidx-4;
n0 = n0 < 0 ? 0 : n0;
for (int64_t n=n0 ; n < nidx ; n+=1) {
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
const double a1 = av1[n];
const double a2 = av2[n];
const double a3 = av3[n];
const double a4 = av4[n];
const double a5 = av5[n];
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] += ck1[i] * a1; vgl1[i] += ck[i] * a1;
vgl2[i] += ck1[i] * a2; vgl2[i] += ck[i] * a2;
vgl3[i] += ck1[i] * a3; vgl3[i] += ck[i] * a3;
vgl4[i] += ck1[i] * a4; vgl4[i] += ck[i] * a4;
vgl5[i] += ck1[i] * a5; vgl5[i] += ck[i] * a5;
}
} }
} }
} }