mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Accelerated HPC AO->MO transformation
This commit is contained in:
parent
1b0bfd40be
commit
bab87884cd
@ -803,25 +803,70 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
}
|
||||
}
|
||||
|
||||
for (int64_t n=0 ; n < nidx ; ++n) {
|
||||
int64_t n;
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
int64_t k = idx[n];
|
||||
const double* restrict ck1 = coef_normalized_t + k*mo_num;
|
||||
if (avgl1[n] != 0.) {
|
||||
const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num;
|
||||
const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
|
||||
const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
|
||||
const double* restrict ck4 = coef_normalized_t + idx[n+3]*mo_num;
|
||||
|
||||
const double a11 = av1[n ];
|
||||
const double a21 = av1[n+1];
|
||||
const double a31 = av1[n+2];
|
||||
const double a41 = av1[n+3];
|
||||
|
||||
const double a12 = av2[n ];
|
||||
const double a22 = av2[n+1];
|
||||
const double a32 = av2[n+2];
|
||||
const double a42 = av2[n+3];
|
||||
|
||||
const double a13 = av3[n ];
|
||||
const double a23 = av3[n+1];
|
||||
const double a33 = av3[n+2];
|
||||
const double a43 = av3[n+3];
|
||||
|
||||
const double a14 = av4[n ];
|
||||
const double a24 = av4[n+1];
|
||||
const double a34 = av4[n+2];
|
||||
const double a44 = av4[n+3];
|
||||
|
||||
const double a15 = av5[n ];
|
||||
const double a25 = av5[n+1];
|
||||
const double a35 = av5[n+2];
|
||||
const double a45 = av5[n+3];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
|
||||
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
|
||||
vgl3[i] = vgl3[i] + ck1[i] * a13 + ck2[i] * a23 + ck3[i] * a33 + ck4[i] * a43;
|
||||
vgl4[i] = vgl4[i] + ck1[i] * a14 + ck2[i] * a24 + ck3[i] * a34 + ck4[i] * a44;
|
||||
vgl5[i] = vgl5[i] + ck1[i] * a15 + ck2[i] * a25 + ck3[i] * a35 + ck4[i] * a45;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t n0 = nidx-4;
|
||||
n0 = n0 < 0 ? 0 : n0;
|
||||
for (int64_t n=n0 ; n < nidx ; n+=1) {
|
||||
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
|
||||
const double a1 = av1[n];
|
||||
const double a2 = av2[n];
|
||||
const double a3 = av3[n];
|
||||
const double a4 = av4[n];
|
||||
const double a5 = av5[n];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
#endif
|
||||
for (int64_t i=0 ; i<mo_num ; ++i) {
|
||||
vgl1[i] += ck1[i] * a1;
|
||||
vgl2[i] += ck1[i] * a2;
|
||||
vgl3[i] += ck1[i] * a3;
|
||||
vgl4[i] += ck1[i] * a4;
|
||||
vgl5[i] += ck1[i] * a5;
|
||||
}
|
||||
vgl1[i] += ck[i] * a1;
|
||||
vgl2[i] += ck[i] * a2;
|
||||
vgl3[i] += ck[i] * a3;
|
||||
vgl4[i] += ck[i] * a4;
|
||||
vgl5[i] += ck[i] * a5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user