1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-05 11:00:36 +01:00

Moved ivdep after omp simd

This commit is contained in:
Anthony Scemama 2024-01-30 23:46:06 +01:00
parent dd3db966b0
commit 5060bde30f
2 changed files with 24 additions and 24 deletions

View File

@ -5944,28 +5944,28 @@ IVDEP
ao_value_1[0] = s1 * f[0]; ao_value_1[0] = s1 * f[0];
break; break;
case 3: case 3:
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<3 ; ++il) { for (int il=0 ; il<3 ; ++il) {
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
} }
break; break;
case(6): case(6):
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<6 ; ++il) { for (int il=0 ; il<6 ; ++il) {
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
} }
break; break;
default: default:
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<n ; ++il) { for (int il=0 ; il<n ; ++il) {
ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_value_1[il] = poly_vgl_1[il] * s1 * f[il];
} }
@ -6713,10 +6713,10 @@ qmckl_compute_ao_vgl_hpc_gaussian (
// if (do_sparse) { // if (do_sparse) {
for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) { for (int i=0 ; i<nucleus_shell_num[inucl] ; ++i) {
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int j=0 ; j<8 ; ++j) { for (int j=0 ; j<8 ; ++j) {
ce_mat[i][j] = 0.; ce_mat[i][j] = 0.;
} }
@ -6725,10 +6725,10 @@ IVDEP
for (int l=1 ; l<idx[0]; ++l) { for (int l=1 ; l<idx[0]; ++l) {
const int k = idx[l]; const int k = idx[l];
if (k >= nidx) break; if (k >= nidx) break;
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int j=0 ; j<8 ; ++j) { for (int j=0 ; j<8 ; ++j) {
ce_mat[i][j] = ce_mat[i][j] + v[l] * exp_mat[k][j]; ce_mat[i][j] = ce_mat[i][j] + v[l] * exp_mat[k][j];
} }
@ -6814,10 +6814,10 @@ IVDEP
ao_vgl_5[0] = s5 * f[0]; ao_vgl_5[0] = s5 * f[0];
break; break;
case 3: case 3:
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<3 ; ++il) { for (int il=0 ; il<3 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
@ -6830,10 +6830,10 @@ IVDEP
} }
break; break;
case 6: case 6:
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<6 ; ++il) { for (int il=0 ; il<6 ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];
@ -6846,10 +6846,10 @@ IVDEP
} }
break; break;
default: default:
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int il=0 ; il<n ; ++il) { for (int il=0 ; il<n ; ++il) {
ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il];
ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il];

View File

@ -1301,10 +1301,10 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
"coefficient_t_sp"); "coefficient_t_sp");
}; };
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num*ao_num ; ++i) { for (int64_t i=0 ; i<mo_num*ao_num ; ++i) {
coefficient_t_sp[i] = (float) coefficient_t[i]; coefficient_t_sp[i] = (float) coefficient_t[i];
} }
@ -1358,10 +1358,10 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
const float a71 = av1[n+6]; const float a71 = av1[n+6];
const float a81 = av1[n+7]; const float a81 = av1[n+7];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl_sp[i] = vgl_sp[i] + vgl_sp[i] = vgl_sp[i] +
ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
@ -1373,19 +1373,19 @@ qmckl_compute_mo_basis_mo_value_hpc_sp (const qmckl_context context,
const float* restrict ck = coefficient_t_sp + idx[m]*mo_num; const float* restrict ck = coefficient_t_sp + idx[m]*mo_num;
const float a1 = av1[m]; const float a1 = av1[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl_sp[i] = vgl_sp[i] + ck[i] * a1; vgl_sp[i] = vgl_sp[i] + ck[i] * a1;
} }
} }
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = (double) vgl_sp[i]; vgl1[i] = (double) vgl_sp[i];
} }
@ -1482,10 +1482,10 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
const double a71 = av1[n+6]; const double a71 = av1[n+6];
const double a81 = av1[n+7]; const double a81 = av1[n+7];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + vgl1[i] = vgl1[i] +
ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41 +
@ -1497,10 +1497,10 @@ qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context,
const double* restrict ck = coefficient_t + idx[m]*mo_num; const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m]; const double a1 = av1[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck[i] * a1; vgl1[i] = vgl1[i] + ck[i] * a1;
} }
@ -2038,10 +2038,10 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
const double a35 = av5[n+2]; const double a35 = av5[n+2];
const double a45 = av5[n+3]; const double a45 = av5[n+3];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41; vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42; vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
@ -2059,10 +2059,10 @@ IVDEP
const double a4 = av4[m]; const double a4 = av4[m];
const double a5 = av5[m]; const double a5 = av5[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck[i] * a1; vgl1[i] = vgl1[i] + ck[i] * a1;
vgl2[i] = vgl2[i] + ck[i] * a2; vgl2[i] = vgl2[i] + ck[i] * a2;
@ -2120,10 +2120,10 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
"coefficient_t_sp"); "coefficient_t_sp");
}; };
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num*ao_num ; ++i) { for (int64_t i=0 ; i<mo_num*ao_num ; ++i) {
coefficient_t_sp[i] = (float) coefficient_t[i]; coefficient_t_sp[i] = (float) coefficient_t[i];
} }
@ -2219,10 +2219,10 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
const float a35 = av5[n+2]; const float a35 = av5[n+2];
const float a45 = av5[n+3]; const float a45 = av5[n+3];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl_sp1[i] = vgl_sp1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41; vgl_sp1[i] = vgl_sp1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl_sp2[i] = vgl_sp2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42; vgl_sp2[i] = vgl_sp2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
@ -2240,10 +2240,10 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
const float a4 = av4[m]; const float a4 = av4[m];
const float a5 = av5[m]; const float a5 = av5[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl_sp1[i] = vgl_sp1[i] + ck[i] * a1; vgl_sp1[i] = vgl_sp1[i] + ck[i] * a1;
vgl_sp2[i] = vgl_sp2[i] + ck[i] * a2; vgl_sp2[i] = vgl_sp2[i] + ck[i] * a2;
@ -2252,10 +2252,10 @@ qmckl_compute_mo_basis_mo_vgl_hpc_sp (const qmckl_context context,
vgl_sp5[i] = vgl_sp5[i] + ck[i] * a5; vgl_sp5[i] = vgl_sp5[i] + ck[i] * a5;
} }
} }
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = (double) vgl_sp1[i]; vgl1[i] = (double) vgl_sp1[i];
vgl2[i] = (double) vgl_sp2[i]; vgl2[i] = (double) vgl_sp2[i];
@ -2583,10 +2583,10 @@ qmckl_compute_mo_basis_mo_value_cusp_hpc (const qmckl_context context,
const double a31 = av1[n+2]; const double a31 = av1[n+2];
const double a41 = av1[n+3]; const double a41 = av1[n+3];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41; vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
} }
@ -2596,10 +2596,10 @@ IVDEP
const double* restrict ck = coefficient_t + idx[m]*mo_num; const double* restrict ck = coefficient_t + idx[m]*mo_num;
const double a1 = av1[m]; const double a1 = av1[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck[i] * a1; vgl1[i] = vgl1[i] + ck[i] * a1;
} }
@ -3008,10 +3008,10 @@ qmckl_compute_mo_basis_mo_vgl_cusp_hpc (const qmckl_context context,
const double a35 = av5[n+2]; const double a35 = av5[n+2];
const double a45 = av5[n+3]; const double a45 = av5[n+3];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41; vgl1[i] = vgl1[i] + ck1[i] * a11 + ck2[i] * a21 + ck3[i] * a31 + ck4[i] * a41;
vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42; vgl2[i] = vgl2[i] + ck1[i] * a12 + ck2[i] * a22 + ck3[i] * a32 + ck4[i] * a42;
@ -3029,10 +3029,10 @@ IVDEP
const double a4 = av4[m]; const double a4 = av4[m];
const double a5 = av5[m]; const double a5 = av5[m];
IVDEP
#ifdef HAVE_OPENMP #ifdef HAVE_OPENMP
#pragma omp simd #pragma omp simd
#endif #endif
IVDEP
for (int64_t i=0 ; i<mo_num ; ++i) { for (int64_t i=0 ; i<mo_num ; ++i) {
vgl1[i] = vgl1[i] + ck[i] * a1; vgl1[i] = vgl1[i] + ck[i] * a1;
vgl2[i] = vgl2[i] + ck[i] * a2; vgl2[i] = vgl2[i] + ck[i] * a2;