From 5060bde30fd224285b86a7f33d25edd1181b33f7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 30 Jan 2024 23:46:06 +0100 Subject: [PATCH 1/2] Moved ivdep after omp simd --- org/qmckl_ao.org | 16 ++++++++-------- org/qmckl_mo.org | 32 ++++++++++++++++---------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index db8a9c6..03e46f4 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -5944,28 +5944,28 @@ IVDEP ao_value_1[0] = s1 * f[0]; break; case 3: -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il<3 ; ++il) { ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; } break; case(6): -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il<6 ; ++il) { ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; } break; default: -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il= nidx) break; -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int j=0 ; j<8 ; ++j) { ce_mat[i][j] = ce_mat[i][j] + v[l] * exp_mat[k][j]; } @@ -6814,10 +6814,10 @@ IVDEP ao_vgl_5[0] = s5 * f[0]; break; case 3: -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il<3 ; ++il) { ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; @@ -6830,10 +6830,10 @@ IVDEP } break; case 6: -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il<6 ; ++il) { ao_vgl_1[il] = poly_vgl_1[il] * s1 * f[il]; ao_vgl_2[il] = (poly_vgl_2[il] * s1 + poly_vgl_1[il] * s2) * f[il]; @@ -6846,10 +6846,10 @@ IVDEP } break; default: -IVDEP #ifdef HAVE_OPENMP #pragma omp simd #endif +IVDEP for (int il=0 ; il Date: Tue, 6 Feb 2024 22:27:17 +0100 Subject: [PATCH 2/2] Fix memset --- org/qmckl_ao.org | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index db8a9c6..f09faff 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -5759,8 +5759,6 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, { <> - memset(ao_value, 0, ao_num*point_num*sizeof(double)); - #ifdef HAVE_OPENMP #pragma omp parallel if (point_num > 16) #endif @@ -5800,6 +5798,9 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, #pragma omp for #endif for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { + + memset(&ao_value[ipoint*ao_num], 0, ao_num*sizeof(double)); + const double e_coord[3] __attribute__((aligned(64))) = { coord[ipoint], coord[ipoint + point_num], @@ -6537,8 +6538,6 @@ qmckl_compute_ao_vgl_hpc_gaussian ( { <> - memset(ao_vgl, 0, 5*ao_num*point_num*sizeof(double)); - #ifdef HAVE_OPENMP #pragma omp parallel if (point_num > 16) #endif @@ -6590,6 +6589,9 @@ qmckl_compute_ao_vgl_hpc_gaussian ( #pragma omp for #endif for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { + + memset(&ao_vgl[ipoint*ao_num*5], 0, 5*ao_num*sizeof(double)); + const double e_coord[3] __attribute__((aligned(64))) = { coord[ipoint], coord[ipoint + point_num],