Resolved some warnings of icx

This commit is contained in:
Francois Coppens 2022-07-21 13:57:28 +02:00
parent ebe38e79e3
commit fa03590f6f
4 changed files with 23 additions and 24 deletions

File diff suppressed because one or more lines are too long

View File

@ -18,7 +18,7 @@ done
# Generate C-header file # Generate C-header file
NELEMENTS=${#SELECTION[@]} NELEMENTS=${#SELECTION[@]}
echo "const uint32_t n_cycles = $NELEMENTS;" > ${NU}_cycles.h echo "enum { n_cycles = $NELEMENTS };" > ${NU}_cycles.h
echo -n "uint32_t cycles[n_cycles] = {" >> ${NU}_cycles.h echo -n "uint32_t cycles[n_cycles] = {" >> ${NU}_cycles.h
for VAL in "${SELECTION[@]}" for VAL in "${SELECTION[@]}"
do do

View File

@ -30,7 +30,7 @@ uint32_t qmckl_sherman_morrison(
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
C[i] = 0.0; C[i] = 0.0;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l. C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
} }
@ -50,7 +50,7 @@ uint32_t qmckl_sherman_morrison(
*determinant *= den; *determinant *= den;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j]; // selecting proper column of v_l^T * S_inv D[j] = Slater_inv[cui * LDS + j]; // selecting proper column of v_l^T * S_inv
} }
@ -58,7 +58,7 @@ uint32_t qmckl_sherman_morrison(
// A^{-1} = A^{-1} - C x D / den // A^{-1} = A^{-1} - C x D / den
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden; const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update; Slater_inv[i * LDS + j] -= update;
@ -98,7 +98,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
C[i * 2] = 0; C[i * 2] = 0;
C[i * 2 + 1] = 0; C[i * 2 + 1] = 0;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t k = 0; k < LDS; k++) { for (uint32_t k = 0; k < LDS; k++) {
C[i * 2] += Slater_inv[i * LDS + k] * Updates[k]; C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k]; C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
@ -133,7 +133,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
double *__restrict r1dim = &(Slater_inv[row1 * LDS]); double *__restrict r1dim = &(Slater_inv[row1 * LDS]);
double *__restrict r2dim = &(Slater_inv[row2 * LDS]); double *__restrict r2dim = &(Slater_inv[row2 * LDS]);
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j]; tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j]; tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
@ -142,7 +142,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
// Compute (S^T)^{-1} - C * tmp : Dim x LDS // Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j]; Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j]; Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
@ -183,7 +183,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
C[i * 3 + 1] = 0; C[i * 3 + 1] = 0;
C[i * 3 + 2] = 0; C[i * 3 + 2] = 0;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t k = 0; k < LDS; k++) { for (uint32_t k = 0; k < LDS; k++) {
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k]; C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k]; C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
@ -232,7 +232,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
double *__restrict r2dim = &(Slater_inv[row2 * LDS]); double *__restrict r2dim = &(Slater_inv[row2 * LDS]);
double *__restrict r3dim = &(Slater_inv[row3 * LDS]); double *__restrict r3dim = &(Slater_inv[row3 * LDS]);
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j]; tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
tmp[LDS + j] = Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j]; tmp[LDS + j] = Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
@ -242,7 +242,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
// Compute (S^T)^{-1} - C * tmp : Dim x LDS // Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j]; Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j]; Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
@ -301,7 +301,7 @@ uint32_t qmckl_woodbury_k(const uint64_t vLDS,
double det = 1.0; double det = 1.0;
int j = 0; int j = 0;
for (uint32_t i = 0; i < N_updates; i++) { for (uint32_t i = 0; i < N_updates; i++) {
j += min(abs(ipiv[i] - i), 1); j += min(ipiv[i] - i, 1);
det *= B[(N_updates + 1) * i]; det *= B[(N_updates + 1) * i];
} }
if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even
@ -398,7 +398,7 @@ uint32_t qmckl_woodbury_k_cublas_offload(const uint64_t vLDS,
double det = 1.0; double det = 1.0;
int j = 0; int j = 0;
for (uint32_t i = 0; i < N_updates; i++) { for (uint32_t i = 0; i < N_updates; i++) {
j += min(abs(ipiv[i] - i), 1); j += min(ipiv[i] - i, 1);
det *= B[(N_updates + 1) * i]; det *= B[(N_updates + 1) * i];
} }
if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even
@ -456,7 +456,7 @@ uint32_t qmckl_slagel_splitting(
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
C[i] = 0.0; C[i] = 0.0;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l. C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
} }
@ -473,7 +473,7 @@ uint32_t qmckl_slagel_splitting(
// U_l = U_l / 2: split the update in 2 equal halves and save the second halve // U_l = U_l / 2: split the update in 2 equal halves and save the second halve
// in later_updates // in later_updates
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t i = 0; i < LDS; i++) { for (uint32_t i = 0; i < LDS; i++) {
later_updates[*later * LDS + i] = Updates[l * LDS + i] / 2.0; later_updates[*later * LDS + i] = Updates[l * LDS + i] / 2.0;
C[i] /= 2.0; C[i] /= 2.0;
@ -489,7 +489,7 @@ uint32_t qmckl_slagel_splitting(
// D = v^T x S^{-1} : 1 x LDS // D = v^T x S^{-1} : 1 x LDS
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j]; D[j] = Slater_inv[cui * LDS + j];
} }
@ -497,7 +497,7 @@ uint32_t qmckl_slagel_splitting(
// S^{-1} = S^{-1} - C x D / den // S^{-1} = S^{-1} - C x D / den
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden; const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update; Slater_inv[i * LDS + j] -= update;
@ -692,7 +692,7 @@ uint32_t qmckl_sherman_morrison_later(
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
C[i] = 0.0; C[i] = 0.0;
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l. C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
} }
@ -703,7 +703,7 @@ uint32_t qmckl_sherman_morrison_later(
double den = 1.0 + C[cui]; double den = 1.0 + C[cui];
if (fabs(den) < breakdown) { if (fabs(den) < breakdown) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
// for (uint32_t i = 0; i < Dim; i++) { // for (uint32_t i = 0; i < Dim; i++) {
for (uint32_t i = 0; i < LDS; i++) { for (uint32_t i = 0; i < LDS; i++) {
later_updates[later * LDS + i] = Updates[l * LDS + i]; later_updates[later * LDS + i] = Updates[l * LDS + i];
@ -719,7 +719,7 @@ uint32_t qmckl_sherman_morrison_later(
// D = v^T x A^{-1} // D = v^T x A^{-1}
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j]; D[j] = Slater_inv[cui * LDS + j];
} }
@ -727,7 +727,7 @@ uint32_t qmckl_sherman_morrison_later(
// S^{-1} = S^{-1} - C x D / den // S^{-1} = S^{-1} - C x D / den
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep #pragma ivdep
#pragma vector aligned, novecremainder #pragma vector aligned
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden; const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update; Slater_inv[i * LDS + j] -= update;

View File

@ -4,7 +4,7 @@
#define DATASET "dataset_329d_zeropadded_cm.hdf5" #define DATASET "dataset_329d_zeropadded_cm.hdf5"
// #define DATASET "dataset_15784d_zeropadded_cm.hdf5" // #define DATASET "dataset_15784d_zeropadded_cm.hdf5"
#define REPETITIONS 1000000 #define REPETITIONS 100000
uint64_t n_splits; uint64_t n_splits;
uint64_t block_fail; uint64_t block_fail;
@ -15,8 +15,7 @@ int main(int argc, char **argv) {
char *version = argv[1]; char *version = argv[1];
// SETUP STORAGE AND DATA ACCESS // SETUP STORAGE AND DATA ACCESS
hid_t file_id, dataset_id; hid_t file_id;
herr_t status;
file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT); file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT);
char nupds_key[32]; char nupds_key[32];
char upd_idx_key[32]; char upd_idx_key[32];