mirror of https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2024-12-25 05:43:54 +01:00

Resolved some warnings of icx

This commit is contained in:
parent ebe38e79e3
commit fa03590f6f

File diff suppressed because one or more lines are too long
@@ -18,7 +18,7 @@ done
 
 # Generate C-header file
 NELEMENTS=${#SELECTION[@]}
-echo "const uint32_t n_cycles = $NELEMENTS;" > ${NU}_cycles.h
+echo "enum { n_cycles = $NELEMENTS };" > ${NU}_cycles.h
 echo -n "uint32_t cycles[n_cycles] = {" >> ${NU}_cycles.h
 for VAL in "${SELECTION[@]}"
 do
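Note: the switch from a const object to an enum constant matters because the generated header declares an array with that bound. The sketch below shows a hypothetical output of the script for a three-element SELECTION (the values 12, 17, 23 are invented for illustration); with the old `const uint32_t n_cycles`, `cycles[n_cycles]` at file scope is not a valid constant-expression array bound in C, which is presumably the icx warning being silenced, while an enum constant is a true compile-time constant.

/* Hypothetical generated ${NU}_cycles.h after this change (values made up). */
#include <stdint.h>

enum { n_cycles = 3 };
uint32_t cycles[n_cycles] = {12, 17, 23};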
@@ -30,7 +30,7 @@ uint32_t qmckl_sherman_morrison(
   for (uint32_t i = 0; i < Dim; i++) {
     C[i] = 0.0;
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
     }
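Note: this is the change that repeats throughout the remaining hunks of this file: the `novecremainder` clause of `#pragma vector` is a classic-icc extension that the LLVM-based icx does not accept, so keeping only `aligned` removes the warning the commit title refers to (this reading is an inference from the commit message, not stated in the diff). A minimal, self-contained sketch of the pattern that remains, assuming 64-byte-aligned buffers:

/* Minimal sketch (not the library's code) of the surviving pragma usage.
   'ivdep' asserts there are no loop-carried dependences; 'vector aligned'
   promises the compiler that a and b are suitably aligned. */
#include <stdint.h>
#include <stdlib.h>

void scale_add(double *restrict a, const double *restrict b, uint32_t n) {
  #pragma ivdep
  #pragma vector aligned
  for (uint32_t j = 0; j < n; j++)
    a[j] += 0.5 * b[j];
}

int main(void) {
  const uint32_t n = 64;
  double *a = aligned_alloc(64, n * sizeof *a);   /* keep the alignment promise */
  double *b = aligned_alloc(64, n * sizeof *b);
  for (uint32_t j = 0; j < n; j++) { a[j] = 1.0; b[j] = 2.0; }
  scale_add(a, b, n);
  free(a); free(b);
  return 0;
}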
@@ -50,7 +50,7 @@ uint32_t qmckl_sherman_morrison(
     *determinant *= den;
 
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       D[j] = Slater_inv[cui * LDS + j]; // selecting proper column of v_l^T * S_inv
     }
@@ -58,7 +58,7 @@ uint32_t qmckl_sherman_morrison(
     // A^{-1} = A^{-1} - C x D / den
     for (uint32_t i = 0; i < Dim; i++) {
       #pragma ivdep
-      #pragma vector aligned, novecremainder
+      #pragma vector aligned
       for (uint32_t j = 0; j < LDS; j++) {
         const double update = C[i] * D[j] * iden;
         Slater_inv[i * LDS + j] -= update;
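Note: the context lines of these hunks spell out the rank-1 Sherman-Morrison step (C = S^{-1}u, D = row cui of S^{-1}, den = 1 + C[cui], then S^{-1} -= C D / den). The sketch below restates that step against a plain n x n row-major matrix instead of the library's transposed, LDS-padded layout; the function name, the breakdown threshold, and the unit-vector assumption for v are illustrative choices, not the repository's API.

/* Hedged sketch of a rank-1 Sherman-Morrison update of an explicit inverse.
   Returns 0 on success, 1 on breakdown or allocation failure. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

int rank1_update(double *Sinv, const double *u, uint32_t cui, uint32_t n) {
  double *C = malloc(n * sizeof *C);
  double *D = malloc(n * sizeof *D);
  if (!C || !D) { free(C); free(D); return 1; }

  for (uint32_t i = 0; i < n; i++) {          /* C = S^{-1} u            */
    C[i] = 0.0;
    for (uint32_t j = 0; j < n; j++)
      C[i] += Sinv[i * n + j] * u[j];
  }

  double den = 1.0 + C[cui];                  /* 1 + v^T S^{-1} u        */
  if (fabs(den) < 1e-12) { free(C); free(D); return 1; }  /* breakdown   */
  double iden = 1.0 / den;

  memcpy(D, &Sinv[cui * n], n * sizeof *D);   /* D = v^T S^{-1}, copied so
                                                 the update cannot alias it */

  for (uint32_t i = 0; i < n; i++)            /* S^{-1} -= C D / den     */
    for (uint32_t j = 0; j < n; j++)
      Sinv[i * n + j] -= C[i] * D[j] * iden;

  free(C); free(D);
  return 0;
}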
@@ -98,7 +98,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
     C[i * 2] = 0;
     C[i * 2 + 1] = 0;
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t k = 0; k < LDS; k++) {
       C[i * 2] += Slater_inv[i * LDS + k] * Updates[k];
       C[i * 2 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
@@ -133,7 +133,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
   double *__restrict r1dim = &(Slater_inv[row1 * LDS]);
   double *__restrict r2dim = &(Slater_inv[row2 * LDS]);
   #pragma ivdep
-  #pragma vector aligned, novecremainder
+  #pragma vector aligned
   for (uint32_t j = 0; j < LDS; j++) {
     tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j];
     tmp[LDS + j] = Binv[2] * r1dim[j] + Binv[3] * r2dim[j];
@@ -142,7 +142,7 @@ uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
   // Compute (S^T)^{-1} - C * tmp : Dim x LDS
   for (uint32_t i = 0; i < Dim; i++) {
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
    for (uint32_t j = 0; j < LDS; j++) {
       Slater_inv[i * LDS + j] -= C[i * 2] * tmp[j];
       Slater_inv[i * LDS + j] -= C[i * 2 + 1] * tmp[LDS + j];
@@ -183,7 +183,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
     C[i * 3 + 1] = 0;
     C[i * 3 + 2] = 0;
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t k = 0; k < LDS; k++) {
       C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
       C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
@@ -232,7 +232,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
   double *__restrict r2dim = &(Slater_inv[row2 * LDS]);
   double *__restrict r3dim = &(Slater_inv[row3 * LDS]);
   #pragma ivdep
-  #pragma vector aligned, novecremainder
+  #pragma vector aligned
   for (uint32_t j = 0; j < LDS; j++) {
     tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
     tmp[LDS + j] = Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
@@ -242,7 +242,7 @@ uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
   // Compute (S^T)^{-1} - C * tmp : Dim x LDS
   for (uint32_t i = 0; i < Dim; i++) {
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
       Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
@@ -301,7 +301,7 @@ uint32_t qmckl_woodbury_k(const uint64_t vLDS,
   double det = 1.0;
   int j = 0;
   for (uint32_t i = 0; i < N_updates; i++) {
-    j += min(abs(ipiv[i] - i), 1);
+    j += min(ipiv[i] - i, 1);
    det *= B[(N_updates + 1) * i];
   }
   if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even
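Note: this hunk (and the identical one in the cuBLAS-offload variant below) is part of computing the determinant of the small k x k matrix from its LU factorization: the product of the diagonal, with a sign fixed by the number of row interchanges recorded in the pivot array. The sketch below shows the textbook form of that bookkeeping for a LAPACK-style getrf result with 1-based pivots; it is a reference for what the loop is doing, not the repository's exact index convention or its min()-based counting.

/* Hedged sketch: determinant from an in-place getrf factorization.
   lu is the factored matrix (row-major, leading dimension lda),
   ipiv holds 1-based pivot indices as returned by LAPACK. */
#include <stdint.h>

double det_from_getrf(const double *lu, const int *ipiv, int n, int lda) {
  double det = 1.0;
  for (int i = 0; i < n; i++) {
    det *= lu[i * lda + i];   /* diagonal entry of U            */
    if (ipiv[i] != i + 1)     /* a row interchange occurred     */
      det = -det;             /* each swap flips the sign       */
  }
  return det;
}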
@@ -398,7 +398,7 @@ uint32_t qmckl_woodbury_k_cublas_offload(const uint64_t vLDS,
   double det = 1.0;
   int j = 0;
   for (uint32_t i = 0; i < N_updates; i++) {
-    j += min(abs(ipiv[i] - i), 1);
+    j += min(ipiv[i] - i, 1);
    det *= B[(N_updates + 1) * i];
   }
   if ((j & 1) == 0) det = -det; // multiply det with -1 if j is even
@@ -456,7 +456,7 @@ uint32_t qmckl_slagel_splitting(
   for (uint32_t i = 0; i < Dim; i++) {
     C[i] = 0.0;
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
     }
@@ -473,7 +473,7 @@ uint32_t qmckl_slagel_splitting(
     // U_l = U_l / 2: split the update in 2 equal halves and save the second halve
     // in later_updates
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t i = 0; i < LDS; i++) {
       later_updates[*later * LDS + i] = Updates[l * LDS + i] / 2.0;
       C[i] /= 2.0;
@@ -489,7 +489,7 @@ uint32_t qmckl_slagel_splitting(
 
     // D = v^T x S^{-1} : 1 x LDS
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       D[j] = Slater_inv[cui * LDS + j];
     }
@@ -497,7 +497,7 @@ uint32_t qmckl_slagel_splitting(
     // S^{-1} = S^{-1} - C x D / den
     for (uint32_t i = 0; i < Dim; i++) {
       #pragma ivdep
-      #pragma vector aligned, novecremainder
+      #pragma vector aligned
       for (uint32_t j = 0; j < LDS; j++) {
         const double update = C[i] * D[j] * iden;
         Slater_inv[i * LDS + j] -= update;
@@ -692,7 +692,7 @@ uint32_t qmckl_sherman_morrison_later(
   for (uint32_t i = 0; i < Dim; i++) {
     C[i] = 0.0;
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
     }
@@ -703,7 +703,7 @@ uint32_t qmckl_sherman_morrison_later(
   double den = 1.0 + C[cui];
   if (fabs(den) < breakdown) {
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     // for (uint32_t i = 0; i < Dim; i++) {
     for (uint32_t i = 0; i < LDS; i++) {
       later_updates[later * LDS + i] = Updates[l * LDS + i];
@@ -719,7 +719,7 @@ uint32_t qmckl_sherman_morrison_later(
 
     // D = v^T x A^{-1}
     #pragma ivdep
-    #pragma vector aligned, novecremainder
+    #pragma vector aligned
     for (uint32_t j = 0; j < LDS; j++) {
       D[j] = Slater_inv[cui * LDS + j];
     }
@@ -727,7 +727,7 @@ uint32_t qmckl_sherman_morrison_later(
     // S^{-1} = S^{-1} - C x D / den
     for (uint32_t i = 0; i < Dim; i++) {
       #pragma ivdep
-      #pragma vector aligned, novecremainder
+      #pragma vector aligned
       for (uint32_t j = 0; j < LDS; j++) {
         const double update = C[i] * D[j] * iden;
         Slater_inv[i * LDS + j] -= update;

@@ -4,7 +4,7 @@
 
 #define DATASET "dataset_329d_zeropadded_cm.hdf5"
 // #define DATASET "dataset_15784d_zeropadded_cm.hdf5"
-#define REPETITIONS 1000000
+#define REPETITIONS 100000
 
 uint64_t n_splits;
 uint64_t block_fail;
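Note: REPETITIONS only controls how long the benchmark driver runs; lowering it by a factor of ten shortens the run without changing the kernels. The sketch below is a generic illustration (not the benchmark's actual driver) of how such a macro is typically used to amortize timer resolution; the kernel placeholder and timing calls are assumptions.

/* Generic repetition-timing sketch around a placeholder kernel. */
#include <stdio.h>
#include <time.h>

#define REPETITIONS 100000

static void kernel(void) { /* stand-in for the matrix-update kernel */ }

int main(void) {
  struct timespec t0, t1;
  clock_gettime(CLOCK_MONOTONIC, &t0);
  for (long r = 0; r < REPETITIONS; r++) kernel();
  clock_gettime(CLOCK_MONOTONIC, &t1);
  double s = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) * 1e-9;
  printf("average per call: %g s\n", s / REPETITIONS);
  return 0;
}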
@@ -15,8 +15,7 @@ int main(int argc, char **argv) {
   char *version = argv[1];
 
   // SETUP STORAGE AND DATA ACCESS
-  hid_t file_id, dataset_id;
-  herr_t status;
+  hid_t file_id;
   file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT);
   char nupds_key[32];
   char upd_idx_key[32];
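Note: this last hunk drops the HDF5 handles the driver never used (dataset_id and status), which is the kind of unused-variable warning icx reports. A minimal sketch of the resulting access pattern is below: only the handle that is actually used is declared, and return codes are checked inline rather than stored. The dataset key "/some_key" is made up; the benchmark builds its real keys (nupds_key, upd_idx_key) at runtime.

/* Hedged sketch of the cleaned-up HDF5 access pattern. */
#include <stdio.h>
#include <hdf5.h>
#include <hdf5_hl.h>

int main(void) {
  hid_t file_id = H5Fopen("dataset_329d_zeropadded_cm.hdf5",
                          H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) { fprintf(stderr, "cannot open dataset\n"); return 1; }

  double value = 0.0;                               /* illustrative target  */
  if (H5LTread_dataset_double(file_id, "/some_key", &value) < 0)
    fprintf(stderr, "read failed\n");               /* check, don't store   */

  H5Fclose(file_id);
  return 0;
}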