1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 12:23:56 +01:00

Added compiler-dependent macros that define vectorization pragmas.

This commit is contained in:
Francois Coppens 2023-01-19 15:25:12 +01:00
parent fcf0907b82
commit 9c608166ec

View File

@ -113,6 +113,23 @@ int main() {
#include "qmckl.h" #include "qmckl.h"
#include "config.h" #include "config.h"
// Define compiler-specific vectorization pragmas behind the IVDEP and
// ALIGNED macros so loop bodies can request vectorization portably.
// The order of the checks matters:
//   __GNUC__ is also defined by ICC, ICX and Clang;
//   __clang__ is also defined by ICX.
// Classic ICC and ICX accept the same pragmas, so they share one branch.
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#define IVDEP _Pragma("ivdep")
#define ALIGNED _Pragma("vector aligned")
#elif defined(__clang__)
#define IVDEP _Pragma("clang loop vectorize(enable)")
#define ALIGNED
#elif defined(__GNUC__)
#define IVDEP _Pragma("GCC ivdep")
#define ALIGNED
#else
// Fallback for unrecognized compilers: expand to nothing so code using
// IVDEP/ALIGNED still compiles (the pragmas are only optimization hints).
#define IVDEP
#define ALIGNED
#endif
qmckl_exit_code qmckl_sherman_morrison_hpc( qmckl_exit_code qmckl_sherman_morrison_hpc(
const qmckl_context context, const qmckl_context context,
const uint64_t LDS, const uint64_t LDS,
@ -140,8 +157,8 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
// C = S^{-1} x u_l // C = S^{-1} x u_l
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
C[i] = 0.0f; C[i] = 0.0f;
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
} }
@ -161,16 +178,16 @@ qmckl_exit_code qmckl_sherman_morrison_hpc(
*determinant *= den; *determinant *= den;
// selecting column: v_l^T * S_inv // selecting column: v_l^T * S_inv
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
D[j] = Slater_inv[cui * LDS + j]; D[j] = Slater_inv[cui * LDS + j];
} }
// A^{-1} = A^{-1} - C x D / den // A^{-1} = A^{-1} - C x D / den
for (uint32_t i = 0; i < Dim; i++) { for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint32_t j = 0; j < LDS; j++) { for (uint32_t j = 0; j < LDS; j++) {
const double update = C[i] * D[j] * iden; const double update = C[i] * D[j] * iden;
Slater_inv[i * LDS + j] -= update; Slater_inv[i * LDS + j] -= update;
@ -215,8 +232,8 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
// C = A^{-1} x U_l // C = A^{-1} x U_l
for (uint64_t i = 0; i < {Dim}; i++) { for (uint64_t i = 0; i < {Dim}; i++) {
C[i] = 0; C[i] = 0;
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) { for (uint64_t j = 0; j < D{Dim}_P; j++) {
C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j]; C[i] += Slater_inv[i * D{Dim}_P + j] * Updates[l * D{Dim}_P + j];
} }
@ -236,16 +253,16 @@ static inline qmckl_exit_code qmckl_sherman_morrison_{Dim}(
*determinant *= den; *determinant *= den;
// selecting column: D = v_l^T * S_inv // selecting column: D = v_l^T * S_inv
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) { for (uint64_t j = 0; j < D{Dim}_P; j++) {
D[j] = Slater_inv[cui * D{Dim}_P + j]; D[j] = Slater_inv[cui * D{Dim}_P + j];
} }
// A^{-1} = A^{-1} - C x D / den // A^{-1} = A^{-1} - C x D / den
for (uint64_t i = 0; i < {Dim}; i++) { for (uint64_t i = 0; i < {Dim}; i++) {
#pragma ivdep IVDEP
#pragma vector aligned ALIGNED
for (uint64_t j = 0; j < D{Dim}_P; j++) { for (uint64_t j = 0; j < D{Dim}_P; j++) {
double update = C[i] * D[j] * iden; double update = C[i] * D[j] * iden;
Slater_inv[i * D{Dim}_P + j] -= update; Slater_inv[i * D{Dim}_P + j] -= update;
@ -329,7 +346,7 @@ qmckl_exit_code qmckl_sherman_morrison(const qmckl_context context,
<<naive-python-switch()>> <<naive-python-switch()>>
} }
} }
else { // When SIMD_LENGTH > 1, called with LDS == Dim and Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH) else { // When SIMD_LENGTH > 1, called with LDS == Dim AND Dim != (1+(Dim-1)/SIMD_LENGTH)*SIMD_LENGTH)
return qmckl_sherman_morrison_hpc(context, return qmckl_sherman_morrison_hpc(context,
LDS, LDS,
Dim, Dim,