diff --git a/org/qmckl_memory.org b/org/qmckl_memory.org index caf6acd..64c4a1c 100644 --- a/org/qmckl_memory.org +++ b/org/qmckl_memory.org @@ -125,11 +125,10 @@ void* qmckl_malloc(qmckl_context context, ~qmckl_context~. 4. The function then allocates memory: - If the ~HAVE_HPC~ macro is defined, the memory allocation is done using - the ~aligned_alloc~ function with a 64-byte alignment, rounding up the - requested size to the nearest multiple of 64 bytes. If the ~HAVE_HPC~ - macro is not defined, the memory allocation is done using the standard - ~malloc~ function. + If the ~HAVE_HPC~ and ~HAVE_ALIGNED_ALLOC~ macros are defined, the memory + allocation is done using the ~aligned_alloc~ function with a 64-byte alignment, + rounding up the requested size to the next multiple of 64 bytes. Otherwise, the + memory allocation is done using the standard ~malloc~ function. 5 If the allocation fails, the function returns ~NULL~. @@ -154,7 +153,7 @@ void* qmckl_malloc(qmckl_context context, const qmckl_memory_info_struct info) { qmckl_context_struct* const ctx = (qmckl_context_struct*) context; /* Allocate memory and zero it */ -#ifdef HAVE_HPC +#if defined(HAVE_HPC) && defined(HAVE_ALIGNED_ALLOC) assert( ((info.size+64) >> 6) << 6 >= info.size ); void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 ); #else diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 6e7d779..c910525 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -348,22 +348,6 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels. 
#include "assert.h" #include "stdio.h" -// Order important because -// __GNUC__ also set in ICC, ICX and CLANG -// __clang__ also set in ICX -#if defined(__INTEL_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") -#elif defined(__INTEL_LLVM_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") -#elif defined(__clang__) - #define IVDEP _Pragma("clang loop vectorize(enable)") - #define ALIGNED -#elif defined(__GNUC__) - #define IVDEP _Pragma("GCC ivdep") - #define ALIGNED -#endif #+end_src ~qmckl_sm_naive_hpc~ is a high performance variation of