mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Removed IVDEP from SM
This commit is contained in:
parent
2291103a9b
commit
10ee050050
@ -125,11 +125,10 @@ void* qmckl_malloc(qmckl_context context,
|
|||||||
~qmckl_context~.
|
~qmckl_context~.
|
||||||
|
|
||||||
4. The function then allocates memory:
|
4. The function then allocates memory:
|
||||||
If the ~HAVE_HPC~ macro is defined, the memory allocation is done using
|
If the ~HAVE_HPC~ and ~HAVE_ALIGNED_ALLOC~ macros are defined, the memory
|
||||||
the ~aligned_alloc~ function with a 64-byte alignment, rounding up the
|
allocation is done using the ~aligned_alloc~ function with a 64-byte alignment,
|
||||||
requested size to the nearest multiple of 64 bytes. If the ~HAVE_HPC~
|
rounding up the requested size to the nearest multiple of 64 bytes. Otherwise, the
|
||||||
macro is not defined, the memory allocation is done using the standard
|
memory allocation is done using the standard ~malloc~ function.
|
||||||
~malloc~ function.
|
|
||||||
|
|
||||||
5. If the allocation fails, the function returns ~NULL~.
|
5. If the allocation fails, the function returns ~NULL~.
|
||||||
|
|
||||||
@ -154,7 +153,7 @@ void* qmckl_malloc(qmckl_context context, const qmckl_memory_info_struct info) {
|
|||||||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||||
|
|
||||||
/* Allocate memory and zero it */
|
/* Allocate memory and zero it */
|
||||||
#ifdef HAVE_HPC
|
#if defined(HAVE_HPC) && defined(HAVE_ALIGNED_ALLOC)
|
||||||
assert( ((info.size+64) >> 6) << 6 >= info.size );
|
assert( ((info.size+64) >> 6) << 6 >= info.size );
|
||||||
void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 );
|
void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 );
|
||||||
#else
|
#else
|
||||||
|
@ -348,22 +348,6 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
|||||||
#include "assert.h"
|
#include "assert.h"
|
||||||
#include "stdio.h"
|
#include "stdio.h"
|
||||||
|
|
||||||
// Order important because
|
|
||||||
// __GNUC__ also set in ICC, ICX and CLANG
|
|
||||||
// __clang__ also set in ICX
|
|
||||||
#if defined(__INTEL_COMPILER)
|
|
||||||
#define IVDEP _Pragma("ivdep")
|
|
||||||
#define ALIGNED _Pragma("vector aligned")
|
|
||||||
#elif defined(__INTEL_LLVM_COMPILER)
|
|
||||||
#define IVDEP _Pragma("ivdep")
|
|
||||||
#define ALIGNED _Pragma("vector aligned")
|
|
||||||
#elif defined(__clang__)
|
|
||||||
#define IVDEP _Pragma("clang loop vectorize(enable)")
|
|
||||||
#define ALIGNED
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define IVDEP _Pragma("GCC ivdep")
|
|
||||||
#define ALIGNED
|
|
||||||
#endif
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
~qmckl_sm_naive_hpc~ is a high performance variation of
|
~qmckl_sm_naive_hpc~ is a high performance variation of
|
||||||
|
Loading…
Reference in New Issue
Block a user