diff --git a/configure.ac b/configure.ac
index 6ddfa51..3ac782d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -246,6 +246,105 @@ int simd=1;
 AC_MSG_RESULT([$SIMD_LENGTH])
 AC_DEFINE_UNQUOTED([SIMD_LENGTH], [$SIMD_LENGTH], [Length of SIMD vectors])
 
+# Checking IVDEP
+#
+# Try the Intel, Clang and GCC spellings of the loop pragma in turn and keep
+# the first one the compiler accepts. Unknown pragmas are normally only
+# warned about, so they are promoted to errors while probing; otherwise the
+# first candidate would be accepted by every compiler. The test bodies are
+# plain statements because the program template already wraps them in main().
+ivdep=""
+ivdep_save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -Werror=unknown-pragmas"
+AC_MSG_CHECKING([for ivdep pragma])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+  #include <stdio.h>
+]], [[
+  #pragma ivdep
+  for (int i = 0; i < 10; ++i) {
+    printf("Testing: %d\n", i);
+  }
+]])],
+  [ivdep='_Pragma("ivdep")'], [
+])
+
+AS_IF([test "x$ivdep" = "x"], [
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+    #include <stdio.h>
+  ]], [[
+    #pragma clang loop vectorize(enable)
+    for (int i = 0; i < 10; ++i) {
+      printf("Testing: %d\n", i);
+    }
+  ]])],
+    [ivdep='_Pragma("clang loop vectorize(enable)")'], [
+  ])
+])
+
+AS_IF([test "x$ivdep" = "x"], [
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+    #include <stdio.h>
+  ]], [[
+    #pragma GCC ivdep
+    for (int i = 0; i < 10; ++i) {
+      printf("Testing: %d\n", i);
+    }
+  ]])],
+    [ivdep='_Pragma("GCC ivdep")'], [
+  ])
+])
+CFLAGS="$ivdep_save_CFLAGS"
+
+AC_DEFINE_UNQUOTED([IVDEP], [$ivdep], [IVDEP pragma])
+AS_IF([test "x$ivdep" = "x"], [
+  ivdep="no"
+])
+AC_MSG_RESULT([$ivdep])
+
+
+# Checking ALIGNED
+
+# Link a real call with the declaring header included, so a C library
+# without aligned_alloc is reported as such.
+AC_MSG_CHECKING([for aligned_alloc])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+  #include <stdlib.h>
+]], [[
+  void * pointer = aligned_alloc(64, 128);
+  free(pointer);
+]])],
+  [have_aligned_alloc=yes], [have_aligned_alloc=no
+])
+AS_IF([test "x$have_aligned_alloc" = "xyes"], [
+  AC_DEFINE([HAVE_ALIGNED_ALLOC], [1], [Define to 1 if you have the aligned_alloc function.])
+])
+AC_MSG_RESULT([$have_aligned_alloc])
+
+aligned=""
+aligned_save_CFLAGS="$CFLAGS"
+CFLAGS="$CFLAGS -Werror=unknown-pragmas"
+AC_MSG_CHECKING([for vector aligned pragma])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+]], [[
+  double __attribute__((aligned(8))) a[10] ;
+  #pragma vector aligned
+  for (int i = 0; i < 10; ++i) {
+    a[i] = (double) i;
+  }
+]])],
+  [aligned='_Pragma("vector aligned")'], [
+])
+CFLAGS="$aligned_save_CFLAGS"
+ +AS_IF([test "x$have_aligned_alloc" = "xno"], [ + aligned="" +]) + +AC_DEFINE_UNQUOTED([ALIGNED], [$aligned], [VECTOR ALIGNED pragma]) +AS_IF([test "x$aligned" = "x"], [ + aligned="no" +]) +AC_MSG_RESULT([$aligned]) # QMCKLDGEMM diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index e61b7f2..36d9f6e 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3846,7 +3846,7 @@ print ( "[7][4][26] : %e"% lf(a,x,y)) assert(qmckl_electron_provided(context)); - const int64_t point_num = elec_num; + int64_t point_num = elec_num; rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3); assert(rc == QMCKL_SUCCESS); @@ -4261,7 +4261,7 @@ print ( "[1][4][26] : %25.15e"% lf(a,x,y)) assert(qmckl_electron_provided(context)); - const int64_t point_num = elec_num; + int64_t point_num = elec_num; rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3); assert(rc == QMCKL_SUCCESS); @@ -6310,7 +6310,7 @@ double* elec_coord = &(chbrclf_elec_coord[0][0][0]); assert(qmckl_electron_provided(context)); -const int64_t point_num = elec_num; +int64_t point_num = elec_num; rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3); assert(rc == QMCKL_SUCCESS); @@ -7261,7 +7261,7 @@ double* elec_coord = &(chbrclf_elec_coord[0][0][0]); assert(qmckl_electron_provided(context)); -const int64_t point_num = elec_num; +int64_t point_num = elec_num; rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3); assert(rc == QMCKL_SUCCESS); diff --git a/org/qmckl_memory.org b/org/qmckl_memory.org index caf6acd..64c4a1c 100644 --- a/org/qmckl_memory.org +++ b/org/qmckl_memory.org @@ -125,11 +125,10 @@ void* qmckl_malloc(qmckl_context context, ~qmckl_context~. 4. The function then allocates memory: - If the ~HAVE_HPC~ macro is defined, the memory allocation is done using - the ~aligned_alloc~ function with a 64-byte alignment, rounding up the - requested size to the nearest multiple of 64 bytes. 
If the ~HAVE_HPC~ - macro is not defined, the memory allocation is done using the standard - ~malloc~ function. + If the ~HAVE_HPC~ and ~HAVE_ALIGNED_ALLOC~ macros are defined, the memory + allocation is done using the ~aligned_alloc~ function with a 64-byte alignment, + rounding up the requested size to the nearest multiple of 64 bytes. Else, the + memory allocation is done using the standard ~malloc~ function. 5 If the allocation fails, the function returns ~NULL~. @@ -154,7 +153,7 @@ void* qmckl_malloc(qmckl_context context, const qmckl_memory_info_struct info) { qmckl_context_struct* const ctx = (qmckl_context_struct*) context; /* Allocate memory and zero it */ -#ifdef HAVE_HPC +#if defined(HAVE_HPC) && defined(HAVE_ALIGNED_ALLOC) assert( ((info.size+64) >> 6) << 6 >= info.size ); void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 ); #else diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 9f12d40..40b3190 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -255,7 +255,6 @@ qmckl_exit_code qmckl_set_mo_basis_mo_num(qmckl_context context, const int64_t m <> - return QMCKL_SUCCESS; } #+end_src @@ -1087,7 +1086,6 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context) } else { rc = qmckl_provide_en_distance(context); if (rc != QMCKL_SUCCESS) { - return rc; return qmckl_failwith( context, QMCKL_NOT_PROVIDED, "qmckl_provide_mo_basis_mo_value", @@ -2812,11 +2810,11 @@ print ( "[4][1][15][14] : %25.15e"% lf(a,x,y)) int64_t elec_up_num = chbrclf_elec_up_num; int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); -const int64_t nucl_num = chbrclf_nucl_num; +int64_t nucl_num = chbrclf_nucl_num; const double* nucl_charge = chbrclf_charge; const double* nucl_coord = &(chbrclf_nucl_coord[0][0]); -const int64_t point_num = walk_num*elec_num; +int64_t point_num = walk_num*elec_num; rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); assert (rc == QMCKL_SUCCESS); diff --git 
a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 6e7d779..68ba0b6 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -348,22 +348,6 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels. #include "assert.h" #include "stdio.h" -// Order important because -// __GNUC__ also set in ICC, ICX and CLANG -// __clang__ also set in ICX -#if defined(__INTEL_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") -#elif defined(__INTEL_LLVM_COMPILER) - #define IVDEP _Pragma("ivdep") - #define ALIGNED _Pragma("vector aligned") -#elif defined(__clang__) - #define IVDEP _Pragma("clang loop vectorize(enable)") - #define ALIGNED -#elif defined(__GNUC__) - #define IVDEP _Pragma("GCC ivdep") - #define ALIGNED -#endif #+end_src ~qmckl_sm_naive_hpc~ is a high performance variation of @@ -1240,7 +1224,6 @@ case {Dim}: { later_index, later, determinant); - break; }""" result = [] for Dim in <>: @@ -3078,7 +3061,6 @@ qmckl_exit_code qmckl_sm_splitting( determinant); #endif - return QMCKL_SUCCESS; } #+end_src