mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-08 04:19:15 +01:00
Merge olympe2:qmckl
This commit is contained in:
commit
42222f73a5
99
configure.ac
99
configure.ac
@ -246,6 +246,105 @@ int simd=1;
|
|||||||
AC_MSG_RESULT([$SIMD_LENGTH])
|
AC_MSG_RESULT([$SIMD_LENGTH])
|
||||||
AC_DEFINE_UNQUOTED([SIMD_LENGTH], [$SIMD_LENGTH], [Length of SIMD vectors])
|
AC_DEFINE_UNQUOTED([SIMD_LENGTH], [$SIMD_LENGTH], [Length of SIMD vectors])
|
||||||
|
|
||||||
|
# Checking IVDEP
#
# Try the ivdep, clang loop vectorize and GCC ivdep pragmas in that order
# and define IVDEP to the _Pragma form of the first one the compiler
# accepts. IVDEP is defined empty when none of them is accepted.
#
# Two things are needed for the probes to be meaningful:
#  - Autoconf already wraps the test body in main, so the statements are
#    given bare. Wrapping them in a second main creates a nested function,
#    a GNU C extension that Clang-based compilers refuse to compile.
#  - An unknown pragma is normally only a warning, so the first probe
#    would succeed with any compiler. When the compiler supports the flag,
#    unknown pragmas are turned into errors for the duration of the probes.
ivdep=""
AC_MSG_CHECKING([for ivdep pragma])

qmckl_ivdep_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Werror=unknown-pragmas"
# Fall back to the unmodified flags if the compiler rejects the option.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [], [
  CFLAGS="$qmckl_ivdep_save_CFLAGS"
])

AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma ivdep
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("ivdep")'], [
])

AS_IF([test "x$ivdep" = "x"], [
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma clang loop vectorize(enable)
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("clang loop vectorize(enable)")'], [
])
])

AS_IF([test "x$ivdep" = "x"], [
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma GCC ivdep
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("GCC ivdep")'], [
])
])

# The probes are done: restore the flags chosen by the user.
CFLAGS="$qmckl_ivdep_save_CFLAGS"

AC_DEFINE_UNQUOTED([IVDEP], [$ivdep], [IVDEP pragma])
# Only the reported result is changed to "no"; IVDEP itself stays empty.
AS_IF([test "x$ivdep" = "x"], [
ivdep="no"
])
AC_MSG_RESULT([$ivdep])
|
||||||
|
|
||||||
|
|
||||||
|
# Checking ALIGNED

# Check that a call to aligned_alloc compiles and, if so, define
# HAVE_ALIGNED_ALLOC to 1.
#
# aligned_alloc and free are declared in stdlib.h. Without the header the
# calls are implicit function declarations, so the probe does not test
# whether the function is really declared. The statements are given bare
# because Autoconf already wraps the test body in main, and a nested
# function is a GNU C extension that Clang-based compilers refuse.
AC_MSG_CHECKING([for aligned_alloc])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdlib.h>
]], [[
void * pointer = aligned_alloc(64, 100);
free(pointer);
]])],
[have_aligned_alloc=yes], [have_aligned_alloc=no
])
AS_IF([test "x$have_aligned_alloc" = "xyes"], [
AC_DEFINE([HAVE_ALIGNED_ALLOC], [1], [Define to 1 if you have the aligned_alloc function.])
])
AC_MSG_RESULT([$have_aligned_alloc])
|
||||||
|
|
||||||
|
# Check whether the compiler accepts the vector aligned loop pragma and
# define ALIGNED to its _Pragma form. ALIGNED is defined empty when the
# pragma is not accepted or when have_aligned_alloc is no.
#
# The test statements are given bare because Autoconf already wraps the
# test body in main: a second main would be a nested function, a GNU C
# extension that Clang-based compilers refuse to compile. Unknown pragmas
# are normally only warnings, so when the compiler supports the flag they
# are turned into errors for the duration of the probe.
aligned=""
AC_MSG_CHECKING([for vector aligned pragma])

qmckl_aligned_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Werror=unknown-pragmas"
# Fall back to the unmodified flags if the compiler rejects the option.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])], [], [
  CFLAGS="$qmckl_aligned_save_CFLAGS"
])

AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
]], [[
double __attribute__((aligned(8))) a[10] ;
#pragma vector aligned
for (int i = 0; i < 10; ++i) {
  a[i] = (double) i;
}
]])],
[aligned='_Pragma("vector aligned")'], [
])

# The probe is done: restore the flags chosen by the user.
CFLAGS="$qmckl_aligned_save_CFLAGS"

# The pragma is not used when aligned_alloc is unavailable.
AS_IF([test "x$have_aligned_alloc" = "xno"], [
aligned=""
])

AC_DEFINE_UNQUOTED([ALIGNED], [$aligned], [VECTOR ALIGNED pragma])
# Only the reported result is changed to "no"; ALIGNED itself stays empty.
AS_IF([test "x$aligned" = "x"], [
aligned="no"
])
AC_MSG_RESULT([$aligned])
|
||||||
|
|
||||||
|
|
||||||
# QMCKLDGEMM
|
# QMCKLDGEMM
|
||||||
|
@ -3846,7 +3846,7 @@ print ( "[7][4][26] : %e"% lf(a,x,y))
|
|||||||
|
|
||||||
assert(qmckl_electron_provided(context));
|
assert(qmckl_electron_provided(context));
|
||||||
|
|
||||||
const int64_t point_num = elec_num;
|
int64_t point_num = elec_num;
|
||||||
|
|
||||||
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
||||||
assert(rc == QMCKL_SUCCESS);
|
assert(rc == QMCKL_SUCCESS);
|
||||||
@ -4261,7 +4261,7 @@ print ( "[1][4][26] : %25.15e"% lf(a,x,y))
|
|||||||
|
|
||||||
assert(qmckl_electron_provided(context));
|
assert(qmckl_electron_provided(context));
|
||||||
|
|
||||||
const int64_t point_num = elec_num;
|
int64_t point_num = elec_num;
|
||||||
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
||||||
assert(rc == QMCKL_SUCCESS);
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
|
||||||
@ -6310,7 +6310,7 @@ double* elec_coord = &(chbrclf_elec_coord[0][0][0]);
|
|||||||
|
|
||||||
assert(qmckl_electron_provided(context));
|
assert(qmckl_electron_provided(context));
|
||||||
|
|
||||||
const int64_t point_num = elec_num;
|
int64_t point_num = elec_num;
|
||||||
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
||||||
assert(rc == QMCKL_SUCCESS);
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
|
||||||
@ -7261,7 +7261,7 @@ double* elec_coord = &(chbrclf_elec_coord[0][0][0]);
|
|||||||
|
|
||||||
assert(qmckl_electron_provided(context));
|
assert(qmckl_electron_provided(context));
|
||||||
|
|
||||||
const int64_t point_num = elec_num;
|
int64_t point_num = elec_num;
|
||||||
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
rc = qmckl_set_point(context, 'N', point_num, elec_coord, point_num*3);
|
||||||
assert(rc == QMCKL_SUCCESS);
|
assert(rc == QMCKL_SUCCESS);
|
||||||
|
|
||||||
|
@ -125,11 +125,10 @@ void* qmckl_malloc(qmckl_context context,
|
|||||||
~qmckl_context~.
|
~qmckl_context~.
|
||||||
|
|
||||||
4. The function then allocates memory:
|
4. The function then allocates memory:
|
||||||
If the ~HAVE_HPC~ macro is defined, the memory allocation is done using
|
If the ~HAVE_HPC~ and ~HAVE_ALIGNED_ALLOC~ macros are defined, the memory
|
||||||
the ~aligned_alloc~ function with a 64-byte alignment, rounding up the
|
allocation is done using the ~aligned_alloc~ function with a 64-byte alignment,
|
||||||
requested size to the nearest multiple of 64 bytes. If the ~HAVE_HPC~
|
rounding up the requested size to the nearest multiple of 64 bytes. Otherwise, the
|
||||||
macro is not defined, the memory allocation is done using the standard
|
memory allocation is done using the standard ~malloc~ function.
|
||||||
~malloc~ function.
|
|
||||||
|
|
||||||
5. If the allocation fails, the function returns ~NULL~.
|
5. If the allocation fails, the function returns ~NULL~.
|
||||||
|
|
||||||
@ -154,7 +153,7 @@ void* qmckl_malloc(qmckl_context context, const qmckl_memory_info_struct info) {
|
|||||||
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
|
||||||
|
|
||||||
/* Allocate memory and zero it */
|
/* Allocate memory and zero it */
|
||||||
#ifdef HAVE_HPC
|
#if defined(HAVE_HPC) && defined(HAVE_ALIGNED_ALLOC)
|
||||||
assert( ((info.size+64) >> 6) << 6 >= info.size );
|
assert( ((info.size+64) >> 6) << 6 >= info.size );
|
||||||
void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 );
|
void * pointer = aligned_alloc(64, ((info.size+64) >> 6) << 6 );
|
||||||
#else
|
#else
|
||||||
|
@ -255,7 +255,6 @@ qmckl_exit_code qmckl_set_mo_basis_mo_num(qmckl_context context, const int64_t m
|
|||||||
|
|
||||||
<<post>>
|
<<post>>
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -1087,7 +1086,6 @@ qmckl_exit_code qmckl_provide_mo_basis_mo_value(qmckl_context context)
|
|||||||
} else {
|
} else {
|
||||||
rc = qmckl_provide_en_distance(context);
|
rc = qmckl_provide_en_distance(context);
|
||||||
if (rc != QMCKL_SUCCESS) {
|
if (rc != QMCKL_SUCCESS) {
|
||||||
return rc;
|
|
||||||
return qmckl_failwith( context,
|
return qmckl_failwith( context,
|
||||||
QMCKL_NOT_PROVIDED,
|
QMCKL_NOT_PROVIDED,
|
||||||
"qmckl_provide_mo_basis_mo_value",
|
"qmckl_provide_mo_basis_mo_value",
|
||||||
@ -2812,11 +2810,11 @@ print ( "[4][1][15][14] : %25.15e"% lf(a,x,y))
|
|||||||
int64_t elec_up_num = chbrclf_elec_up_num;
|
int64_t elec_up_num = chbrclf_elec_up_num;
|
||||||
int64_t elec_dn_num = chbrclf_elec_dn_num;
|
int64_t elec_dn_num = chbrclf_elec_dn_num;
|
||||||
double* elec_coord = &(chbrclf_elec_coord[0][0][0]);
|
double* elec_coord = &(chbrclf_elec_coord[0][0][0]);
|
||||||
const int64_t nucl_num = chbrclf_nucl_num;
|
int64_t nucl_num = chbrclf_nucl_num;
|
||||||
const double* nucl_charge = chbrclf_charge;
|
const double* nucl_charge = chbrclf_charge;
|
||||||
const double* nucl_coord = &(chbrclf_nucl_coord[0][0]);
|
const double* nucl_coord = &(chbrclf_nucl_coord[0][0]);
|
||||||
|
|
||||||
const int64_t point_num = walk_num*elec_num;
|
int64_t point_num = walk_num*elec_num;
|
||||||
|
|
||||||
rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num);
|
rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num);
|
||||||
assert (rc == QMCKL_SUCCESS);
|
assert (rc == QMCKL_SUCCESS);
|
||||||
|
@ -348,22 +348,6 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
|||||||
#include "assert.h"
|
#include "assert.h"
|
||||||
#include "stdio.h"
|
#include "stdio.h"
|
||||||
|
|
||||||
// Order important because
|
|
||||||
// __GNUC__ also set in ICC, ICX and CLANG
|
|
||||||
// __clang__ also set in ICX
|
|
||||||
#if defined(__INTEL_COMPILER)
|
|
||||||
#define IVDEP _Pragma("ivdep")
|
|
||||||
#define ALIGNED _Pragma("vector aligned")
|
|
||||||
#elif defined(__INTEL_LLVM_COMPILER)
|
|
||||||
#define IVDEP _Pragma("ivdep")
|
|
||||||
#define ALIGNED _Pragma("vector aligned")
|
|
||||||
#elif defined(__clang__)
|
|
||||||
#define IVDEP _Pragma("clang loop vectorize(enable)")
|
|
||||||
#define ALIGNED
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
#define IVDEP _Pragma("GCC ivdep")
|
|
||||||
#define ALIGNED
|
|
||||||
#endif
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
~qmckl_sm_naive_hpc~ is a high performance variation of
|
~qmckl_sm_naive_hpc~ is a high performance variation of
|
||||||
@ -1240,7 +1224,6 @@ case {Dim}: {
|
|||||||
later_index,
|
later_index,
|
||||||
later,
|
later,
|
||||||
determinant);
|
determinant);
|
||||||
break;
|
|
||||||
}"""
|
}"""
|
||||||
result = []
|
result = []
|
||||||
for Dim in <<kernel_generator_range>>:
|
for Dim in <<kernel_generator_range>>:
|
||||||
@ -3078,7 +3061,6 @@ qmckl_exit_code qmckl_sm_splitting(
|
|||||||
determinant);
|
determinant);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user