mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 12:23:56 +01:00
Added IVDEP and ALIGNED in configure.ac
This commit is contained in:
parent
7bec8b7984
commit
fd2addb370
88
configure.ac
88
configure.ac
@ -246,6 +246,94 @@ int simd=1;
|
||||
AC_MSG_RESULT([$SIMD_LENGTH])
|
||||
AC_DEFINE_UNQUOTED([SIMD_LENGTH], [$SIMD_LENGTH], [Length of SIMD vectors])
|
||||
|
||||
# Checking IVDEP
#
# Find the spelling of the "ignore assumed vector dependencies" loop pragma
# accepted by $CC and publish it as the IVDEP macro (defined empty when no
# candidate is accepted).  Candidates are tried in this order:
#   1. #pragma ivdep
#   2. #pragma clang loop vectorize(enable)
#   3. #pragma GCC ivdep
#
# Two details matter for the probes to be meaningful:
#  * AC_LANG_PROGRAM already wraps its body in main(), so the body must not
#    define main() again: a nested function definition is a GNU C extension
#    and makes the probe fail on compilers that do not implement it.
#  * An unrecognised pragma is at most a warning, so the probes are compiled
#    with unknown-pragma warnings promoted to errors.  Without this the first
#    candidate compiles everywhere and is always selected.
ivdep=""
AC_MSG_CHECKING([for ivdep pragma])

# The extra flags are only wanted for the probes; CFLAGS is restored below.
qmckl_ivdep_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Wunknown-pragmas -Werror"

AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma ivdep
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("ivdep")'], [
])

AS_IF([test "x$ivdep" = "x"], [
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma clang loop vectorize(enable)
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("clang loop vectorize(enable)")'], [
])
])

AS_IF([test "x$ivdep" = "x"], [
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
]], [[
#pragma GCC ivdep
for (int i = 0; i < 10; ++i) {
  printf("Testing: %d\n", i);
}
]])],
[ivdep='_Pragma("GCC ivdep")'], [
])
])

CFLAGS="$qmckl_ivdep_save_CFLAGS"

# IVDEP is always defined; it expands to nothing when no pragma was accepted.
AC_DEFINE_UNQUOTED([IVDEP], [$ivdep], [IVDEP pragma])
AS_IF([test "x$ivdep" = "x"], [
  ivdep="no"
])
AC_MSG_RESULT([$ivdep])
|
||||
|
||||
|
||||
# Checking ALIGNED
#
# Publish the ALIGNED macro: it expands to _Pragma("vector aligned") when
# $CC accepts that pragma AND aligned_alloc is available, and to nothing
# otherwise.
#
# AC_CHECK_FUNCS defines HAVE_ALIGNED_ALLOC by itself when the function is
# found, so no separate AC_DEFINE is needed for it.
AC_CHECK_FUNCS([aligned_alloc], [have_aligned_alloc=yes], [have_aligned_alloc=no])

aligned=""
AC_MSG_CHECKING([for vector aligned pragma])

# An unrecognised pragma is at most a warning, so the probe is compiled with
# unknown-pragma warnings promoted to errors; CFLAGS is restored afterwards.
qmckl_aligned_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Wunknown-pragmas -Werror"

# AC_LANG_PROGRAM already wraps its body in main(), so the body must not
# define main() again.  The array is read back after the loop so that the
# probe does not fail on an "unused variable" diagnostic under -Werror.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
]], [[
double __attribute__((aligned(8))) a[10] ;
#pragma vector aligned
for (int i = 0; i < 10; ++i) {
  a[i] = (double) i;
}
if (a[9] < 0.) return 1;
]])],
[aligned='_Pragma("vector aligned")'], [
])

CFLAGS="$qmckl_aligned_save_CFLAGS"

# Without aligned_alloc the pragma is disabled even if the compiler takes it.
AS_IF([test "x$have_aligned_alloc" = "xno"], [
  aligned=""
])

# ALIGNED is always defined; it expands to nothing when the pragma is unused.
AC_DEFINE_UNQUOTED([ALIGNED], [$aligned], [VECTOR ALIGNED pragma])
AS_IF([test "x$aligned" = "x"], [
  aligned="no"
])
AC_MSG_RESULT([$aligned])
|
||||
|
||||
|
||||
# QMCKLDGEMM
|
||||
|
@ -31,7 +31,7 @@ This is the range that determines the how many high performance kernel instantce
|
||||
#+begin_src python :noweb yes :exports none
|
||||
range(2, 22)
|
||||
#+end_src
|
||||
|
||||
|
||||
|
||||
* Naïve Sherman-Morrison
|
||||
** ~qmckl_sm_naive~
|
||||
@ -109,7 +109,7 @@ subroutine convert(upds, s_inv, Updates, Inverse, nupdates, lds, dim)
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, dim, nupdates
|
||||
real*8 , intent(in) :: upds(nupdates * lds)
|
||||
real*8 , intent(in) :: s_inv(dim * lds)
|
||||
real*8 , intent(in) :: s_inv(dim * lds)
|
||||
real*8 , intent(out) , dimension(lds, nupdates) :: Updates
|
||||
real*8 , intent(out) , dimension(dim, lds) :: Inverse
|
||||
|
||||
@ -136,7 +136,7 @@ subroutine copy_back_inv(Inverse, s_inv, lds, dim)
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, dim
|
||||
real*8 , intent(in) , dimension(dim, lds) :: Inverse
|
||||
real*8 , intent(out) :: s_inv(dim * lds)
|
||||
real*8 , intent(out) :: s_inv(dim * lds)
|
||||
|
||||
integer*8 :: i, j
|
||||
|
||||
@ -154,7 +154,7 @@ subroutine copy_back_lu(Later_updates, later_upds, lds, nupdates)
|
||||
implicit none
|
||||
integer*8 , intent(in) :: lds, nupdates
|
||||
real*8 , intent(in) , dimension(lds, nupdates) :: Later_updates
|
||||
real*8 , intent(out) :: later_upds(nupdates * lds)
|
||||
real*8 , intent(out) :: later_upds(nupdates * lds)
|
||||
|
||||
integer*8 :: i, j
|
||||
|
||||
@ -300,7 +300,7 @@ qmckl_exit_code qmckl_sm_naive (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_private_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_hpc")
|
||||
@ -316,7 +316,7 @@ qmckl_exit_code qmckl_sm_naive_hpc (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sm_naive_args,rettyp=get_value("CRetType"),fname="qmckl_sm_naive_doc")
|
||||
@ -332,7 +332,7 @@ qmckl_exit_code qmckl_sm_naive_doc (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
*** C sources
|
||||
@ -345,22 +345,6 @@ Common includes and macros used by all the Sherman-Morrison-Woodbury kernels.
|
||||
#include "assert.h"
|
||||
#include "stdio.h"
|
||||
|
||||
// Order important because
|
||||
// __GNUC__ also set in ICC, ICX and CLANG
|
||||
// __clang__ also set in ICX
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#define IVDEP _Pragma("ivdep")
|
||||
#define ALIGNED _Pragma("vector aligned")
|
||||
#elif defined(__INTEL_LLVM_COMPILER)
|
||||
#define IVDEP _Pragma("ivdep")
|
||||
#define ALIGNED _Pragma("vector aligned")
|
||||
#elif defined(__clang__)
|
||||
#define IVDEP _Pragma("clang loop vectorize(enable)")
|
||||
#define ALIGNED
|
||||
#elif defined(__GNUC__)
|
||||
#define IVDEP _Pragma("GCC ivdep")
|
||||
#define ALIGNED
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
~qmckl_sm_naive_hpc~ is a high performance variation of
|
||||
@ -534,7 +518,7 @@ Python script that generated C switch cases that call individual kernel instance
|
||||
#+NAME:naive_switch-case_generator
|
||||
#+begin_src python :noweb yes
|
||||
text="""
|
||||
case {Dim}:
|
||||
case {Dim}:
|
||||
return qmckl_sm_naive_{Dim}(context,
|
||||
N_updates,
|
||||
Updates,
|
||||
@ -604,7 +588,7 @@ qmckl_exit_code qmckl_sm_naive(const qmckl_context context,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
#endif
|
||||
|
||||
|
||||
return QMCKL_FAILURE;
|
||||
}
|
||||
#+end_src
|
||||
@ -695,16 +679,16 @@ end interface
|
||||
#+end_src
|
||||
|
||||
*** Performance
|
||||
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
||||
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
||||
This function performs best when there is only 1 rank-1 update in the update cycle. It is
|
||||
not useful to use Sherman-Morrison with update splitting for these cycles since splitting
|
||||
can never resolve a situation where applying the update causes singular behaviour.
|
||||
*** Tests
|
||||
The tests for the kernels are executed on datasets that are extracted from a run of
|
||||
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
||||
The tests are run such that the kernels reject the computed inverse whenever the computed
|
||||
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
||||
QMC=Chem. The tests will return QMCKL_SUCCESS whenever all the elements of the final matrix
|
||||
$R = S \cdot S^{-1} - 1$ are smaller than the given tolerance value of 1e-3, and will return
|
||||
The tests for the kernels are executed on datasets that are extracted from a run of
|
||||
QMC=Chem on Benzene (21 spin-up/21 spin down electrons) using 329 unique alpha determinants.
|
||||
The tests are run such that the kernels reject the computed inverse whenever the computed
|
||||
intermediate determinants or denominators are smaller than 1e-3. This is the default value in
|
||||
QMC=Chem. The tests will return QMCKL_SUCCESS whenever all the elements of the final matrix
|
||||
$R = S \cdot S^{-1} - 1$ are smaller than the given tolerance value of 1e-3, and will return
|
||||
QMCKL_FAILURE if the values are larger than this tolerance value.
|
||||
|
||||
#+begin_src c :tangle (eval c_test)
|
||||
@ -777,8 +761,8 @@ It has three extra parameters in its API:
|
||||
|
||||
It is up to the user to decide what to do with these updates once the kernel returns. Normally ~qmckl_sm_splitting_core~ is
|
||||
used as the core part of a recursive function, as is done in ~qmckl_sm_splitting~ or as part of a more complex
|
||||
kernel like ~qmckl_sherman_morrison_smw32s~.
|
||||
|
||||
kernel like ~qmckl_sherman_morrison_smw32s~.
|
||||
|
||||
If the determinant is passed it will only be partially updated if there were any update splits.
|
||||
|
||||
*** API
|
||||
@ -922,7 +906,7 @@ integer function qmckl_sm_splitting_core_doc_f( &
|
||||
info = QMCKL_SUCCESS
|
||||
|
||||
write(*,*) "Leaving 'qmckl_sm_splittinig_core_doc_f'"
|
||||
|
||||
|
||||
end function qmckl_sm_splitting_core_doc_f
|
||||
#+end_src
|
||||
|
||||
@ -1003,7 +987,7 @@ qmckl_exit_code qmckl_sm_splitting_core (
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_hpc")
|
||||
@ -1022,7 +1006,7 @@ qmckl_exit_code qmckl_sm_splitting_core_hpc (
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sm_splitting_core_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_core_doc")
|
||||
@ -1041,7 +1025,7 @@ qmckl_exit_code qmckl_sm_splitting_core_doc (
|
||||
double* later_updates,
|
||||
uint64_t* later_index,
|
||||
uint64_t* later,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
*** C sources
|
||||
@ -1242,7 +1226,6 @@ case {Dim}: {
|
||||
later_index,
|
||||
later,
|
||||
determinant);
|
||||
break;
|
||||
}"""
|
||||
result = []
|
||||
for Dim in <<kernel_generator_range>>:
|
||||
@ -1493,7 +1476,7 @@ integer recursive function qmckl_sm_splitting_doc_f( &
|
||||
real*8 , intent(inout) :: determinant
|
||||
|
||||
integer , external :: qmckl_sm_splitting_core_doc_f
|
||||
|
||||
|
||||
integer*8 :: Later
|
||||
integer*8 , dimension(nupdates) :: Later_index
|
||||
real*8 , dimension(lds * nupdates) :: Later_updates
|
||||
@ -1523,7 +1506,7 @@ integer recursive function qmckl_sm_splitting_doc_f( &
|
||||
Later_index, &
|
||||
Later, &
|
||||
determinant)
|
||||
|
||||
|
||||
if (Later > 0) then
|
||||
info = qmckl_sm_splitting_doc_f( &
|
||||
context, &
|
||||
@ -1539,7 +1522,7 @@ integer recursive function qmckl_sm_splitting_doc_f( &
|
||||
info = QMCKL_SUCCESS
|
||||
|
||||
write(*,*) "Leaving 'qmckl_sm_splitting_doc_f'"
|
||||
|
||||
|
||||
end function qmckl_sm_splitting_doc_f
|
||||
#+end_src
|
||||
|
||||
@ -1574,12 +1557,12 @@ integer(c_int32_t) function qmckl_sm_splitting_doc &
|
||||
integer(c_int32_t), external :: qmckl_sm_splitting_doc_f
|
||||
|
||||
write(*,*) "Entering 'qmckl_sm_splitting_doc'"
|
||||
|
||||
|
||||
info = qmckl_sm_splitting_doc_f &
|
||||
(context, LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv, determinant)
|
||||
|
||||
write(*,*) "Leaving 'qmckl_sm_splitting_doc'"
|
||||
|
||||
|
||||
end function qmckl_sm_splitting_doc
|
||||
#+end_src
|
||||
|
||||
@ -1598,7 +1581,7 @@ qmckl_exit_code qmckl_sm_splitting (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_private_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_hpc")
|
||||
@ -1614,7 +1597,7 @@ qmckl_exit_code qmckl_sm_splitting_hpc (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_sm_splitting_args,rettyp=get_value("CRetType"),fname="qmckl_sm_splitting_doc")
|
||||
@ -1630,7 +1613,7 @@ qmckl_exit_code qmckl_sm_splitting_doc (
|
||||
const uint64_t* Updates_index,
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant );
|
||||
double* determinant );
|
||||
#+end_src
|
||||
|
||||
*** C source
|
||||
@ -1722,8 +1705,6 @@ qmckl_exit_code qmckl_sm_splitting(
|
||||
const double breakdown,
|
||||
double* Slater_inv,
|
||||
double* determinant) {
|
||||
|
||||
printf("Entering 'qmckl_sm_splitting'\n");
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return qmckl_failwith(
|
||||
@ -1754,11 +1735,9 @@ qmckl_exit_code qmckl_sm_splitting(
|
||||
breakdown,
|
||||
Slater_inv,
|
||||
determinant);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
printf("Leaving 'qmckl_sm_splitting'\n");
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#+end_src
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user