mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-10 21:18:37 +01:00
Merge pull request #72 from PurplePachyderm/master
Merge in-progress work of GPU ports
This commit is contained in:
commit
f8e6d5f06b
58
configure.ac
58
configure.ac
@ -224,6 +224,47 @@ AS_IF([test "$HAVE_HPC" = "yes"], [
|
|||||||
AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines])
|
AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# Enable Verificarlo tests
|
||||||
|
AC_ARG_ENABLE([vfc_ci],
|
||||||
|
[ --enable-vfc_ci Build the library with vfc_ci support],
|
||||||
|
[case "${enableval}" in
|
||||||
|
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
||||||
|
no) vfc_ci=false ;;
|
||||||
|
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
||||||
|
esac],[vfc_ci=false])
|
||||||
|
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
||||||
|
|
||||||
|
if test "$FC" = "verificarlo-f"; then
|
||||||
|
AC_MSG_NOTICE(verificarlo-f detected)
|
||||||
|
# Arguments order is important here
|
||||||
|
FCFLAGS="-Mpreprocess $FCFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
## Enable GPU offloading
|
||||||
|
|
||||||
|
# OpenACC offloading
|
||||||
|
AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no)
|
||||||
|
AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [
|
||||||
|
AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines])
|
||||||
|
CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS"
|
||||||
|
FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS"
|
||||||
|
])
|
||||||
|
|
||||||
|
# cuBLAS offloading
|
||||||
|
AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no)
|
||||||
|
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||||
|
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
|
||||||
|
FCFLAGS="-DHAVE_CUBLAS_OFFLOAD"
|
||||||
|
])
|
||||||
|
|
||||||
|
# General offload
|
||||||
|
AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||||
|
CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS"
|
||||||
|
FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS"
|
||||||
|
])
|
||||||
|
|
||||||
|
##
|
||||||
|
|
||||||
AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no)
|
AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no)
|
||||||
if test "$ok" = "yes"; then
|
if test "$ok" = "yes"; then
|
||||||
if test "$GCC" = "yes"; then
|
if test "$GCC" = "yes"; then
|
||||||
@ -319,21 +360,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then
|
|||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Enable Verificarlo tests
|
|
||||||
AC_ARG_ENABLE([vfc_ci],
|
|
||||||
[ --enable-vfc_ci Build the library with vfc_ci support],
|
|
||||||
[case "${enableval}" in
|
|
||||||
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
|
||||||
no) vfc_ci=false ;;
|
|
||||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
|
||||||
esac],[vfc_ci=false])
|
|
||||||
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
|
||||||
|
|
||||||
if test "$FC" = "verificarlo-f"; then
|
|
||||||
AC_MSG_NOTICE(verificarlo-f detected)
|
|
||||||
# Arguments order is important here
|
|
||||||
FCFLAGS="-Mpreprocess $FCFLAGS"
|
|
||||||
fi
|
|
||||||
|
|
||||||
#PKG-CONFIG
|
#PKG-CONFIG
|
||||||
#mkl-dynamic-lp64-seq
|
#mkl-dynamic-lp64-seq
|
||||||
@ -369,6 +395,8 @@ LDFLAGS:........: ${LDFLAGS}
|
|||||||
LIBS............: ${LIBS}
|
LIBS............: ${LIBS}
|
||||||
USE CHAMELEON...: ${with_chameleon}
|
USE CHAMELEON...: ${with_chameleon}
|
||||||
HPC version.....: ${HAVE_HPC}
|
HPC version.....: ${HAVE_HPC}
|
||||||
|
OpenACC offload.: ${HAVE_OPENACC_OFFLOAD}
|
||||||
|
cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD}
|
||||||
|
|
||||||
Package features:
|
Package features:
|
||||||
${ARGS}
|
${ARGS}
|
||||||
|
@ -151,6 +151,7 @@ int main() {
|
|||||||
| ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative |
|
| ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative |
|
||||||
| ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part |
|
| ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part |
|
||||||
| ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative |
|
| ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative |
|
||||||
|
| ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime |
|
||||||
|
|
||||||
computed data:
|
computed data:
|
||||||
|
|
||||||
@ -327,6 +328,14 @@ kappa_inv = 1.0/kappa
|
|||||||
|
|
||||||
** Data structure
|
** Data structure
|
||||||
|
|
||||||
|
#+begin_src c :comments org :tangle (eval h_type)
|
||||||
|
typedef enum qmckl_jastrow_offload_type{
|
||||||
|
OFFLOAD_NONE,
|
||||||
|
OFFLOAD_OPENACC,
|
||||||
|
OFFLOAD_CUBLAS
|
||||||
|
} qmckl_jastrow_offload_type;
|
||||||
|
#+end_src
|
||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval h_private_type)
|
#+begin_src c :comments org :tangle (eval h_private_type)
|
||||||
typedef struct qmckl_jastrow_struct{
|
typedef struct qmckl_jastrow_struct{
|
||||||
int32_t uninitialized;
|
int32_t uninitialized;
|
||||||
@ -372,6 +381,7 @@ typedef struct qmckl_jastrow_struct{
|
|||||||
uint64_t een_rescaled_n_deriv_e_date;
|
uint64_t een_rescaled_n_deriv_e_date;
|
||||||
bool provided;
|
bool provided;
|
||||||
char * type;
|
char * type;
|
||||||
|
qmckl_jastrow_offload_type offload_type;
|
||||||
} qmckl_jastrow_struct;
|
} qmckl_jastrow_struct;
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -416,6 +426,7 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int
|
|||||||
qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max);
|
||||||
qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max);
|
||||||
qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max);
|
||||||
|
qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type);
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
Along with these core functions, calculation of the jastrow factor
|
Along with these core functions, calculation of the jastrow factor
|
||||||
@ -713,6 +724,32 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context,
|
|||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) {
|
||||||
|
|
||||||
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
|
return (char) 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (offload_type == NULL) {
|
||||||
|
return qmckl_failwith( context,
|
||||||
|
QMCKL_INVALID_ARG_2,
|
||||||
|
"qmckl_get_jastrow_offload_type",
|
||||||
|
"offload_type is a null pointer");
|
||||||
|
}
|
||||||
|
|
||||||
|
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
|
||||||
|
assert (ctx != NULL);
|
||||||
|
|
||||||
|
int32_t mask = 1 << 0;
|
||||||
|
|
||||||
|
if ( (ctx->jastrow.uninitialized & mask) != 0) {
|
||||||
|
return QMCKL_NOT_PROVIDED;
|
||||||
|
}
|
||||||
|
|
||||||
|
*offload_type = ctx->jastrow.offload_type;
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Initialization functions
|
** Initialization functions
|
||||||
@ -727,6 +764,7 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con
|
|||||||
qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max);
|
||||||
qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max);
|
||||||
qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max);
|
qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max);
|
||||||
|
qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type);
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
#+NAME:pre2
|
#+NAME:pre2
|
||||||
@ -1063,6 +1101,14 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
|
|||||||
<<post2>>
|
<<post2>>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qmckl_exit_code
|
||||||
|
qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type)
|
||||||
|
{
|
||||||
|
<<pre2>>
|
||||||
|
ctx->jastrow.offload_type = offload_type;
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
When the required information is completely entered, other data structures are
|
When the required information is completely entered, other data structures are
|
||||||
@ -2560,6 +2606,7 @@ qmckl_exit_code qmckl_compute_factor_en (
|
|||||||
|
|
||||||
double x, x1, power_ser;
|
double x, x1, power_ser;
|
||||||
|
|
||||||
|
|
||||||
if (context == QMCKL_NULL_CONTEXT) {
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
return QMCKL_INVALID_CONTEXT;
|
return QMCKL_INVALID_CONTEXT;
|
||||||
}
|
}
|
||||||
@ -4812,7 +4859,6 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context)
|
|||||||
|
|
||||||
qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
return QMCKL_NULL_CONTEXT;
|
return QMCKL_NULL_CONTEXT;
|
||||||
}
|
}
|
||||||
@ -4844,6 +4890,20 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
|||||||
ctx->jastrow.tmp_c = tmp_c;
|
ctx->jastrow.tmp_c = tmp_c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Choose the correct compute function (depending on offload type) */
|
||||||
|
switch(ctx->jastrow.offload_type) {
|
||||||
|
case OFFLOAD_OPENACC:
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
rc =
|
||||||
|
qmckl_compute_tmp_c_acc_offload(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.tmp_c);
|
||||||
|
#else
|
||||||
rc = qmckl_compute_tmp_c(context,
|
rc = qmckl_compute_tmp_c(context,
|
||||||
ctx->jastrow.cord_num,
|
ctx->jastrow.cord_num,
|
||||||
ctx->electron.num,
|
ctx->electron.num,
|
||||||
@ -4852,8 +4912,41 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
|||||||
ctx->jastrow.een_rescaled_e,
|
ctx->jastrow.een_rescaled_e,
|
||||||
ctx->jastrow.een_rescaled_n,
|
ctx->jastrow.een_rescaled_n,
|
||||||
ctx->jastrow.tmp_c);
|
ctx->jastrow.tmp_c);
|
||||||
if (rc != QMCKL_SUCCESS) {
|
|
||||||
return rc;
|
#endif
|
||||||
|
break;
|
||||||
|
case OFFLOAD_CUBLAS:
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
rc =
|
||||||
|
qmckl_compute_tmp_c_cublas_offload(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.tmp_c);
|
||||||
|
#else
|
||||||
|
rc = qmckl_compute_tmp_c(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.tmp_c);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
rc = qmckl_compute_tmp_c(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.tmp_c);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->jastrow.tmp_c_date = ctx->date;
|
ctx->jastrow.tmp_c_date = ctx->date;
|
||||||
@ -4864,7 +4957,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
|||||||
|
|
||||||
qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||||
return QMCKL_NULL_CONTEXT;
|
return QMCKL_NULL_CONTEXT;
|
||||||
}
|
}
|
||||||
@ -4896,6 +4988,18 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
|||||||
ctx->jastrow.dtmp_c = dtmp_c;
|
ctx->jastrow.dtmp_c = dtmp_c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch(ctx->jastrow.offload_type) {
|
||||||
|
case OFFLOAD_OPENACC:
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.dtmp_c);
|
||||||
|
#else
|
||||||
rc = qmckl_compute_dtmp_c(context,
|
rc = qmckl_compute_dtmp_c(context,
|
||||||
ctx->jastrow.cord_num,
|
ctx->jastrow.cord_num,
|
||||||
ctx->electron.num,
|
ctx->electron.num,
|
||||||
@ -4904,6 +5008,41 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
|||||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||||
ctx->jastrow.een_rescaled_n,
|
ctx->jastrow.een_rescaled_n,
|
||||||
ctx->jastrow.dtmp_c);
|
ctx->jastrow.dtmp_c);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case OFFLOAD_CUBLAS:
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.dtmp_c);
|
||||||
|
#else
|
||||||
|
rc = qmckl_compute_dtmp_c(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.dtmp_c);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
rc = qmckl_compute_dtmp_c(context,
|
||||||
|
ctx->jastrow.cord_num,
|
||||||
|
ctx->electron.num,
|
||||||
|
ctx->nucleus.num,
|
||||||
|
ctx->electron.walk_num,
|
||||||
|
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||||
|
ctx->jastrow.een_rescaled_n,
|
||||||
|
ctx->jastrow.dtmp_c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (rc != QMCKL_SUCCESS) {
|
if (rc != QMCKL_SUCCESS) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -5500,6 +5639,225 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context,
|
|||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
*** Compute tmp_c (OpenACC offload)
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_compute_tmp_c_acc_offload
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+NAME: qmckl_factor_tmp_c_acc_offload_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|------------------+------------------------------------------------------------------+--------+-----------------------------------|
|
||||||
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
|
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||||
|
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||||
|
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||||
|
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||||
|
| ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor |
|
||||||
|
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||||
|
| ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||||
|
|
||||||
|
|
||||||
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const tmp_c ) {
|
||||||
|
|
||||||
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
|
return QMCKL_INVALID_CONTEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cord_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elec_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nucl_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute array access strides:
|
||||||
|
// For tmp_c...
|
||||||
|
int stride_k_c = elec_num;
|
||||||
|
int stride_j_c = stride_k_c * nucl_num;
|
||||||
|
int stride_i_c = stride_j_c * (cord_num+1);
|
||||||
|
int stride_nw_c = stride_i_c * cord_num;
|
||||||
|
// For een_rescaled_e...
|
||||||
|
int stride_m_e = elec_num;
|
||||||
|
int stride_i_e = stride_m_e * elec_num;
|
||||||
|
int stride_nw_e = stride_i_e * (cord_num+1);
|
||||||
|
// For een_rescaled_n...
|
||||||
|
int stride_k_n = elec_num;
|
||||||
|
int stride_j_n = stride_k_n * nucl_num;
|
||||||
|
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||||
|
|
||||||
|
#pragma acc parallel
|
||||||
|
#pragma acc loop independent gang worker vector collapse(5)
|
||||||
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
|
for (int i=0; i<cord_num; ++i){
|
||||||
|
|
||||||
|
// Replacement for single DGEMM
|
||||||
|
for (int j=0; j<cord_num+1; j++) {
|
||||||
|
for (int k=0; k<nucl_num; k++) {
|
||||||
|
for (int l=0; l<elec_num; l++) {
|
||||||
|
|
||||||
|
// Single reduction
|
||||||
|
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0;
|
||||||
|
for (int m=0; m<elec_num; m++) {
|
||||||
|
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] =
|
||||||
|
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] +
|
||||||
|
een_rescaled_e[l + m*stride_m_e + i*stride_i_e + nw*stride_nw_e] *
|
||||||
|
een_rescaled_n[m + k*stride_k_n + j*stride_j_n + nw*stride_nw_n];
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const tmp_c );
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
TODO: FIX dtmp_c dimension in the table.
|
||||||
|
|
||||||
|
*** Compute tmp_c (cuBLAS offload)
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_compute_tmp_c_acc_offload
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+NAME: qmckl_factor_tmp_c_cublas_offload_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|------------------+------------------------------------------------------------------+--------+-----------------------------------|
|
||||||
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
|
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||||
|
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||||
|
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||||
|
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||||
|
| ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor |
|
||||||
|
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||||
|
| ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||||
|
|
||||||
|
|
||||||
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const tmp_c ) {
|
||||||
|
|
||||||
|
qmckl_exit_code info;
|
||||||
|
int i, j, a, l, kk, p, lmax, nw;
|
||||||
|
char TransA, TransB;
|
||||||
|
double alpha, beta;
|
||||||
|
int M, N, K, LDA, LDB, LDC;
|
||||||
|
|
||||||
|
TransA = 'N';
|
||||||
|
TransB = 'N';
|
||||||
|
alpha = 1.0;
|
||||||
|
beta = 0.0;
|
||||||
|
|
||||||
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
|
return QMCKL_INVALID_CONTEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cord_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elec_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nucl_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_4;
|
||||||
|
}
|
||||||
|
|
||||||
|
M = elec_num;
|
||||||
|
N = nucl_num*(cord_num + 1);
|
||||||
|
K = elec_num;
|
||||||
|
|
||||||
|
LDA = sizeof(een_rescaled_e)/sizeof(double);
|
||||||
|
LDB = sizeof(een_rescaled_n)/sizeof(double);
|
||||||
|
LDC = sizeof(tmp_c)/sizeof(double);
|
||||||
|
|
||||||
|
// TODO Replace with cuBLAS calls
|
||||||
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
|
for (int i=0; i<cord_num; ++i){
|
||||||
|
info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, \
|
||||||
|
// &een_rescaled_e[0+0*elec_num+i*elec_num*elec_num+nw*elec_num*elec_num*(cord_num+1)],
|
||||||
|
&een_rescaled_e[ i*elec_num*elec_num+nw*elec_num*elec_num*(cord_num+1)], \
|
||||||
|
LDA, \
|
||||||
|
// &een_rescaled_n[0+0*elec_num+0*elec_num*nucl_num+nw*elec_num*nucl_num*(cord_num+1)],
|
||||||
|
&een_rescaled_n[ nw*elec_num*nucl_num*(cord_num+1)], \
|
||||||
|
LDB, \
|
||||||
|
beta, \
|
||||||
|
// &tmp_c[0+0*elec_num+0*elec_num*nucl_num+i*elec_num*nucl_num*(cord_num+1)+nw*elec_num*nucl_num*(cord_num+1)*cord_num],
|
||||||
|
&tmp_c[ i*elec_num*nucl_num*(cord_num+1)+nw*elec_num*nucl_num*(cord_num+1)*cord_num], \
|
||||||
|
LDC);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const tmp_c );
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
TODO: FIX dtmp_c dimension in the table.
|
TODO: FIX dtmp_c dimension in the table.
|
||||||
*** Compute dtmp_c
|
*** Compute dtmp_c
|
||||||
:PROPERTIES:
|
:PROPERTIES:
|
||||||
@ -5740,6 +6098,226 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
|||||||
double* const dtmp_c );
|
double* const dtmp_c );
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
*** Compute dtmp_c (OpenACC offload)
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_compute_dtmp_c_acc_offload
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||||
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
|
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||||
|
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||||
|
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||||
|
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||||
|
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||||
|
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||||
|
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||||
|
|
||||||
|
|
||||||
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e_deriv_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const dtmp_c ) {
|
||||||
|
|
||||||
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
|
return QMCKL_INVALID_CONTEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cord_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elec_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nucl_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute strides...
|
||||||
|
// For dtmp_c
|
||||||
|
int stride_l_d = elec_num;
|
||||||
|
int stride_k_d = stride_l_d * 4;
|
||||||
|
int stride_j_d = stride_k_d * nucl_num;
|
||||||
|
int stride_i_d = stride_j_d * (cord_num+1);
|
||||||
|
int stride_nw_d = stride_i_d * cord_num;
|
||||||
|
// For een_rescaled_e_deriv_e
|
||||||
|
int stride_l_e = elec_num;
|
||||||
|
int stride_n_e = stride_l_e * 4;
|
||||||
|
int stride_i_e = stride_n_e * elec_num;
|
||||||
|
int stride_nw_e = stride_i_e * cord_num;
|
||||||
|
// For een_rescaled_n
|
||||||
|
int stride_k_n = elec_num;
|
||||||
|
int stride_j_n = stride_k_n * nucl_num;
|
||||||
|
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||||
|
|
||||||
|
|
||||||
|
#pragma acc parallel
|
||||||
|
#pragma loop independent gang worker vector collapse(6)
|
||||||
|
for (int nw=0; nw < walk_num; nw++) {
|
||||||
|
for (int i=0; i < cord_num; i++) {
|
||||||
|
|
||||||
|
// Single DGEMM
|
||||||
|
for(int j=0; j<cord_num+1; j++) {
|
||||||
|
for(int k=0; k<nucl_num; k++) {
|
||||||
|
for(int l=0; l<4; l++) {
|
||||||
|
for(int m=0; m<elec_num; m++) {
|
||||||
|
|
||||||
|
// Single reduction
|
||||||
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0;
|
||||||
|
for(int n=0; n<elec_num; n++){
|
||||||
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
||||||
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
||||||
|
een_rescaled_e_deriv_e[m + l * stride_l_e + n * stride_n_e + i * stride_i_e + nw * stride_nw_e] *
|
||||||
|
een_rescaled_n[n + k * stride_k_n + j * stride_j_n + nw * stride_nw_n];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return QMCKL_SUCCESS;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e_deriv_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const dtmp_c );
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
*** Compute dtmp_c (cuBLAS offload)
|
||||||
|
:PROPERTIES:
|
||||||
|
:Name: qmckl_compute_dtmp_c_cublas_offload
|
||||||
|
:CRetType: qmckl_exit_code
|
||||||
|
:FRetType: qmckl_exit_code
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+NAME: qmckl_factor_dtmp_c_cublas_offload_args
|
||||||
|
| Variable | Type | In/Out | Description |
|
||||||
|
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||||
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
|
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||||
|
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||||
|
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||||
|
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||||
|
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||||
|
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||||
|
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||||
|
|
||||||
|
|
||||||
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e_deriv_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const dtmp_c ) {
|
||||||
|
|
||||||
|
qmckl_exit_code info;
|
||||||
|
char TransA, TransB;
|
||||||
|
double alpha, beta;
|
||||||
|
int M, N, K, LDA, LDB, LDC;
|
||||||
|
|
||||||
|
TransA = 'N';
|
||||||
|
TransB = 'N';
|
||||||
|
alpha = 1.0;
|
||||||
|
beta = 0.0;
|
||||||
|
|
||||||
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
|
return QMCKL_INVALID_CONTEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cord_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elec_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nucl_num <= 0) {
|
||||||
|
return QMCKL_INVALID_ARG_4;
|
||||||
|
}
|
||||||
|
|
||||||
|
M = 4*elec_num;
|
||||||
|
N = nucl_num*(cord_num + 1);
|
||||||
|
K = elec_num;
|
||||||
|
|
||||||
|
LDA = 4*sizeof(een_rescaled_e_deriv_e)/sizeof(double);
|
||||||
|
LDB = sizeof(een_rescaled_n)/sizeof(double);
|
||||||
|
LDC = 4*sizeof(dtmp_c)/sizeof(double);
|
||||||
|
|
||||||
|
// TODO Replace with cuBLAS calls
|
||||||
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
|
for (int i=0; nw < cord_num; ++i) {
|
||||||
|
info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, \
|
||||||
|
//&een_rescaled_e_deriv_e[0+0*elec_num+0*elec_num*4+i*elec_num*4*elec_num+nw*elec_num*4*elec_num*(cord_num+1)],
|
||||||
|
&een_rescaled_e_deriv_e[i*elec_num*4*elec_num+nw*elec_num*4*elec_num*(cord_num+1)], \
|
||||||
|
LDA, \
|
||||||
|
//&een_rescaled_n[0+0*elec_num+0*elec_num*nucl_num+nw*elec_num*nucl_num*(cord_num+1)],
|
||||||
|
&een_rescaled_n[nw*elec_num*nucl_num*(cord_num+1)], \
|
||||||
|
LDB, \
|
||||||
|
beta, \
|
||||||
|
//&dtmp_c[0+0*elec_num+0*elec_num*4+0*elec_num*4*nucl_num+i*elec_num*4*nucl_num*(cord_num+1)+nw*elec_num*4*nucl_num*(cord_num+1)*cord_num],
|
||||||
|
&dtmp_c[i*elec_num*4*nucl_num*(cord_num+1)+nw*elec_num*4*nucl_num*(cord_num+1)*cord_num], \
|
||||||
|
LDC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+CALL: generate_c_header(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||||
|
|
||||||
|
#+RESULTS:
|
||||||
|
#+begin_src c :tangle (eval h_func) :comments org
|
||||||
|
|
||||||
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
|
||||||
|
const qmckl_context context,
|
||||||
|
const int64_t cord_num,
|
||||||
|
const int64_t elec_num,
|
||||||
|
const int64_t nucl_num,
|
||||||
|
const int64_t walk_num,
|
||||||
|
const double* een_rescaled_e_deriv_e,
|
||||||
|
const double* een_rescaled_n,
|
||||||
|
double* const dtmp_c );
|
||||||
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
*** Test
|
*** Test
|
||||||
|
|
||||||
#+name: helper_funcs
|
#+name: helper_funcs
|
||||||
|
Loading…
Reference in New Issue
Block a user