mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Merge branch 'gpu' of github.com:TREX-CoE/qmckl into gpu
This commit is contained in:
commit
a3a1cc6428
58
configure.ac
58
configure.ac
@ -224,6 +224,47 @@ AS_IF([test "$HAVE_HPC" = "yes"], [
|
||||
AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines])
|
||||
])
|
||||
|
||||
# Enable Verificarlo tests
|
||||
AC_ARG_ENABLE([vfc_ci],
|
||||
[ --enable-vfc_ci Build the library with vfc_ci support],
|
||||
[case "${enableval}" in
|
||||
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
||||
no) vfc_ci=false ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
||||
esac],[vfc_ci=false])
|
||||
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
||||
|
||||
if test "$FC" = "verificarlo-f"; then
|
||||
AC_MSG_NOTICE(verificarlo-f detected)
|
||||
# Arguments order is important here
|
||||
FCFLAGS="-Mpreprocess $FCFLAGS"
|
||||
fi
|
||||
|
||||
## Enable GPU offloading
|
||||
|
||||
# OpenACC offloading
|
||||
AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no)
|
||||
AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [
|
||||
AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines])
|
||||
CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS"
|
||||
FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS"
|
||||
])
|
||||
|
||||
# cuBLAS offloading
|
||||
AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no)
|
||||
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
|
||||
FCFLAGS="-DHAVE_CUBLAS_OFFLOAD"
|
||||
])
|
||||
|
||||
# General offload
|
||||
AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||
CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS"
|
||||
FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS"
|
||||
])
|
||||
|
||||
##
|
||||
|
||||
AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no)
|
||||
if test "$ok" = "yes"; then
|
||||
if test "$GCC" = "yes"; then
|
||||
@ -319,21 +360,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then
|
||||
|
||||
fi
|
||||
|
||||
# Enable Verificarlo tests
|
||||
AC_ARG_ENABLE([vfc_ci],
|
||||
[ --enable-vfc_ci Build the library with vfc_ci support],
|
||||
[case "${enableval}" in
|
||||
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
||||
no) vfc_ci=false ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
||||
esac],[vfc_ci=false])
|
||||
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
||||
|
||||
if test "$FC" = "verificarlo-f"; then
|
||||
AC_MSG_NOTICE(verificarlo-f detected)
|
||||
# Arguments order is important here
|
||||
FCFLAGS="-Mpreprocess $FCFLAGS"
|
||||
fi
|
||||
|
||||
#PKG-CONFIG
|
||||
#mkl-dynamic-lp64-seq
|
||||
@ -369,6 +395,8 @@ LDFLAGS:........: ${LDFLAGS}
|
||||
LIBS............: ${LIBS}
|
||||
USE CHAMELEON...: ${with_chameleon}
|
||||
HPC version.....: ${HAVE_HPC}
|
||||
OpenACC offload.: ${HAVE_OPENACC_OFFLOAD}
|
||||
cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD}
|
||||
|
||||
Package features:
|
||||
${ARGS}
|
||||
|
@ -151,6 +151,7 @@ int main() {
|
||||
| ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative |
|
||||
| ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part |
|
||||
| ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative |
|
||||
| ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime |
|
||||
|
||||
computed data:
|
||||
|
||||
@ -327,6 +328,14 @@ kappa_inv = 1.0/kappa
|
||||
|
||||
** Data structure
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_type)
|
||||
typedef enum qmckl_jastrow_offload_type{
|
||||
OFFLOAD_NONE,
|
||||
OFFLOAD_OPENACC,
|
||||
OFFLOAD_CUBLAS
|
||||
} qmckl_jastrow_offload_type;
|
||||
#+end_src
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_private_type)
|
||||
typedef struct qmckl_jastrow_struct{
|
||||
int32_t uninitialized;
|
||||
@ -372,6 +381,7 @@ typedef struct qmckl_jastrow_struct{
|
||||
uint64_t een_rescaled_n_deriv_e_date;
|
||||
bool provided;
|
||||
char * type;
|
||||
qmckl_jastrow_offload_type offload_type;
|
||||
} qmckl_jastrow_struct;
|
||||
#+end_src
|
||||
|
||||
@ -416,6 +426,7 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int
|
||||
qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type);
|
||||
#+end_src
|
||||
|
||||
Along with these core functions, calculation of the jastrow factor
|
||||
@ -713,6 +724,32 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context,
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) {
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return (char) 0;
|
||||
}
|
||||
|
||||
if (offload_type == NULL) {
|
||||
return qmckl_failwith( context,
|
||||
QMCKL_INVALID_ARG_2,
|
||||
"qmckl_get_jastrow_offload_type",
|
||||
"offload_type is a null pointer");
|
||||
}
|
||||
|
||||
qmckl_context_struct* const ctx = (qmckl_context_struct* const) context;
|
||||
assert (ctx != NULL);
|
||||
|
||||
int32_t mask = 1 << 0;
|
||||
|
||||
if ( (ctx->jastrow.uninitialized & mask) != 0) {
|
||||
return QMCKL_NOT_PROVIDED;
|
||||
}
|
||||
|
||||
*offload_type = ctx->jastrow.offload_type;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
** Initialization functions
|
||||
@ -727,6 +764,7 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con
|
||||
qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max);
|
||||
qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type);
|
||||
#+end_src
|
||||
|
||||
#+NAME:pre2
|
||||
@ -1063,6 +1101,14 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
|
||||
<<post2>>
|
||||
}
|
||||
|
||||
qmckl_exit_code
|
||||
qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type)
|
||||
{
|
||||
<<pre2>>
|
||||
ctx->jastrow.offload_type = offload_type;
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
||||
#+end_src
|
||||
|
||||
When the required information is completely entered, other data structures are
|
||||
@ -2560,6 +2606,7 @@ qmckl_exit_code qmckl_compute_factor_en (
|
||||
|
||||
double x, x1, power_ser;
|
||||
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
@ -4812,7 +4859,6 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context)
|
||||
|
||||
qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
{
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
}
|
||||
@ -4844,6 +4890,20 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
ctx->jastrow.tmp_c = tmp_c;
|
||||
}
|
||||
|
||||
/* Choose the correct compute function (depending on offload type) */
|
||||
switch(ctx->jastrow.offload_type) {
|
||||
case OFFLOAD_OPENACC:
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
rc =
|
||||
qmckl_compute_tmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#else
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
@ -4852,8 +4912,41 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
|
||||
#endif
|
||||
break;
|
||||
case OFFLOAD_CUBLAS:
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
rc =
|
||||
qmckl_compute_tmp_c_cublas_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#else
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
break;
|
||||
}
|
||||
|
||||
ctx->jastrow.tmp_c_date = ctx->date;
|
||||
@ -4864,7 +4957,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
|
||||
qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
{
|
||||
|
||||
if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_NULL_CONTEXT;
|
||||
}
|
||||
@ -4896,6 +4988,18 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
ctx->jastrow.dtmp_c = dtmp_c;
|
||||
}
|
||||
|
||||
switch(ctx->jastrow.offload_type) {
|
||||
case OFFLOAD_OPENACC:
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#else
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
@ -4904,6 +5008,41 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#endif
|
||||
break;
|
||||
case OFFLOAD_CUBLAS:
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#else
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
break;
|
||||
}
|
||||
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
@ -5500,6 +5639,225 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context,
|
||||
}
|
||||
#+end_src
|
||||
|
||||
*** Compute tmp_c (OpenACC offload)
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_compute_tmp_c_acc_offload
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+NAME: qmckl_factor_tmp_c_acc_offload_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|------------------+------------------------------------------------------------------+--------+-----------------------------------|
|
||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||
| ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor |
|
||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||
| ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const tmp_c ) {
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (cord_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_2;
|
||||
}
|
||||
|
||||
if (elec_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_3;
|
||||
}
|
||||
|
||||
if (nucl_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_4;
|
||||
}
|
||||
|
||||
// Compute array access strides:
|
||||
// For tmp_c...
|
||||
int stride_k_c = elec_num;
|
||||
int stride_j_c = stride_k_c * nucl_num;
|
||||
int stride_i_c = stride_j_c * (cord_num+1);
|
||||
int stride_nw_c = stride_i_c * cord_num;
|
||||
// For een_rescaled_e...
|
||||
int stride_m_e = elec_num;
|
||||
int stride_i_e = stride_m_e * elec_num;
|
||||
int stride_nw_e = stride_i_e * (cord_num+1);
|
||||
// For een_rescaled_n...
|
||||
int stride_k_n = elec_num;
|
||||
int stride_j_n = stride_k_n * nucl_num;
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop independent gang worker vector collapse(5)
|
||||
for (int nw=0; nw < walk_num; ++nw) {
|
||||
for (int i=0; i<cord_num; ++i){
|
||||
|
||||
// Replacement for single DGEMM
|
||||
for (int j=0; j<cord_num+1; j++) {
|
||||
for (int k=0; k<nucl_num; k++) {
|
||||
for (int l=0; l<elec_num; l++) {
|
||||
|
||||
// Single reduction
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0;
|
||||
for (int m=0; m<elec_num; m++) {
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] =
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] +
|
||||
een_rescaled_e[l + m*stride_m_e + i*stride_i_e + nw*stride_nw_e] *
|
||||
een_rescaled_n[m + k*stride_k_n + j*stride_j_n + nw*stride_nw_n];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const tmp_c );
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
TODO: FIX dtmp_c dimension in the table.
|
||||
|
||||
*** Compute tmp_c (cuBLAS offload)
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_compute_tmp_c_acc_offload
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+NAME: qmckl_factor_tmp_c_cublas_offload_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|------------------+------------------------------------------------------------------+--------+-----------------------------------|
|
||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||
| ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor |
|
||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||
| ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const tmp_c ) {
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (cord_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_2;
|
||||
}
|
||||
|
||||
if (elec_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_3;
|
||||
}
|
||||
|
||||
if (nucl_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_4;
|
||||
}
|
||||
|
||||
if (walk_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_5;
|
||||
}
|
||||
|
||||
qmckl_exit_code info = QMCKL_SUCCESS;
|
||||
|
||||
const char TransA = 'N';
|
||||
const char TransB = 'N';
|
||||
const double alpha = 1.0;
|
||||
const double beta = 0.0;
|
||||
|
||||
const int64_t M = elec_num;
|
||||
const int64_t N = nucl_num*(cord_num + 1);
|
||||
const int64_t K = elec_num;
|
||||
|
||||
const int64_t LDA = elec_num;
|
||||
const int64_t LDB = elec_num;
|
||||
const int64_t LDC = elec_num;
|
||||
|
||||
const int64_t af = elec_num*elec_num;
|
||||
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
||||
const int64_t cf = bf;
|
||||
|
||||
// TODO Replace with calls to cuBLAS
|
||||
for (int64_t nw=0; nw < walk_num; ++nw) {
|
||||
for (int64_t i=0; i<cord_num; ++i){
|
||||
info = qmckl_dgemm(context, TransA, TransB, M, N, K, alpha, \
|
||||
&(een_rescaled_e[af*(i+nw*(cord_num+1))]), \
|
||||
LDA, \
|
||||
&(een_rescaled_n[bf*nw]), \
|
||||
LDB, \
|
||||
beta, \
|
||||
&(tmp_c[cf*(i+nw*cord_num)]), \
|
||||
LDC);
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const tmp_c );
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
TODO: FIX dtmp_c dimension in the table.
|
||||
*** Compute dtmp_c
|
||||
:PROPERTIES:
|
||||
@ -5740,6 +6098,228 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
||||
double* const dtmp_c );
|
||||
#+end_src
|
||||
|
||||
*** Compute dtmp_c (OpenACC offload)
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_compute_dtmp_c_acc_offload
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e_deriv_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const dtmp_c ) {
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (cord_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_2;
|
||||
}
|
||||
|
||||
if (elec_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_3;
|
||||
}
|
||||
|
||||
if (nucl_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_4;
|
||||
}
|
||||
|
||||
// Compute strides...
|
||||
// For dtmp_c
|
||||
int stride_l_d = elec_num;
|
||||
int stride_k_d = stride_l_d * 4;
|
||||
int stride_j_d = stride_k_d * nucl_num;
|
||||
int stride_i_d = stride_j_d * (cord_num+1);
|
||||
int stride_nw_d = stride_i_d * cord_num;
|
||||
// For een_rescaled_e_deriv_e
|
||||
int stride_l_e = elec_num;
|
||||
int stride_n_e = stride_l_e * 4;
|
||||
int stride_i_e = stride_n_e * elec_num;
|
||||
int stride_nw_e = stride_i_e * cord_num;
|
||||
// For een_rescaled_n
|
||||
int stride_k_n = elec_num;
|
||||
int stride_j_n = stride_k_n * nucl_num;
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma loop independent gang worker vector collapse(6)
|
||||
for (int nw=0; nw < walk_num; nw++) {
|
||||
for (int i=0; i < cord_num; i++) {
|
||||
|
||||
// Single DGEMM
|
||||
for(int j=0; j<cord_num+1; j++) {
|
||||
for(int k=0; k<nucl_num; k++) {
|
||||
for(int l=0; l<4; l++) {
|
||||
for(int m=0; m<elec_num; m++) {
|
||||
|
||||
// Single reduction
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0;
|
||||
for(int n=0; n<elec_num; n++){
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
||||
een_rescaled_e_deriv_e[m + l * stride_l_e + n * stride_n_e + i * stride_i_e + nw * stride_nw_e] *
|
||||
een_rescaled_n[n + k * stride_k_n + j * stride_j_n + nw * stride_nw_n];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e_deriv_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const dtmp_c );
|
||||
#endif
|
||||
#+end_src
|
||||
*** Compute dtmp_c (cuBLAS offload)
|
||||
:PROPERTIES:
|
||||
:Name: qmckl_compute_dtmp_c_cublas_offload
|
||||
:CRetType: qmckl_exit_code
|
||||
:FRetType: qmckl_exit_code
|
||||
:END:
|
||||
|
||||
#+NAME: qmckl_factor_dtmp_c_cublas_offload_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||
| ~nucl_num~ | ~int64_t~ | in | Number of nucleii |
|
||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e_deriv_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const dtmp_c ) {
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
|
||||
if (cord_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_2;
|
||||
}
|
||||
|
||||
if (elec_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_3;
|
||||
}
|
||||
|
||||
if (nucl_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_4;
|
||||
}
|
||||
|
||||
if (walk_num <= 0) {
|
||||
return QMCKL_INVALID_ARG_5;
|
||||
}
|
||||
|
||||
qmckl_exit_code info = QMCKL_SUCCESS;
|
||||
|
||||
const char TransA = 'N';
|
||||
const char TransB = 'N';
|
||||
const double alpha = 1.0;
|
||||
const double beta = 0.0;
|
||||
|
||||
const int64_t M = 4*elec_num;
|
||||
const int64_t N = nucl_num*(cord_num + 1);
|
||||
const int64_t K = elec_num;
|
||||
|
||||
const int64_t LDA = 4*elec_num;
|
||||
const int64_t LDB = elec_num;
|
||||
const int64_t LDC = 4*elec_num;
|
||||
|
||||
const int64_t af = elec_num*elec_num*4;
|
||||
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
||||
const int64_t cf = elec_num*4*nucl_num*(cord_num+1);
|
||||
|
||||
// TODO Replace with calls to cuBLAS
|
||||
for (int64_t nw=0; nw < walk_num; ++nw) {
|
||||
for (int64_t i=0; i < cord_num; ++i) {
|
||||
info = qmckl_dgemm(context, TransA, TransB, M, N, K, alpha, \
|
||||
&(een_rescaled_e_deriv_e[af*(i+nw*(cord_num+1))]), \
|
||||
LDA, \
|
||||
&(een_rescaled_n[bf*nw]), \
|
||||
LDB, \
|
||||
beta, \
|
||||
&(dtmp_c[cf*(i+nw*cord_num)]), \
|
||||
LDC);
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
#+CALL: generate_c_header(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
|
||||
|
||||
#+RESULTS:
|
||||
#+begin_src c :tangle (eval h_func) :comments org
|
||||
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
|
||||
const qmckl_context context,
|
||||
const int64_t cord_num,
|
||||
const int64_t elec_num,
|
||||
const int64_t nucl_num,
|
||||
const int64_t walk_num,
|
||||
const double* een_rescaled_e_deriv_e,
|
||||
const double* een_rescaled_n,
|
||||
double* const dtmp_c );
|
||||
#endif
|
||||
#+end_src
|
||||
|
||||
*** Test
|
||||
|
||||
#+name: helper_funcs
|
||||
|
Loading…
Reference in New Issue
Block a user