1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 18:16:28 +01:00

Merge pull request #1 from TREX-CoE/gpu

Gpu
This commit is contained in:
Max Hoffer 2022-04-06 17:17:45 +02:00 committed by GitHub
commit cba6477e4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1752 additions and 757 deletions

View File

@ -2,9 +2,7 @@ name: test-build
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
x86_ubuntu:

View File

@ -93,6 +93,7 @@ AC_PROG_F77
m4_version_prereq([2.70],[], [AC_PROG_CC_C99])
AS_IF([test "$ac_cv_prog_cc_c99" = "no"], [AC_MSG_ERROR([The compiler does not support C99])])
AC_PROG_CC_C_O
AM_PROG_CC_C_O
AC_PROG_FC
AC_PROG_FC_C_O
AC_FC_PP_DEFINE
@ -137,10 +138,10 @@ case "$with_chameleon" in
[PKG_CFLAGS="$PKG_CFLAGS $LIBCHAMELEON_CFLAGS"
PKG_LIBS="$PKG_LIBS $LIBCHAMELEON_LIBS"]
,[
## something went wrong.
## try to find the package without pkg-config
## check that the library is actually new enough.
## by testing for a 1.0.0+ function which we use
AC_CHECK_LIB(chameleon,CHAMELEON_finalize,[LIBCHAMELEON_LIBS="-lchameleon"])
@ -205,15 +206,18 @@ case $FC in
;;
*nvfortran*)
FCFLAGS="$FCFLAGS -fPIC -Mnomain -mp -target=gpu"
FCFLAGS="$FCFLAGS -fPIC -Mnomain"
;;
esac
case $CC in
*gcc*)
CFLAGS="$CFLAGS -fPIC"
;;
*nvc*)
CFLAGS="$CFLAGS -fPIC -mp -target=gpu"
CFLAGS="$CFLAGS -fPIC"
;;
esac
@ -224,6 +228,109 @@ AS_IF([test "$HAVE_HPC" = "yes"], [
AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines])
])
# Enable Verificarlo tests
# Adds the --enable-vfc_ci configure switch.
#   yes     : vfc_ci becomes true and -D VFC_CI is prepended to both
#             FCFLAGS and CFLAGS
#   no      : vfc_ci becomes false
#   absent  : vfc_ci becomes false
#   other   : configure stops with an error
# NOTE(review): the error text spells the switch --enable_vfc_ci with an
# underscore while the switch itself is --enable-vfc_ci. Confirm and align.
AC_ARG_ENABLE([vfc_ci],
[ --enable-vfc_ci Build the library with vfc_ci support],
[case "${enableval}" in
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
no) vfc_ci=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
esac],[vfc_ci=false])
# Publish the result as the VFC_CI conditional, true only when vfc_ci is true.
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
# This is an exact string comparison on FC, so a path-qualified or otherwise
# decorated compiler name does not take this branch.
if test "$FC" = "verificarlo-f"; then
AC_MSG_NOTICE(verificarlo-f detected)
# Arguments order is important here
FCFLAGS="-Mpreprocess $FCFLAGS"
fi
## Enable GPU offloading
# GPU offloading
# --enable-gpu accepts openmp or openacc. The value is stored in enable_gpu,
# which defaults to no when the switch is absent. A plain yes is mapped to
# openmp by the test that follows the switch declaration.
# NOTE(review): any other value matches neither the openmp test nor the
# openacc test in this file and is ignored without a diagnostic. Confirm that
# this is intended.
AC_ARG_ENABLE(gpu, [AS_HELP_STRING([--enable-gpu],[openmp|openacc : Use GPU-offloaded functions])], enable_gpu=$enableval, enable_gpu=no)
AS_IF([test "$enable_gpu" = "yes"], [enable_gpu="openmp"])
# OpenMP offloading
# HAVE_OPENMP_OFFLOAD starts as no so the variable always has a value.
# When enable_gpu is openmp the preprocessor symbol of the same name is
# defined, the shell variable becomes yes, and an OpenMP flag is appended to
# CFLAGS and FCFLAGS.
HAVE_OPENMP_OFFLOAD="no"
AS_IF([test "$enable_gpu" = "openmp"], [
AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines])
HAVE_OPENMP_OFFLOAD="yes"
# The flag is picked by glob match on the C compiler name.
# A compiler matching neither pattern gets no extra flag.
case $CC in
*gcc*)
CFLAGS="$CFLAGS -fopenmp"
;;
*nvc*)
CFLAGS="$CFLAGS -mp=gpu"
;;
esac
# Same selection for the Fortran compiler.
case $FC in
*gfortran*)
FCFLAGS="$FCFLAGS -fopenmp"
;;
*nvfortran*)
FCFLAGS="$FCFLAGS -mp=gpu"
;;
esac]
)
# OpenACC offloading
# HAVE_OPENACC_OFFLOAD starts as no so the variable always has a value.
# When enable_gpu is openacc the preprocessor symbol of the same name is
# defined, the shell variable becomes yes, and an OpenACC flag is appended to
# CFLAGS and FCFLAGS.
HAVE_OPENACC_OFFLOAD="no"
AS_IF([test "$enable_gpu" = "openacc"], [
AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines])
HAVE_OPENACC_OFFLOAD="yes"
# The flag is picked by glob match on the C compiler name.
# A compiler matching neither pattern gets no extra flag.
case $CC in
*gcc*)
CFLAGS="$CFLAGS -fopenacc"
;;
*nvc*)
CFLAGS="$CFLAGS -acc=gpu"
;;
esac
# Same selection for the Fortran compiler.
case $FC in
*gfortran*)
FCFLAGS="$FCFLAGS -fopenacc"
;;
*nvfortran*)
FCFLAGS="$FCFLAGS -acc=gpu"
;;
esac
])
# cuBLAS offloading
# --enable-cublas stores its value in HAVE_CUBLAS_OFFLOAD, which defaults to
# no when the switch is absent. Only the exact value yes activates the block
# below, which defines the preprocessor symbol of the same name.
# NOTE(review): the flags appended here are the same OpenACC flags used by
# the openacc branch of --enable-gpu, so combining both switches appends them
# twice. No cuBLAS link flag is added in this block. Confirm both points.
AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no)
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
# The flag is picked by glob match on the C compiler name.
# A compiler matching neither pattern gets no extra flag.
case $CC in
*gcc*)
CFLAGS="$CFLAGS -fopenacc"
;;
*nvc*)
CFLAGS="$CFLAGS -acc=gpu"
;;
esac
# Same selection for the Fortran compiler.
case $FC in
*gfortran*)
FCFLAGS="$FCFLAGS -fopenacc"
;;
*nvfortran*)
FCFLAGS="$FCFLAGS -acc=gpu"
;;
esac
])
##
AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no)
if test "$ok" = "yes"; then
if test "$GCC" = "yes"; then
@ -319,21 +426,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then
fi
# Enable Verificarlo tests
AC_ARG_ENABLE([vfc_ci],
[ --enable-vfc_ci Build the library with vfc_ci support],
[case "${enableval}" in
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
no) vfc_ci=false ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
esac],[vfc_ci=false])
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
if test "$FC" = "verificarlo-f"; then
AC_MSG_NOTICE(verificarlo-f detected)
# Arguments order is important here
FCFLAGS="-Mpreprocess $FCFLAGS"
fi
#PKG-CONFIG
#mkl-dynamic-lp64-seq
@ -369,6 +461,9 @@ LDFLAGS:........: ${LDFLAGS}
LIBS............: ${LIBS}
USE CHAMELEON...: ${with_chameleon}
HPC version.....: ${HAVE_HPC}
OpenMP offload..: ${HAVE_OPENMP_OFFLOAD}
OpenACC offload.: ${HAVE_OPENACC_OFFLOAD}
cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD}
Package features:
${ARGS}

View File

@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
}
}
rc = QMCKL_SUCCESS;
#ifdef HAVE_HPC
rc = qmckl_finalize_basis_hpc(context);
#else
rc = QMCKL_SUCCESS;
#endif
return rc;

View File

@ -84,8 +84,8 @@ are not intended to be passed to external codes.
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_vector {
int64_t size;
double* restrict data;
int64_t size;
} qmckl_vector;
#+end_src
@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context,
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
typedef struct qmckl_matrix {
int64_t size[2];
double* restrict data;
int64_t size[2];
} qmckl_matrix;
#+end_src
@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context,
#define QMCKL_TENSOR_ORDER_MAX 16
typedef struct qmckl_tensor {
double* restrict data;
int64_t order;
int64_t size[QMCKL_TENSOR_ORDER_MAX];
double* restrict data;
} qmckl_tensor;
#+end_src

File diff suppressed because it is too large Load Diff

View File

@ -655,6 +655,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, &
end if
end do
end do
info = QMCKL_SUCCESS
end function qmckl_compute_mo_basis_mo_vgl_doc_f
#+end_src
@ -790,7 +791,6 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
double av4[ao_num];
double av5[ao_num];
for (int64_t k=0 ; k<ao_num ; ++k) {
const double* restrict ck1 = coef_normalized_t + k*mo_num;
if (avgl1[k] != 0.) {
idx[nidx] = k;
av1[nidx] = avgl1[k];
@ -804,7 +804,6 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
int64_t n;
for (n=0 ; n < nidx-4 ; n+=4) {
int64_t k = idx[n];
const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num;
const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
@ -849,13 +848,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
int64_t n0 = nidx-4;
n0 = n0 < 0 ? 0 : n0;
for (int64_t n=n0 ; n < nidx ; n+=1) {
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
const double a1 = av1[n];
const double a2 = av2[n];
const double a3 = av3[n];
const double a4 = av4[n];
const double a5 = av5[n];
for (int64_t m=n0 ; m < nidx ; m+=1) {
const double* restrict ck = coef_normalized_t + idx[m]*mo_num;
const double a1 = av1[m];
const double a2 = av2[m];
const double a3 = av3[m];
const double a4 = av4[m];
const double a5 = av5[m];
#ifdef HAVE_OPENMP
#pragma omp simd

View File

@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
if (rc != 0) { // Send the entire block to slagel_splitting
uint64_t l = 0;
rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
later = later + l;
}