mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 18:16:28 +01:00
commit
cba6477e4a
2
.github/workflows/test-build.yml
vendored
2
.github/workflows/test-build.yml
vendored
@ -2,9 +2,7 @@ name: test-build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
x86_ubuntu:
|
||||
|
133
configure.ac
133
configure.ac
@ -93,6 +93,7 @@ AC_PROG_F77
|
||||
m4_version_prereq([2.70],[], [AC_PROG_CC_C99])
|
||||
AS_IF([test "$ac_cv_prog_cc_c99" = "no"], [AC_MSG_ERROR([The compiler does not support C99])])
|
||||
AC_PROG_CC_C_O
|
||||
AM_PROG_CC_C_O
|
||||
AC_PROG_FC
|
||||
AC_PROG_FC_C_O
|
||||
AC_FC_PP_DEFINE
|
||||
@ -137,10 +138,10 @@ case "$with_chameleon" in
|
||||
[PKG_CFLAGS="$PKG_CFLAGS $LIBCHAMELEON_CFLAGS"
|
||||
PKG_LIBS="$PKG_LIBS $LIBCHAMELEON_LIBS"]
|
||||
,[
|
||||
|
||||
|
||||
## something went wrong.
|
||||
## try to find the package without pkg-config
|
||||
|
||||
|
||||
## check that the library is actually new enough.
|
||||
## by testing for a 1.0.0+ function which we use
|
||||
AC_CHECK_LIB(chameleon,CHAMELEON_finalize,[LIBCHAMELEON_LIBS="-lchameleon"])
|
||||
@ -205,15 +206,18 @@ case $FC in
|
||||
;;
|
||||
|
||||
*nvfortran*)
|
||||
FCFLAGS="$FCFLAGS -fPIC -Mnomain -mp -target=gpu"
|
||||
FCFLAGS="$FCFLAGS -fPIC -Mnomain"
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
case $CC in
|
||||
|
||||
*gcc*)
|
||||
CFLAGS="$CFLAGS -fPIC"
|
||||
;;
|
||||
*nvc*)
|
||||
CFLAGS="$CFLAGS -fPIC -mp -target=gpu"
|
||||
CFLAGS="$CFLAGS -fPIC"
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -224,6 +228,109 @@ AS_IF([test "$HAVE_HPC" = "yes"], [
|
||||
AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines])
|
||||
])
|
||||
|
||||
# Enable Verificarlo tests
|
||||
AC_ARG_ENABLE([vfc_ci],
|
||||
[ --enable-vfc_ci Build the library with vfc_ci support],
|
||||
[case "${enableval}" in
|
||||
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
||||
no) vfc_ci=false ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
||||
esac],[vfc_ci=false])
|
||||
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
||||
|
||||
if test "$FC" = "verificarlo-f"; then
|
||||
AC_MSG_NOTICE(verificarlo-f detected)
|
||||
# Arguments order is important here
|
||||
FCFLAGS="-Mpreprocess $FCFLAGS"
|
||||
fi
|
||||
|
||||
## Enable GPU offloading
|
||||
|
||||
# GPU offloading
|
||||
AC_ARG_ENABLE(gpu, [AS_HELP_STRING([--enable-gpu],[openmp|openacc : Use GPU-offloaded functions])], enable_gpu=$enableval, enable_gpu=no)
|
||||
AS_IF([test "$enable_gpu" = "yes"], [enable_gpu="openmp"])
|
||||
|
||||
# OpenMP offloading
|
||||
HAVE_OPENMP_OFFLOAD="no"
|
||||
AS_IF([test "$enable_gpu" = "openmp"], [
|
||||
AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines])
|
||||
HAVE_OPENMP_OFFLOAD="yes"
|
||||
case $CC in
|
||||
|
||||
*gcc*)
|
||||
CFLAGS="$CFLAGS -fopenmp"
|
||||
;;
|
||||
*nvc*)
|
||||
CFLAGS="$CFLAGS -mp=gpu"
|
||||
;;
|
||||
esac
|
||||
|
||||
case $FC in
|
||||
|
||||
*gfortran*)
|
||||
FCFLAGS="$FCFLAGS -fopenmp"
|
||||
;;
|
||||
*nvfortran*)
|
||||
FCFLAGS="$FCFLAGS -mp=gpu"
|
||||
;;
|
||||
esac]
|
||||
)
|
||||
|
||||
# OpenACC offloading
|
||||
HAVE_OPENACC_OFFLOAD="no"
|
||||
AS_IF([test "$enable_gpu" = "openacc"], [
|
||||
AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines])
|
||||
HAVE_OPENACC_OFFLOAD="yes"
|
||||
case $CC in
|
||||
|
||||
*gcc*)
|
||||
CFLAGS="$CFLAGS -fopenacc"
|
||||
;;
|
||||
*nvc*)
|
||||
CFLAGS="$CFLAGS -acc=gpu"
|
||||
;;
|
||||
esac
|
||||
|
||||
case $FC in
|
||||
|
||||
*gfortran*)
|
||||
FCFLAGS="$FCFLAGS -fopenacc"
|
||||
;;
|
||||
*nvfortran*)
|
||||
FCFLAGS="$FCFLAGS -acc=gpu"
|
||||
;;
|
||||
esac
|
||||
|
||||
])
|
||||
|
||||
# cuBLAS offloading
|
||||
AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no)
|
||||
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
|
||||
case $CC in
|
||||
|
||||
*gcc*)
|
||||
CFLAGS="$CFLAGS -fopenacc"
|
||||
;;
|
||||
*nvc*)
|
||||
CFLAGS="$CFLAGS -acc=gpu"
|
||||
;;
|
||||
esac
|
||||
|
||||
case $FC in
|
||||
|
||||
*gfortran*)
|
||||
FCFLAGS="$FCFLAGS -fopenacc"
|
||||
;;
|
||||
*nvfortran*)
|
||||
FCFLAGS="$FCFLAGS -acc=gpu"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
|
||||
##
|
||||
|
||||
AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no)
|
||||
if test "$ok" = "yes"; then
|
||||
if test "$GCC" = "yes"; then
|
||||
@ -319,21 +426,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then
|
||||
|
||||
fi
|
||||
|
||||
# Enable Verificarlo tests
|
||||
AC_ARG_ENABLE([vfc_ci],
|
||||
[ --enable-vfc_ci Build the library with vfc_ci support],
|
||||
[case "${enableval}" in
|
||||
yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";;
|
||||
no) vfc_ci=false ;;
|
||||
*) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;;
|
||||
esac],[vfc_ci=false])
|
||||
AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue])
|
||||
|
||||
if test "$FC" = "verificarlo-f"; then
|
||||
AC_MSG_NOTICE(verificarlo-f detected)
|
||||
# Arguments order is important here
|
||||
FCFLAGS="-Mpreprocess $FCFLAGS"
|
||||
fi
|
||||
|
||||
#PKG-CONFIG
|
||||
#mkl-dynamic-lp64-seq
|
||||
@ -369,6 +461,9 @@ LDFLAGS:........: ${LDFLAGS}
|
||||
LIBS............: ${LIBS}
|
||||
USE CHAMELEON...: ${with_chameleon}
|
||||
HPC version.....: ${HAVE_HPC}
|
||||
OpenMP offload..: ${HAVE_OPENMP_OFFLOAD}
|
||||
OpenACC offload.: ${HAVE_OPENACC_OFFLOAD}
|
||||
cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD}
|
||||
|
||||
Package features:
|
||||
${ARGS}
|
||||
|
@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
|
||||
}
|
||||
}
|
||||
|
||||
rc = QMCKL_SUCCESS;
|
||||
#ifdef HAVE_HPC
|
||||
rc = qmckl_finalize_basis_hpc(context);
|
||||
#else
|
||||
rc = QMCKL_SUCCESS;
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
|
@ -84,8 +84,8 @@ are not intended to be passed to external codes.
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
|
||||
typedef struct qmckl_vector {
|
||||
int64_t size;
|
||||
double* restrict data;
|
||||
int64_t size;
|
||||
} qmckl_vector;
|
||||
#+end_src
|
||||
|
||||
@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context,
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
|
||||
typedef struct qmckl_matrix {
|
||||
int64_t size[2];
|
||||
double* restrict data;
|
||||
int64_t size[2];
|
||||
} qmckl_matrix;
|
||||
#+end_src
|
||||
|
||||
@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context,
|
||||
#define QMCKL_TENSOR_ORDER_MAX 16
|
||||
|
||||
typedef struct qmckl_tensor {
|
||||
double* restrict data;
|
||||
int64_t order;
|
||||
int64_t size[QMCKL_TENSOR_ORDER_MAX];
|
||||
double* restrict data;
|
||||
} qmckl_tensor;
|
||||
#+end_src
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -655,6 +655,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, &
|
||||
end if
|
||||
end do
|
||||
end do
|
||||
info = QMCKL_SUCCESS
|
||||
|
||||
end function qmckl_compute_mo_basis_mo_vgl_doc_f
|
||||
#+end_src
|
||||
@ -790,7 +791,6 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
double av4[ao_num];
|
||||
double av5[ao_num];
|
||||
for (int64_t k=0 ; k<ao_num ; ++k) {
|
||||
const double* restrict ck1 = coef_normalized_t + k*mo_num;
|
||||
if (avgl1[k] != 0.) {
|
||||
idx[nidx] = k;
|
||||
av1[nidx] = avgl1[k];
|
||||
@ -804,7 +804,6 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
|
||||
int64_t n;
|
||||
for (n=0 ; n < nidx-4 ; n+=4) {
|
||||
int64_t k = idx[n];
|
||||
const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num;
|
||||
const double* restrict ck2 = coef_normalized_t + idx[n+1]*mo_num;
|
||||
const double* restrict ck3 = coef_normalized_t + idx[n+2]*mo_num;
|
||||
@ -849,13 +848,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
|
||||
int64_t n0 = nidx-4;
|
||||
n0 = n0 < 0 ? 0 : n0;
|
||||
for (int64_t n=n0 ; n < nidx ; n+=1) {
|
||||
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
|
||||
const double a1 = av1[n];
|
||||
const double a2 = av2[n];
|
||||
const double a3 = av3[n];
|
||||
const double a4 = av4[n];
|
||||
const double a5 = av5[n];
|
||||
for (int64_t m=n0 ; m < nidx ; m+=1) {
|
||||
const double* restrict ck = coef_normalized_t + idx[m]*mo_num;
|
||||
const double a1 = av1[m];
|
||||
const double a2 = av2[m];
|
||||
const double a3 = av3[m];
|
||||
const double a4 = av4[m];
|
||||
const double a5 = av5[m];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
|
@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
|
||||
later = later + l;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user