1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-23 04:44:03 +01:00

Merge branch 'gpu' into gpu

This commit is contained in:
Max Hoffer 2022-04-08 10:43:42 +02:00 committed by GitHub
commit 9b806aa071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 10 deletions

View File

@ -304,26 +304,27 @@ AS_IF([test "$enable_gpu" = "openacc"], [
]) ])
# cuBLAS offloading # cuBLAS offloading
AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no)
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
case $CC in case $CC in
*gcc*) *gcc*)
CFLAGS="$CFLAGS -fopenacc" CFLAGS="$CFLAGS -fopenmp"
LDFLAGS="-lcublas"
;; ;;
*nvc*) *nvc*)
CFLAGS="$CFLAGS -acc=gpu" CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas"
;; ;;
esac esac
case $FC in case $FC in
*gfortran*) *gfortran*)
FCFLAGS="$FCFLAGS -fopenacc" FCFLAGS="$FCFLAGS -fopenmp"
;; ;;
*nvfortran*) *nvfortran*)
FCFLAGS="$FCFLAGS -acc=gpu" FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas"
;; ;;
esac esac
]) ])

View File

@ -58,7 +58,6 @@
The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that
$J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero.
* Headers :noexport: * Headers :noexport:
#+begin_src elisp :noexport :results none #+begin_src elisp :noexport :results none
(org-babel-lob-ingest "../tools/lib.org") (org-babel-lob-ingest "../tools/lib.org")
@ -2352,6 +2351,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc &
const double* ee_distance_rescaled, const double* ee_distance_rescaled,
const double* ee_distance_rescaled_deriv_e, const double* ee_distance_rescaled_deriv_e,
double* const factor_ee_deriv_e ); double* const factor_ee_deriv_e );
#+end_src #+end_src
#+begin_src c :tangle (eval h_private_func) :comments org #+begin_src c :tangle (eval h_private_func) :comments org
@ -2367,6 +2367,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc &
double* const factor_ee_deriv_e ); double* const factor_ee_deriv_e );
#+end_src #+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes #+begin_src c :comments org :tangle (eval c) :noweb yes
qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( qmckl_exit_code qmckl_compute_factor_ee_deriv_e (
const qmckl_context context, const qmckl_context context,
@ -6239,6 +6240,19 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
qmckl_exit_code info;
//Initialisation of cublas
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
if (context == QMCKL_NULL_CONTEXT) { if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT; return QMCKL_INVALID_CONTEXT;
} }
@ -6270,6 +6284,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t bf = elec_num*nucl_num*(cord_num+1);
const int64_t cf = bf; const int64_t cf = bf;
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
{ {
@ -6284,18 +6299,30 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
&(tmp_c[nw*cord_num]), \ &(tmp_c[nw*cord_num]), \
LDC, cf, cord_num); LDC, cf, cord_num);
//Manage cublas ERROR //Manage cublas ERROR
if(cublasError != CUBLAS_STATUS_SUCCESS){ if(cublasError != CUBLAS_STATUS_SUCCESS){
printf("CUBLAS ERROR %d", cublasError); printf("CUBLAS ERROR %d", cublasError);
info = QMCKL_FAILURE; info = QMCKL_FAILURE;
return info;
}else{ }else{
info = QMCKL_SUCCESS; info = QMCKL_SUCCESS;
} }
} }
} }
cudaDeviceSynchronize();
cublasDestroy(handle); cublasDestroy(handle);
#pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
return info; return info;
} }
#endif #endif
@ -6303,8 +6330,6 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
#+end_src #+end_src
#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
#ifdef HAVE_CUBLAS_OFFLOAD #ifdef HAVE_CUBLAS_OFFLOAD
qmckl_exit_code qmckl_exit_code