mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Merge branch 'gpu' into gpu
This commit is contained in:
commit
9b806aa071
11
configure.ac
11
configure.ac
@ -304,26 +304,27 @@ AS_IF([test "$enable_gpu" = "openacc"], [
|
|||||||
])
|
])
|
||||||
|
|
||||||
# cuBLAS offloading
|
# cuBLAS offloading
|
||||||
AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no)
|
AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no)
|
||||||
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
||||||
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
|
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
|
||||||
case $CC in
|
case $CC in
|
||||||
|
|
||||||
*gcc*)
|
*gcc*)
|
||||||
CFLAGS="$CFLAGS -fopenacc"
|
CFLAGS="$CFLAGS -fopenmp"
|
||||||
|
LDFLAGS="-lcublas"
|
||||||
;;
|
;;
|
||||||
*nvc*)
|
*nvc*)
|
||||||
CFLAGS="$CFLAGS -acc=gpu"
|
CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
case $FC in
|
case $FC in
|
||||||
|
|
||||||
*gfortran*)
|
*gfortran*)
|
||||||
FCFLAGS="$FCFLAGS -fopenacc"
|
FCFLAGS="$FCFLAGS -fopenmp"
|
||||||
;;
|
;;
|
||||||
*nvfortran*)
|
*nvfortran*)
|
||||||
FCFLAGS="$FCFLAGS -acc=gpu"
|
FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
])
|
])
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
\[
|
\[
|
||||||
J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R})
|
J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R})
|
||||||
\]
|
\]
|
||||||
|
|
||||||
In the following, we use the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and
|
In the following, we use the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and
|
||||||
$R_{i\alpha} = |\mathbf{r}_i - \mathbf{R}_\alpha|$.
|
$R_{i\alpha} = |\mathbf{r}_i - \mathbf{R}_\alpha|$.
|
||||||
|
|
||||||
@ -58,7 +58,6 @@
|
|||||||
The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that
|
The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that
|
||||||
$J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero.
|
$J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero.
|
||||||
|
|
||||||
|
|
||||||
* Headers :noexport:
|
* Headers :noexport:
|
||||||
#+begin_src elisp :noexport :results none
|
#+begin_src elisp :noexport :results none
|
||||||
(org-babel-lob-ingest "../tools/lib.org")
|
(org-babel-lob-ingest "../tools/lib.org")
|
||||||
@ -2352,6 +2351,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc &
|
|||||||
const double* ee_distance_rescaled,
|
const double* ee_distance_rescaled,
|
||||||
const double* ee_distance_rescaled_deriv_e,
|
const double* ee_distance_rescaled_deriv_e,
|
||||||
double* const factor_ee_deriv_e );
|
double* const factor_ee_deriv_e );
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
#+begin_src c :tangle (eval h_private_func) :comments org
|
#+begin_src c :tangle (eval h_private_func) :comments org
|
||||||
@ -2367,6 +2367,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc &
|
|||||||
double* const factor_ee_deriv_e );
|
double* const factor_ee_deriv_e );
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
qmckl_exit_code qmckl_compute_factor_ee_deriv_e (
|
qmckl_exit_code qmckl_compute_factor_ee_deriv_e (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
@ -6239,6 +6240,19 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
qmckl_exit_code info;
|
||||||
|
|
||||||
|
//Initialisation of cublas
|
||||||
|
|
||||||
|
cublasHandle_t handle;
|
||||||
|
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
|
||||||
|
{
|
||||||
|
fprintf(stdout, "CUBLAS initialization failed!\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (context == QMCKL_NULL_CONTEXT) {
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
return QMCKL_INVALID_CONTEXT;
|
return QMCKL_INVALID_CONTEXT;
|
||||||
}
|
}
|
||||||
@ -6270,6 +6284,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
||||||
const int64_t cf = bf;
|
const int64_t cf = bf;
|
||||||
|
|
||||||
|
|
||||||
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
||||||
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
|
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
|
||||||
{
|
{
|
||||||
@ -6284,25 +6299,35 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
&(tmp_c[nw*cord_num]), \
|
&(tmp_c[nw*cord_num]), \
|
||||||
LDC, cf, cord_num);
|
LDC, cf, cord_num);
|
||||||
|
|
||||||
|
|
||||||
//Manage cublas ERROR
|
//Manage cublas ERROR
|
||||||
if(cublasError != CUBLAS_STATUS_SUCCESS){
|
if(cublasError != CUBLAS_STATUS_SUCCESS){
|
||||||
printf("CUBLAS ERROR %d", cublasError);
|
printf("CUBLAS ERROR %d", cublasError);
|
||||||
info = QMCKL_FAILURE;
|
info = QMCKL_FAILURE;
|
||||||
|
|
||||||
|
return info;
|
||||||
}else{
|
}else{
|
||||||
info = QMCKL_SUCCESS;
|
info = QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
cudaDeviceSynchronize();
|
||||||
cublasDestroy(handle);
|
cublasDestroy(handle);
|
||||||
|
|
||||||
|
|
||||||
#pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
#pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
|
#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
|
||||||
|
Loading…
Reference in New Issue
Block a user