mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-11-19 20:42:50 +01:00
Fix cublas
This commit is contained in:
parent
a7fac59f04
commit
47d63aa9d3
@ -310,20 +310,21 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
|
|||||||
case $CC in
|
case $CC in
|
||||||
|
|
||||||
*gcc*)
|
*gcc*)
|
||||||
CFLAGS="$CFLAGS -fopenacc"
|
CFLAGS="$CFLAGS -fopenmp"
|
||||||
|
LDFLAGS="-lcublas"
|
||||||
;;
|
;;
|
||||||
*nvc*)
|
*nvc*)
|
||||||
CFLAGS="$CFLAGS -acc=gpu"
|
CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
case $FC in
|
case $FC in
|
||||||
|
|
||||||
*gfortran*)
|
*gfortran*)
|
||||||
FCFLAGS="$FCFLAGS -fopenacc"
|
FCFLAGS="$FCFLAGS -fopenmp"
|
||||||
;;
|
;;
|
||||||
*nvfortran*)
|
*nvfortran*)
|
||||||
FCFLAGS="$FCFLAGS -acc=gpu"
|
FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
])
|
])
|
||||||
|
@ -5870,14 +5870,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context,
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
#ifdef HAVE_OPENACC_OFFLOAD
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
|
qmckl_exit_code
|
||||||
const int64_t cord_num,
|
qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
|
||||||
const int64_t elec_num,
|
const int64_t cord_num,
|
||||||
const int64_t nucl_num,
|
const int64_t elec_num,
|
||||||
const int64_t walk_num,
|
const int64_t nucl_num,
|
||||||
const double* een_rescaled_e,
|
const int64_t walk_num,
|
||||||
const double* een_rescaled_n,
|
const double* een_rescaled_e,
|
||||||
double* const tmp_c )
|
const double* een_rescaled_n,
|
||||||
|
double* const tmp_c )
|
||||||
{
|
{
|
||||||
|
|
||||||
if (context == QMCKL_NULL_CONTEXT) {
|
if (context == QMCKL_NULL_CONTEXT) {
|
||||||
@ -6062,6 +6063,7 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
|
qmckl_exit_code
|
||||||
qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
||||||
const int64_t cord_num,
|
const int64_t cord_num,
|
||||||
const int64_t elec_num,
|
const int64_t elec_num,
|
||||||
@ -6116,16 +6118,19 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
||||||
const int64_t cf = bf;
|
const int64_t cf = bf;
|
||||||
|
|
||||||
|
info = QMCKL_SUCCESS;
|
||||||
|
|
||||||
|
|
||||||
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
|
||||||
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
|
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#pragma omp target teams distribute parallel for collapse(2)
|
||||||
for (int nw=0; nw < walk_num; ++nw) {
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
for (int i=0; i<cord_num; ++i){
|
for (int i=0; i<cord_num; ++i){
|
||||||
|
|
||||||
//CuBlas implementation
|
cublasStatus_t cublasError =
|
||||||
int cublasError = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
|
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
|
||||||
&(een_rescaled_e[af*(i+nw*(cord_num+1))]), \
|
&(een_rescaled_e[af*(i+nw*(cord_num+1))]), \
|
||||||
LDA, \
|
LDA, \
|
||||||
&(een_rescaled_n[bf*nw]), \
|
&(een_rescaled_n[bf*nw]), \
|
||||||
@ -6134,6 +6139,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
&(tmp_c[cf*(i+nw*cord_num)]), \
|
&(tmp_c[cf*(i+nw*cord_num)]), \
|
||||||
LDC);
|
LDC);
|
||||||
|
|
||||||
|
/*
|
||||||
//Manage cublas ERROR
|
//Manage cublas ERROR
|
||||||
if(cublasError != CUBLAS_STATUS_SUCCESS){
|
if(cublasError != CUBLAS_STATUS_SUCCESS){
|
||||||
printf("CUBLAS ERROR %d", cublasError);
|
printf("CUBLAS ERROR %d", cublasError);
|
||||||
@ -6142,6 +6148,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
}else{
|
}else{
|
||||||
info = QMCKL_SUCCESS;
|
info = QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -6162,7 +6169,8 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
|
#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
|
||||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
|
qmckl_exit_code
|
||||||
|
qmckl_compute_tmp_c_cublas_offload (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
const int64_t cord_num,
|
const int64_t cord_num,
|
||||||
const int64_t elec_num,
|
const int64_t elec_num,
|
||||||
@ -6419,7 +6427,8 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
#ifdef HAVE_OPENACC_OFFLOAD
|
#ifdef HAVE_OPENACC_OFFLOAD
|
||||||
qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
qmckl_exit_code
|
||||||
|
qmckl_compute_dtmp_c_acc_offload (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
const int64_t cord_num,
|
const int64_t cord_num,
|
||||||
const int64_t elec_num,
|
const int64_t elec_num,
|
||||||
@ -6570,7 +6579,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
|||||||
#pragma omp target teams distribute parallel for collapse(6) \
|
#pragma omp target teams distribute parallel for collapse(6) \
|
||||||
map(to:een_rescaled_e_deriv_e[0:size_e], \
|
map(to:een_rescaled_e_deriv_e[0:size_e], \
|
||||||
een_rescaled_n[0:size_n]), \
|
een_rescaled_n[0:size_n]), \
|
||||||
map(tofrom:dtmp_c[0:size_dtmp_c])
|
map(from:dtmp_c[0:size_dtmp_c])
|
||||||
for (int nw=0; nw < walk_num; nw++) {
|
for (int nw=0; nw < walk_num; nw++) {
|
||||||
for (int i=0; i < cord_num; i++) {
|
for (int i=0; i < cord_num; i++) {
|
||||||
|
|
||||||
@ -6618,7 +6627,8 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
|||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||||
qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
|
qmckl_exit_code
|
||||||
|
qmckl_compute_dtmp_c_cublas_offload (
|
||||||
const qmckl_context context,
|
const qmckl_context context,
|
||||||
const int64_t cord_num,
|
const int64_t cord_num,
|
||||||
const int64_t elec_num,
|
const int64_t elec_num,
|
||||||
|
Loading…
Reference in New Issue
Block a user