Configure cuBLAS with --enable-gpu and clean code

2024-12-22 12:23:56 +01:00 · 2022-04-08 11:11:15 +02:00 · 2022-04-08 11:11:15 +02:00 · ad531dddf9
commit ad531dddf9
parent 54f60480fa
2 changed files with 40 additions and 92 deletions
--- a/configure.ac
+++ b/configure.ac
@ -304,9 +304,10 @@ AS_IF([test "$enable_gpu" = "openacc"], [
 ])

 # cuBLAS offloading
-AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no)
-AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
+HAVE_OPENACC_OFFLOAD="no"
+AS_IF([test "$enable_gpu" = "cublas"], [
  AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
+  HAVE_CUBLAS_OFFLOAD="yes"
  case $CC in

    *gcc*)
--- a/org/qmckl_jastrow.org
+++ b/org/qmckl_jastrow.org
@ -118,7 +118,6 @@ int main() {
 #include "qmckl_jastrow_private_type.h"

 #ifdef HAVE_CUBLAS_OFFLOAD
-#include <cuda_runtime_api.h>
 #include "cublas_v2.h"
 #endif

@ -6229,30 +6228,6 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
 {
  qmckl_exit_code info;

-  //Initialisation of cublas
-
-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
-  qmckl_exit_code info;
-
-  //Initialisation of cublas
-
-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
  if (context == QMCKL_NULL_CONTEXT) {
    return QMCKL_INVALID_CONTEXT;
  }
@ -6269,6 +6244,14 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
    return QMCKL_INVALID_ARG_4;
  }

+  //cuBLAS initialization
+  cublasHandle_t handle;
+  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
+  {
+    fprintf(stdout, "CUBLAS initialization failed!\n");
+    exit(EXIT_FAILURE);
+  }
+
  const double alpha = 1.0;
  const double beta  = 0.0;

@ -6284,45 +6267,24 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
  const int64_t bf = elec_num*nucl_num*(cord_num+1);
  const int64_t cf = bf;

-
  #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
  #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
  {
  for (int nw=0; nw < walk_num; ++nw) {

-      int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
-                                    &(een_rescaled_e[nw*(cord_num+1)]), \
-                                    LDA, af,                                \
-                                    &(een_rescaled_n[bf*nw]),           \
-                                    LDB, 0,                            \
-                                    &beta,                               \
-                                    &(tmp_c[nw*cord_num]),       \
+    int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
+                                    &(een_rescaled_e[nw*(cord_num+1)]),
+                                    LDA, af,
+                                    &(een_rescaled_n[bf*nw]),
+                                    LDB, 0,
+                                    &beta,
+                                    &(tmp_c[nw*cord_num]),
                                    LDC, cf, cord_num);
-
-
-        //Manage cublas ERROR
-      if(cublasError != CUBLAS_STATUS_SUCCESS){
-            printf("CUBLAS ERROR %d", cublasError);
-            info = QMCKL_FAILURE;
-
-            return info;
-      }else{
-        info = QMCKL_SUCCESS;
-      }
-
-
-
-    
  }
  }
-  cudaDeviceSynchronize();
-  cublasDestroy(handle);
-
-
  #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])

-
-
+  cublasDestroy(handle);
  return info;
  }
 #endif
@ -6801,15 +6763,6 @@ qmckl_compute_dtmp_c_cublas_offload (
      const double* een_rescaled_n,
      double* const dtmp_c ) {

-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
  if (context == QMCKL_NULL_CONTEXT) {
    return QMCKL_INVALID_CONTEXT;
  }
@ -6832,6 +6785,14 @@ qmckl_compute_dtmp_c_cublas_offload (

  qmckl_exit_code  info = QMCKL_SUCCESS;

+  //cuBLAS initialization
+  cublasHandle_t handle;
+  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
+  {
+    fprintf(stdout, "CUBLAS initialization failed!\n");
+    exit(EXIT_FAILURE);
+  }
+
  const double alpha = 1.0;
  const double beta  = 0.0;

@ -6847,38 +6808,24 @@ qmckl_compute_dtmp_c_cublas_offload (
  const int64_t bf = elec_num*nucl_num*(cord_num+1);
  const int64_t cf = elec_num*4*nucl_num*(cord_num+1);

-#pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num])
-#pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c)
+  #pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num])
+  #pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c)
  {
-    for (int64_t nw=0; nw < walk_num; ++nw) {
-           //Manage CUBLAS ERRORS
-
- int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,        \
-                                      &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), \
-                                      LDA, af,                                               \
-                                      &(een_rescaled_n[bf*nw]),   \
-                                      LDB, 0,                                               \
-                                      &beta,                                              \
-                                      &(dtmp_c[(nw*cord_num)]), \
+  for (int64_t nw=0; nw < walk_num; ++nw) {
+    int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
+                                      &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]),
+                                      LDA, af,
+                                      &(een_rescaled_n[bf*nw]), LDB, 0,
+                                      &beta,
+                                      &(dtmp_c[(nw*cord_num)]),
                                      LDC, cf, cord_num);

-
-        if(cublasError != CUBLAS_STATUS_SUCCESS){
-        printf("CUBLAS ERROR %d", cublasError);
-        info = QMCKL_FAILURE;
-        return info;
-        }else{
-        info = QMCKL_SUCCESS;
-        }
-
-      //}
-    }
  }
-  cudaDeviceSynchronize();
+  }
+
+  #pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
+
  cublasDestroy(handle);
-
-#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
-
  return info;
 }
 #endif