diff --git a/configure.ac b/configure.ac
index 0ec94b8..17350c7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -304,9 +304,10 @@ AS_IF([test "$enable_gpu" = "openacc"], [
 ])
 
 # cuBLAS offloading
-AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no)
-AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
+HAVE_CUBLAS_OFFLOAD="no"  # default; switched to "yes" below when --enable-gpu=cublas
+AS_IF([test "$enable_gpu" = "cublas"], [
   AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
+  HAVE_CUBLAS_OFFLOAD="yes"
   case $CC in
 
     *gcc*)
diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org
index adf66d1..fb3cf3e 100644
--- a/org/qmckl_jastrow.org
+++ b/org/qmckl_jastrow.org
@@ -118,7 +118,6 @@ int main() {
 #include "qmckl_jastrow_private_type.h"
 
 #ifdef HAVE_CUBLAS_OFFLOAD
-#include <cuda_runtime_api.h>
 #include "cublas_v2.h"
 #endif
 
@@ -6229,30 +6228,6 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
 {
   qmckl_exit_code info;
 
-  //Initialisation of cublas
-
-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
-  qmckl_exit_code info;
-
-  //Initialisation of cublas
-
-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
   if (context == QMCKL_NULL_CONTEXT) {
     return QMCKL_INVALID_CONTEXT;
   }
@@ -6269,6 +6244,14 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
     return QMCKL_INVALID_ARG_4;
   }
 
+  //cuBLAS initialization
+  cublasHandle_t handle;
+  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
+  {
+    fprintf(stdout, "CUBLAS initialization failed!\n");
+    exit(EXIT_FAILURE);
+  }
+
   const double alpha = 1.0;
   const double beta  = 0.0;
 
@@ -6284,45 +6267,24 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
   const int64_t bf = elec_num*nucl_num*(cord_num+1);
   const int64_t cf = bf;
 
-
   #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
   #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
   {
   for (int nw=0; nw < walk_num; ++nw) {
 
-      int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
-                                    &(een_rescaled_e[nw*(cord_num+1)]), \
-                                    LDA, af,                                \
-                                    &(een_rescaled_n[bf*nw]),           \
-                                    LDB, 0,                            \
-                                    &beta,                               \
-                                    &(tmp_c[nw*cord_num]),       \
+    int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
+                                    &(een_rescaled_e[nw*(cord_num+1)]),
+                                    LDA, af,
+                                    &(een_rescaled_n[bf*nw]),
+                                    LDB, 0,
+                                    &beta,
+                                    &(tmp_c[nw*cord_num]),
                                     LDC, cf, cord_num);
-
-
-        //Manage cublas ERROR
-      if(cublasError != CUBLAS_STATUS_SUCCESS){
-            printf("CUBLAS ERROR %d", cublasError);
-            info = QMCKL_FAILURE;
-
-            return info;
-      }else{
-        info = QMCKL_SUCCESS;
-      }
-
-
-
-    
   }
   }
-  cudaDeviceSynchronize();
-  cublasDestroy(handle);
-
-
   #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
 
-
-
+  cublasDestroy(handle);
   return info;
   }
 #endif
@@ -6801,15 +6763,6 @@ qmckl_compute_dtmp_c_cublas_offload (
       const double* een_rescaled_n,
       double* const dtmp_c ) {
 
-  cublasHandle_t handle;
-  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
-    {
-      fprintf(stdout, "CUBLAS initialization failed!\n");
-      exit(EXIT_FAILURE);
-    }
-
-
-
   if (context == QMCKL_NULL_CONTEXT) {
     return QMCKL_INVALID_CONTEXT;
   }
@@ -6832,6 +6785,14 @@ qmckl_compute_dtmp_c_cublas_offload (
 
   qmckl_exit_code  info = QMCKL_SUCCESS;
 
+  //cuBLAS initialization
+  cublasHandle_t handle;
+  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
+  {
+    fprintf(stdout, "CUBLAS initialization failed!\n");
+    exit(EXIT_FAILURE);
+  }
+
   const double alpha = 1.0;
   const double beta  = 0.0;
 
@@ -6847,38 +6808,24 @@ qmckl_compute_dtmp_c_cublas_offload (
   const int64_t bf = elec_num*nucl_num*(cord_num+1);
   const int64_t cf = elec_num*4*nucl_num*(cord_num+1);
 
-#pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num])
-#pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c)
+  #pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num])
+  #pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c)
   {
-    for (int64_t nw=0; nw < walk_num; ++nw) {
-           //Manage CUBLAS ERRORS
-
- int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,        \
-                                      &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), \
-                                      LDA, af,                                               \
-                                      &(een_rescaled_n[bf*nw]),   \
-                                      LDB, 0,                                               \
-                                      &beta,                                              \
-                                      &(dtmp_c[(nw*cord_num)]), \
+  for (int64_t nw=0; nw < walk_num; ++nw) {
+    int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
+                                      &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]),
+                                      LDA, af,
+                                      &(een_rescaled_n[bf*nw]), LDB, 0,
+                                      &beta,
+                                      &(dtmp_c[(nw*cord_num)]),
                                       LDC, cf, cord_num);
 
-
-        if(cublasError != CUBLAS_STATUS_SUCCESS){
-        printf("CUBLAS ERROR %d", cublasError);
-        info = QMCKL_FAILURE;
-        return info;
-        }else{
-        info = QMCKL_SUCCESS;
-        }
-
-      //}
-    }
   }
-  cudaDeviceSynchronize();
+  }
+
+  #pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
+
   cublasDestroy(handle);
-
-#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
-
   return info;
 }
 #endif