diff --git a/configure.ac b/configure.ac
index d3c9471..055ca86 100644
--- a/configure.ac
+++ b/configure.ac
@@ -310,20 +310,22 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [
   case $CC in
 
     *gcc*)
-          CFLAGS="$CFLAGS -fopenacc"
+          CFLAGS="$CFLAGS -fopenmp"
+          # Libraries go in LIBS; append so that user-supplied link flags are preserved.
+          LIBS="$LIBS -lcublas"
           ;;
     *nvc*)
-          CFLAGS="$CFLAGS -acc=gpu"
+          CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas"
           ;;
   esac
 
   case $FC in
 
     *gfortran*)
-          FCFLAGS="$FCFLAGS -fopenacc"
+          FCFLAGS="$FCFLAGS -fopenmp"
           ;;
     *nvfortran*)
-          FCFLAGS="$FCFLAGS -acc=gpu"
+          FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas"
           ;;
   esac
 ])
diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org
index e1fc423..1e17c11 100644
--- a/org/qmckl_jastrow.org
+++ b/org/qmckl_jastrow.org
@@ -5870,14 +5870,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context,
 
      #+begin_src c :comments org :tangle (eval c) :noweb yes
 #ifdef HAVE_OPENACC_OFFLOAD
-qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
-                                                 const int64_t cord_num,
-                                                 const int64_t elec_num,
-                                                 const int64_t nucl_num,
-                                                 const int64_t walk_num,
-                                                 const double* een_rescaled_e,
-                                                 const double* een_rescaled_n,
-                                                 double* const tmp_c )
+qmckl_exit_code
+qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
+                                 const int64_t cord_num,
+                                 const int64_t elec_num,
+                                 const int64_t nucl_num,
+                                 const int64_t walk_num,
+                                 const double* een_rescaled_e,
+                                 const double* een_rescaled_n,
+                                 double* const tmp_c )
 {
 
   if (context == QMCKL_NULL_CONTEXT) {
@@ -6062,6 +6063,7 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
 
 #+begin_src c :comments org :tangle (eval c) :noweb yes
 #ifdef HAVE_CUBLAS_OFFLOAD
+qmckl_exit_code
 qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
                                     const int64_t cord_num,
                                     const int64_t elec_num,
@@ -6116,16 +6118,19 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
   const int64_t bf = elec_num*nucl_num*(cord_num+1);
   const int64_t cf = bf;
 
+  info = QMCKL_SUCCESS;
+
+
   #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
   #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
   {
 
-
+  // cublasDgemm is a host-side API: keep this loop on the host and pass it the device pointers.
   for (int nw=0; nw < walk_num; ++nw) {
     for (int i=0; i<cord_num; ++i){
 
-        //CuBlas implementation
-      int cublasError = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
+      cublasStatus_t cublasError =
+                        cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
                                     &(een_rescaled_e[af*(i+nw*(cord_num+1))]), \
                                     LDA,                                \
                                     &(een_rescaled_n[bf*nw]),           \
@@ -6134,6 +6139,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
                                     &(tmp_c[cf*(i+nw*cord_num)]),       \
                                     LDC);
 
+/*
         //Manage cublas ERROR
       if(cublasError != CUBLAS_STATUS_SUCCESS){
             printf("CUBLAS ERROR %d", cublasError);
@@ -6142,6 +6148,7 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
       }else{
         info = QMCKL_SUCCESS;
       }
+*/
 
 
     }
@@ -6162,7 +6169,8 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
 
     #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
 #ifdef HAVE_CUBLAS_OFFLOAD
-    qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (
+qmckl_exit_code
+qmckl_compute_tmp_c_cublas_offload (
       const qmckl_context context,
       const int64_t cord_num,
       const int64_t elec_num,
@@ -6419,7 +6427,8 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
 
      #+begin_src c :comments org :tangle (eval c) :noweb yes
 #ifdef HAVE_OPENACC_OFFLOAD
-qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
+qmckl_exit_code
+qmckl_compute_dtmp_c_acc_offload (
       const qmckl_context context,
       const int64_t cord_num,
       const int64_t elec_num,
@@ -6570,7 +6579,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
 #pragma omp target teams distribute parallel for collapse(6) \
             map(to:een_rescaled_e_deriv_e[0:size_e], \
                    een_rescaled_n[0:size_n]), \
-            map(tofrom:dtmp_c[0:size_dtmp_c])
+            map(from:dtmp_c[0:size_dtmp_c])
       for (int nw=0; nw < walk_num; nw++) {
         for (int i=0; i < cord_num; i++) {
 
@@ -6618,7 +6627,8 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
 
      #+begin_src c :comments org :tangle (eval c) :noweb yes
 #ifdef HAVE_CUBLAS_OFFLOAD
-qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload (
+qmckl_exit_code
+qmckl_compute_dtmp_c_cublas_offload (
       const qmckl_context context,
       const int64_t cord_num,
       const int64_t elec_num,