From 39bcc569e0d6d7f5c9fc35f26919f47a8886b83d Mon Sep 17 00:00:00 2001 From: hoffer Date: Wed, 6 Apr 2022 11:16:17 +0200 Subject: [PATCH 1/3] Start implementing cublas --- org/qmckl_jastrow.org | 137 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..1cbd030 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -108,6 +108,12 @@ int main() { #include #include + +#include +#include "cublas_v2.h" + + + #include #include "qmckl.h" @@ -4857,7 +4863,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } - +/* qmckl_exit_code rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, @@ -4870,6 +4876,20 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } +,*/ + qmckl_exit_code rc = + qmckl_compute_tmp_c_cuBlas(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4899,7 +4919,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4910,7 +4930,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) } ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, @@ -4924,6 +4943,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) return rc; } + ctx->jastrow.dtmp_c_date = ctx->date; } @@ -5453,6 +5473,105 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( } #+end_src +#+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + + //Initialisation of cublas + + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + const double alpha = 1.0; + const double beta = 0.0; + + const int64_t M = elec_num; + const int64_t N = nucl_num*(cord_num + 1); + const int64_t K = elec_num; + + const int64_t LDA = elec_num; + const int64_t LDB = elec_num; + const int64_t LDC = elec_num; + + const int64_t af = elec_num*elec_num; + const int64_t bf = elec_num*nucl_num*(cord_num+1); + const int64_t cf = bf; + + const double* tmp_c_gpu = malloc(sizeof(tmp_c)); + + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) + { + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; i Date: Wed, 6 Apr 2022 16:20:29 +0200 Subject: [PATCH 2/3] Add openmp and cublas --- org/qmckl_jastrow.org | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 1cbd030..2ac3438 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,11 +5528,11 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - const double* tmp_c_gpu = malloc(sizeof(tmp_c)); - - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { + + for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Wed, 6 Apr 2022 17:04:00 +0200 Subject: [PATCH 3/3] Ok for openmp and Cublas --- org/qmckl_jastrow.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 2ac3438..5d164ad 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,7 +5528,7 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) + #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) {