From 39bcc569e0d6d7f5c9fc35f26919f47a8886b83d Mon Sep 17 00:00:00 2001 From: hoffer Date: Wed, 6 Apr 2022 11:16:17 +0200 Subject: [PATCH 1/7] Start implementing cublas --- org/qmckl_jastrow.org | 137 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..1cbd030 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -108,6 +108,12 @@ int main() { #include #include + +#include +#include "cublas_v2.h" + + + #include #include "qmckl.h" @@ -4857,7 +4863,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } - +/* qmckl_exit_code rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, @@ -4870,6 +4876,20 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } +,*/ + qmckl_exit_code rc = + qmckl_compute_tmp_c_cuBlas(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4899,7 +4919,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4910,7 +4930,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) } ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, @@ -4924,6 +4943,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) return rc; } + ctx->jastrow.dtmp_c_date = ctx->date; } @@ -5453,6 +5473,105 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( } #+end_src +#+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + + //Initialisation of cublas + + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + const double alpha = 1.0; + const double beta = 0.0; + + const int64_t M = elec_num; + const int64_t N = nucl_num*(cord_num + 1); + const int64_t K = elec_num; + + const int64_t LDA = elec_num; + const int64_t LDB = elec_num; + const int64_t LDC = elec_num; + + const int64_t af = elec_num*elec_num; + const int64_t bf = elec_num*nucl_num*(cord_num+1); + const int64_t cf = bf; + + const double* tmp_c_gpu = malloc(sizeof(tmp_c)); + + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) + { + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; i Date: Wed, 6 Apr 2022 16:20:29 +0200 Subject: [PATCH 2/7] Add openmp and cublas --- org/qmckl_jastrow.org | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 1cbd030..2ac3438 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,11 +5528,11 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - const double* tmp_c_gpu = malloc(sizeof(tmp_c)); - - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { + + for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Wed, 6 Apr 2022 16:26:35 +0200 Subject: [PATCH 3/7] Cleaning --- org/qmckl_jastrow.org | 1248 +++++++++++++++++++++-------------------- org/qmckl_mo.org | 2 - 2 files changed, 646 insertions(+), 604 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 35003f5..666da47 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -151,7 +151,6 @@ int main() { | ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative | | ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part | | ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative | - | ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime | computed data: @@ -328,14 +327,6 @@ kappa_inv = 1.0/kappa ** Data structure -#+begin_src c :comments org :tangle (eval h_type) -typedef enum qmckl_jastrow_offload_type{ - OFFLOAD_NONE, - OFFLOAD_OPENACC, - OFFLOAD_CUBLAS -} qmckl_jastrow_offload_type; -#+end_src - #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; @@ -381,7 +372,10 @@ typedef struct qmckl_jastrow_struct{ uint64_t een_rescaled_n_deriv_e_date; bool provided; char * type; - qmckl_jastrow_offload_type offload_type; + + #ifdef HAVE_HPC + bool gpu_offload; + #endif } qmckl_jastrow_struct; #+end_src @@ -426,7 +420,6 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type); #+end_src Along with these core functions, calculation of the jastrow factor @@ -724,32 +717,6 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context, return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return (char) 0; - } - - if (offload_type == NULL) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_get_jastrow_offload_type", - "offload_type is a null pointer"); - } - - qmckl_context_struct* const ctx = (qmckl_context_struct*) context; - assert (ctx != NULL); - - int32_t mask = 1 << 0; - - if ( (ctx->jastrow.uninitialized & mask) != 0) { - return QMCKL_NOT_PROVIDED; - } - - *offload_type = ctx->jastrow.offload_type; - return QMCKL_SUCCESS; -} - #+end_src ** Initialization functions @@ -764,7 +731,6 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type); #+end_src #+NAME:pre2 @@ -1101,14 +1067,6 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, <> } -qmckl_exit_code -qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type) -{ -<> - ctx->jastrow.offload_type = offload_type; - return QMCKL_SUCCESS; -} - #+end_src When the required information is completely entered, other data structures are @@ -1155,6 +1113,13 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { NULL); } + /* Decide if the Jastrow if offloaded on GPU or not */ +#if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) + ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; +#else + ctx->jastrow.gpu_offload = false; +#endif + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1540,16 +1505,16 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( } #+end_src - #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ); #+end_src @@ -1892,19 +1857,19 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ); #+end_src @@ -2202,21 +2167,21 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & end function qmckl_compute_factor_ee_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); #+end_src @@ -2224,8 +2189,8 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & + integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & + (context, & walk_num, & elec_num, & up_num, & @@ -2235,7 +2200,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & ee_distance_rescaled_deriv_e, & asymp_jasb, & factor_ee_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -2245,7 +2210,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer (c_int64_t) , intent(in) , value :: elec_num integer (c_int64_t) , intent(in) , value :: up_num integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) + real (c_double ) , intent(in) :: bord_vector(bord_num+1) real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) real (c_double ) , intent(in) :: ee_distance_rescaled_deriv_e(elec_num,elec_num,4,walk_num) real (c_double ) , intent(in) :: asymp_jasb(2) @@ -2253,7 +2218,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f info = qmckl_compute_factor_ee_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & up_num, & @@ -2676,21 +2641,20 @@ qmckl_exit_code qmckl_compute_factor_en ( #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ); #+end_src @@ -2970,22 +2934,21 @@ integer function qmckl_compute_factor_en_deriv_e_f( & end function qmckl_compute_factor_en_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + const double* en_distance_rescaled_deriv_e, + double* const factor_en_deriv_e ); #+end_src @@ -2994,7 +2957,7 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_en_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3005,7 +2968,7 @@ end function qmckl_compute_factor_en_deriv_e_f en_distance_rescaled, & en_distance_rescaled_deriv_e, & factor_en_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3017,14 +2980,14 @@ end function qmckl_compute_factor_en_deriv_e_f integer (c_int64_t) , intent(in) , value :: type_nucl_num integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) + real (c_double ) , intent(in) :: aord_vector(type_nucl_num,aord_num+1) real (c_double ) , intent(in) :: en_distance_rescaled(elec_num,nucl_num,walk_num) real (c_double ) , intent(in) :: en_distance_rescaled_deriv_e(elec_num,nucl_num,4,walk_num) real (c_double ) , intent(out) :: factor_en_deriv_e(elec_num,4,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_en_deriv_e_f info = qmckl_compute_factor_en_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3366,18 +3329,17 @@ integer function qmckl_compute_een_rescaled_e_f( & end function qmckl_compute_een_rescaled_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3385,9 +3347,8 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & - ee_distance, een_rescaled_e) & - bind(C) result(info) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3402,7 +3363,7 @@ end function qmckl_compute_een_rescaled_e_f integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f info = qmckl_compute_een_rescaled_e_f & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) end function qmckl_compute_een_rescaled_e #+end_src @@ -3597,7 +3558,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) *** Compute :PROPERTIES: - :Name: qmckl_compute_een_rescaled_e_deriv_e + :Name: qmckl_compute_factor_een_rescaled_e_deriv_e :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: @@ -3704,21 +3665,20 @@ integer function qmckl_compute_factor_een_rescaled_e_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* coord_new, - const double* ee_distance, - const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); - #+end_src + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* coord_new, + const double* ee_distance, + const double* een_rescaled_e, + double* const een_rescaled_e_deriv_e ); + #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3726,7 +3686,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_e_deriv_e & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -3735,7 +3695,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f ee_distance, & een_rescaled_e, & een_rescaled_e_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3752,7 +3712,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_e_deriv_e_f info = qmckl_compute_factor_een_rescaled_e_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -4126,19 +4086,18 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ); #+end_src *** Test @@ -4433,22 +4392,21 @@ integer function qmckl_compute_factor_een_rescaled_n_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_een_rescaled_n_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* coord_new, - const double* coord, - const double* en_distance, - const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* coord_new, + const double* coord, + const double* en_distance, + const double* een_rescaled_n, + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -4456,7 +4414,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_n_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4467,7 +4425,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f en_distance, & een_rescaled_n, & een_rescaled_n_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -4481,12 +4439,12 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f real (c_double ) , intent(in) :: coord_new(elec_num,3,walk_num) real (c_double ) , intent(in) :: coord(nucl_num,3) real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n(0:cord_num,nucl_num,elec_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_n_deriv_e_f info = qmckl_compute_factor_een_rescaled_n_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4891,64 +4849,54 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } /* Choose the correct compute function (depending on offload type) */ - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #endif - break; - default: - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - break; +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_tmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4988,18 +4936,44 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_dtmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -5008,39 +4982,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #endif - break; - default: - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - break; } if (rc != QMCKL_SUCCESS) { @@ -5148,14 +5089,13 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect ); + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect ); #+end_src @@ -5224,18 +5164,17 @@ integer function qmckl_compute_cord_vect_full_f( & end function qmckl_compute_cord_vect_full_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); #+end_src @@ -5244,8 +5183,8 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_cord_vect_full & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & - bind(C) result(info) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -5260,7 +5199,7 @@ end function qmckl_compute_cord_vect_full_f integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f info = qmckl_compute_cord_vect_full_f & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) end function qmckl_compute_cord_vect_full #+end_src @@ -5381,15 +5320,14 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ); #+end_src @@ -5413,6 +5351,38 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ +#ifdef HAVE_HPC + return qmckl_compute_tmp_c_hpc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#else + return qmckl_compute_tmp_c_doc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#endif +} + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_tmp_c_doc_f( & context, cord_num, elec_num, nucl_num, & @@ -5481,8 +5451,20 @@ integer function qmckl_compute_tmp_c_doc_f( & end function qmckl_compute_tmp_c_doc_f #+end_src -#+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5508,6 +5490,7 @@ integer(c_int32_t) function qmckl_compute_tmp_c_doc & end function qmckl_compute_tmp_c_doc #+end_src +**** CPU :noexport: #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_tmp_c_hpc ( @@ -5559,16 +5542,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; +#ifdef HAVE_OPENMP +#pragma omp parallel for collapse(2) +#endif for (int64_t nw=0; nw < walk_num; ++nw) { for (int64_t i=0; i Date: Wed, 6 Apr 2022 17:04:00 +0200 Subject: [PATCH 4/7] Ok for openmp and Cublas --- org/qmckl_jastrow.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 2ac3438..5d164ad 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,7 +5528,7 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) + #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { From 9cef7048d3482a20a6150494bfa47554023d6a33 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 17:10:23 +0200 Subject: [PATCH 5/7] Fix CI --- org/qmckl_jastrow.org | 2 -- 1 file changed, 2 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9173a43..c4f2e28 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1116,8 +1116,6 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { /* Decide if the Jastrow if offloaded on GPU or not */ #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; -#else - ctx->jastrow.gpu_offload = false; #endif qmckl_exit_code rc = QMCKL_SUCCESS; From 7dc02571e92cdece74e64d368eff4a55a4f752c4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 7 Apr 2022 13:33:50 +0200 Subject: [PATCH 6/7] Fix build --- org/qmckl_jastrow.org | 742 +++++++++++++++++++----------------------- 1 file changed, 334 insertions(+), 408 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 269d3fd..b9981b5 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -109,11 +109,6 @@ int main() { #include -#include -#include "cublas_v2.h" - - - #include #include "qmckl.h" @@ -122,6 +117,13 @@ int main() { #include "qmckl_memory_private_func.h" #include "qmckl_jastrow_private_func.h" #include "qmckl_jastrow_private_type.h" + +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#include "cublas_v2.h" +#endif + + #+end_src * Context @@ -1123,7 +1125,7 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; #endif - + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1517,7 +1519,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const int64_t bord_num, const double* bord_vector, const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + double* const asymp_jasb ); #+end_src @@ -1808,21 +1810,21 @@ qmckl_exit_code qmckl_compute_factor_ee ( int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; } - + if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (bord_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } for (int nw = 0; nw < walk_num; ++nw) { factor_ee[nw] = 0.0; // put init array here. @@ -1833,9 +1835,9 @@ qmckl_exit_code qmckl_compute_factor_ee ( x1 = x; power_ser = 0.0; spin_fact = 1.0; - ipar = 0; // index of asymp_jasb + ipar = 0; // index of asymp_jasb - for (int p = 1; p < bord_num; ++p) { + for (int p = 1; p < bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1844,7 +1846,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( spin_fact = 0.5; ipar = 1; } - + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ x1 / \ (1.0 + bord_vector[1] * \ @@ -1860,7 +1862,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src # #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, @@ -1871,7 +1873,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( const double* bord_vector, const double* ee_distance_rescaled, const double* asymp_jasb, - double* const factor_ee ); + double* const factor_ee ); #+end_src @@ -2183,7 +2185,7 @@ end function qmckl_compute_factor_ee_deriv_e_f const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, const double* asymp_jasb, - double* const factor_ee_deriv_e ); + double* const factor_ee_deriv_e ); #+end_src @@ -2457,7 +2459,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2556,7 +2558,7 @@ integer function qmckl_compute_factor_en_f( & end function qmckl_compute_factor_en_f #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( @@ -2625,7 +2627,7 @@ qmckl_exit_code qmckl_compute_factor_en ( x1 = x; power_ser = 0.0; - for (int p = 2; p < aord_num+1; ++p) { + for (int p = 2; p < aord_num+1; ++p) { x = x * x1; power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; } @@ -2656,7 +2658,7 @@ qmckl_exit_code qmckl_compute_factor_en ( const int64_t aord_num, const double* aord_vector, const double* en_distance_rescaled, - double* const factor_en ); + double* const factor_en ); #+end_src @@ -2950,7 +2952,7 @@ end function qmckl_compute_factor_en_deriv_e_f const double* aord_vector, const double* en_distance_rescaled, const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + double* const factor_en_deriv_e ); #+end_src @@ -3343,7 +3345,7 @@ end function qmckl_compute_een_rescaled_e_doc_f const int64_t cord_num, const double rescale_factor_kappa_ee, const double* ee_distance, - double* const een_rescaled_e ); + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") @@ -3382,13 +3384,13 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double rescale_factor_kappa_ee, const double* ee_distance, double* const een_rescaled_e ) { - - double *een_rescaled_e_ij; + + double *een_rescaled_e_ij; double x; const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; const int64_t len_een_ij = elec_pairs * (cord_num + 1); - int64_t k; - + int64_t k; + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] // probably in C is better [cord+1, Ne*(Ne-1)/2] //elec_pairs = (elec_num * (elec_num - 1)) / 2; @@ -3397,7 +3399,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -3412,8 +3414,8 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } // Prepare table of exponentiated distances raised to appropriate power - // init - + // init + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { een_rescaled_e[kk]= 0.0; } @@ -3431,14 +3433,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( */ for (int nw = 0; nw < walk_num; ++nw) { - + for (int kk = 0; kk < len_een_ij; ++kk) { // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); } - k = 0; + k = 0; for (int i = 0; i < elec_num; ++i) { for (int j = 0; j < i; ++j) { // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); @@ -3456,7 +3458,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e_ij[k + elec_pairs]; } } - + // prepare the actual een table for (int i = 0; i < elec_num; ++i){ @@ -3464,7 +3466,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; } } - + // Up to here it should work. for ( int l = 1; l < (cord_num+1); ++l) { k = 0; @@ -3487,7 +3489,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } free(een_rescaled_e_ij); - + return QMCKL_SUCCESS; } #+end_src @@ -3526,7 +3528,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double* ee_distance, double* const een_rescaled_e ); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_e ( const qmckl_context context, @@ -3854,7 +3856,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f const double* coord_new, const double* ee_distance, const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + double* const een_rescaled_e_deriv_e ); #+end_src @@ -4213,7 +4215,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -4274,7 +4276,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( const int64_t cord_num, const double rescale_factor_kappa_en, const double* en_distance, - double* const een_rescaled_n ); + double* const een_rescaled_n ); #+end_src *** Test @@ -4583,7 +4585,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f const double* coord, const double* en_distance, const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -5032,8 +5034,8 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_tmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5074,7 +5076,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5121,8 +5123,8 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_dtmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5238,10 +5240,10 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, int64_t* const dim_cord_vect){ - + int lmax; - + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5251,7 +5253,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } *dim_cord_vect = 0; - + for (int p=2; p <= cord_num; ++p){ for (int k=p-1; k >= 0; --k) { if (k != 0) { @@ -5265,7 +5267,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } } } - + return QMCKL_SUCCESS; } #+end_src @@ -5276,7 +5278,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, - int64_t* const dim_cord_vect ); + int64_t* const dim_cord_vect ); #+end_src @@ -5541,15 +5543,15 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( int kk, lmax, m; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } - if (cord_num <= 0) { + if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; } - if (dim_cord_vect <= 0) { + if (dim_cord_vect <= 0) { return QMCKL_INVALID_ARG_3; } @@ -5586,7 +5588,7 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src @@ -5627,7 +5629,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #endif } #+end_src - + # #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none @@ -5639,7 +5641,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -5719,11 +5721,11 @@ qmckl_exit_code qmckl_compute_tmp_c_doc ( const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") - + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5768,19 +5770,19 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; - } + } if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (nucl_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_5; - } + } qmckl_exit_code info = QMCKL_SUCCESS; @@ -5818,16 +5820,264 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_hpc") + + #+RESULTS: + + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +**** OpenACC offload :noexport: + + #+begin_src c :comments org :tangle (eval c) :noweb yes +#ifdef HAVE_OPENACC_OFFLOAD +qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + // Compute array access strides: + // For tmp_c... + const int64_t stride_k_c = elec_num; + const int64_t stride_j_c = stride_k_c * nucl_num; + const int64_t stride_i_c = stride_j_c * (cord_num+1); + const int64_t stride_nw_c = stride_i_c * cord_num; + // For een_rescaled_e... + const int64_t stride_m_e = elec_num; + const int64_t stride_i_e = stride_m_e * elec_num; + const int64_t stride_nw_e = stride_i_e * (cord_num+1); + // For een_rescaled_n... + const int64_t stride_k_n = elec_num; + const int64_t stride_j_n = stride_k_n * nucl_num; + const int64_t stride_nw_n = stride_j_n * (cord_num+1); + + const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num; + const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; + const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; + +#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + { +#pragma acc loop independent gang worker vector + for (int64_t i=0 ; i Date: Thu, 7 Apr 2022 13:57:20 +0200 Subject: [PATCH 7/7] Fix OpenACC and OpenMP implementations --- org/qmckl_jastrow.org | 146 +++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 81 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index b9981b5..e42a86e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5915,36 +5915,31 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; -#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { -#pragma acc loop independent gang worker vector - for (int64_t i=0 ; i