diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 79183a6..e1fc423 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -108,6 +108,7 @@ int main() { #include #include + #include #include "qmckl.h" @@ -116,6 +117,13 @@ int main() { #include "qmckl_memory_private_func.h" #include "qmckl_jastrow_private_func.h" #include "qmckl_jastrow_private_type.h" + +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#include "cublas_v2.h" +#endif + + #+end_src * Context @@ -1117,7 +1125,7 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; #endif - + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1511,7 +1519,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const int64_t bord_num, const double* bord_vector, const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + double* const asymp_jasb ); #+end_src @@ -1802,21 +1810,21 @@ qmckl_exit_code qmckl_compute_factor_ee ( int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; } - + if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (bord_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } for (int nw = 0; nw < walk_num; ++nw) { factor_ee[nw] = 0.0; // put init array here. @@ -1827,9 +1835,9 @@ qmckl_exit_code qmckl_compute_factor_ee ( x1 = x; power_ser = 0.0; spin_fact = 1.0; - ipar = 0; // index of asymp_jasb + ipar = 0; // index of asymp_jasb - for (int p = 1; p < bord_num; ++p) { + for (int p = 1; p < bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1838,7 +1846,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( spin_fact = 0.5; ipar = 1; } - + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ x1 / \ (1.0 + bord_vector[1] * \ @@ -1854,7 +1862,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src # #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, @@ -1865,7 +1873,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( const double* bord_vector, const double* ee_distance_rescaled, const double* asymp_jasb, - double* const factor_ee ); + double* const factor_ee ); #+end_src @@ -2177,7 +2185,7 @@ end function qmckl_compute_factor_ee_deriv_e_f const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, const double* asymp_jasb, - double* const factor_ee_deriv_e ); + double* const factor_ee_deriv_e ); #+end_src @@ -2451,7 +2459,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2550,7 +2558,7 @@ integer function qmckl_compute_factor_en_f( & end function qmckl_compute_factor_en_f #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( @@ -2619,7 +2627,7 @@ qmckl_exit_code qmckl_compute_factor_en ( x1 = x; power_ser = 0.0; - for (int p = 2; p < aord_num+1; ++p) { + for (int p = 2; p < aord_num+1; ++p) { x = x * x1; power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; } @@ -2650,7 +2658,7 @@ qmckl_exit_code qmckl_compute_factor_en ( const int64_t aord_num, const double* aord_vector, const double* en_distance_rescaled, - double* const factor_en ); + double* const factor_en ); #+end_src @@ -2944,7 +2952,7 @@ end function qmckl_compute_factor_en_deriv_e_f const double* aord_vector, const double* en_distance_rescaled, const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + double* const factor_en_deriv_e ); #+end_src @@ -3337,7 +3345,7 @@ end function qmckl_compute_een_rescaled_e_doc_f const int64_t cord_num, const double rescale_factor_kappa_ee, const double* ee_distance, - double* const een_rescaled_e ); + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") @@ -3376,13 +3384,13 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double rescale_factor_kappa_ee, const double* ee_distance, double* const een_rescaled_e ) { - - double *een_rescaled_e_ij; + + double *een_rescaled_e_ij; double x; const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; const int64_t len_een_ij = elec_pairs * (cord_num + 1); - int64_t k; - + int64_t k; + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] // probably in C is better [cord+1, Ne*(Ne-1)/2] //elec_pairs = (elec_num * (elec_num - 1)) / 2; @@ -3391,7 +3399,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -3406,8 +3414,8 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } // Prepare table of exponentiated distances raised to appropriate power - // init - + // init + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { een_rescaled_e[kk]= 0.0; } @@ -3425,14 +3433,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( */ for (int nw = 0; nw < walk_num; ++nw) { - + for (int kk = 0; kk < len_een_ij; ++kk) { // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); } - k = 0; + k = 0; for (int i = 0; i < elec_num; ++i) { for (int j = 0; j < i; ++j) { // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); @@ -3450,7 +3458,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e_ij[k + elec_pairs]; } } - + // prepare the actual een table for (int i = 0; i < elec_num; ++i){ @@ -3458,7 +3466,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; } } - + // Up to here it should work. for ( int l = 1; l < (cord_num+1); ++l) { k = 0; @@ -3481,7 +3489,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } free(een_rescaled_e_ij); - + return QMCKL_SUCCESS; } #+end_src @@ -3520,7 +3528,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double* ee_distance, double* const een_rescaled_e ); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_e ( const qmckl_context context, @@ -3848,7 +3856,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f const double* coord_new, const double* ee_distance, const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + double* const een_rescaled_e_deriv_e ); #+end_src @@ -4207,7 +4215,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -4268,7 +4276,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( const int64_t cord_num, const double rescale_factor_kappa_en, const double* en_distance, - double* const een_rescaled_n ); + double* const een_rescaled_n ); #+end_src *** Test @@ -4577,7 +4585,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f const double* coord, const double* en_distance, const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -5019,14 +5027,15 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ #ifdef HAVE_HPC const bool gpu_offload = ctx->jastrow.gpu_offload; #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_tmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5067,7 +5076,8 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); } - + + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5107,13 +5117,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } + #ifdef HAVE_HPC const bool gpu_offload = ctx->jastrow.gpu_offload; #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_dtmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5159,6 +5170,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) return rc; } + ctx->jastrow.dtmp_c_date = ctx->date; } @@ -5228,10 +5240,10 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, int64_t* const dim_cord_vect){ - + int lmax; - + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5241,7 +5253,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } *dim_cord_vect = 0; - + for (int p=2; p <= cord_num; ++p){ for (int k=p-1; k >= 0; --k) { if (k != 0) { @@ -5255,7 +5267,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } } } - + return QMCKL_SUCCESS; } #+end_src @@ -5266,7 +5278,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, - int64_t* const dim_cord_vect ); + int64_t* const dim_cord_vect ); #+end_src @@ -5531,15 +5543,15 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( int kk, lmax, m; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } - if (cord_num <= 0) { + if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; } - if (dim_cord_vect <= 0) { + if (dim_cord_vect <= 0) { return QMCKL_INVALID_ARG_3; } @@ -5576,7 +5588,7 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src @@ -5617,7 +5629,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #endif } #+end_src - + # #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none @@ -5629,7 +5641,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -5709,11 +5721,11 @@ qmckl_exit_code qmckl_compute_tmp_c_doc ( const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") - + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5758,19 +5770,19 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; - } + } if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (nucl_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_5; - } + } qmckl_exit_code info = QMCKL_SUCCESS; @@ -5807,6 +5819,42 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( } #+end_src + + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_hpc") + + #+RESULTS: + #+begin_src c :tangle (eval h_private_func) :comments org qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, const int64_t cord_num, @@ -5815,7 +5863,7 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src **** OpenACC offload :noexport: @@ -5865,7 +5913,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num; const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; - const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; + const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { @@ -5877,7 +5925,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, for (int j=0; j